xref: /openbsd-src/sys/netinet/ip_output.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: ip_output.c,v 1.265 2014/07/12 18:44:23 tedu Exp $	*/
2 /*	$NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
33  */
34 
35 #include "pf.h"
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/mbuf.h>
40 #include <sys/protosw.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 
46 #include <net/if.h>
47 #include <net/if_enc.h>
48 #include <net/route.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/in_pcb.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip_var.h>
56 #include <netinet/ip_icmp.h>
57 #include <netinet/tcp.h>
58 #include <netinet/udp.h>
59 #include <netinet/tcp_timer.h>
60 #include <netinet/tcp_var.h>
61 #include <netinet/udp_var.h>
62 
63 #if NPF > 0
64 #include <net/pfvar.h>
65 #endif
66 
#ifdef IPSEC
/*
 * Debug output helper for the IPsec code paths.
 *
 * With ENCDEBUG defined, DPRINTF(x) calls printf with the argument list x,
 * but only while encdebug is non-zero; without ENCDEBUG it expands to
 * nothing.  x must be a complete, parenthesised printf argument list,
 * which is why callers write DPRINTF(("fmt", args)).
 */
#ifdef ENCDEBUG
#define DPRINTF(x)    do { if (encdebug) printf x ; } while (0)
#else
#define DPRINTF(x)
#endif
#endif /* IPSEC */
74 
/*
 * Forward declarations of helper routines.  ip_insertoptions() and
 * ip_mloopback() are both called from ip_output().
 */
struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);
static __inline u_int16_t __attribute__((__unused__))
    in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t);
void in_delayed_cksum(struct mbuf *);
80 
81 /*
82  * IP output.  The packet in mbuf chain m contains a skeletal IP
83  * header (with len, off, ttl, proto, tos, src, dst).
84  * The mbuf chain containing the packet will be freed.
85  * The mbuf opt, if present, will not be freed.
86  */
87 int
88 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags,
89     struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo)
90 {
91 	struct ip *ip;
92 	struct ifnet *ifp;
93 	struct mbuf *m = m0;
94 	int hlen = sizeof (struct ip);
95 	int len, error = 0;
96 	struct route iproute;
97 	struct sockaddr_in *dst;
98 	struct in_ifaddr *ia;
99 	u_int8_t sproto = 0, donerouting = 0;
100 	u_long mtu;
101 #ifdef IPSEC
102 	u_int32_t icmp_mtu = 0;
103 	union sockaddr_union sdst;
104 	u_int32_t sspi;
105 	struct m_tag *mtag;
106 	struct tdb_ident *tdbi;
107 
108 	struct tdb *tdb;
109 #if NPF > 0
110 	struct ifnet *encif;
111 #endif
112 #endif /* IPSEC */
113 
114 #ifdef IPSEC
115 	if (inp && (inp->inp_flags & INP_IPV6) != 0)
116 		panic("ip_output: IPv6 pcb is passed");
117 #endif /* IPSEC */
118 
119 #ifdef	DIAGNOSTIC
120 	if ((m->m_flags & M_PKTHDR) == 0)
121 		panic("ip_output no HDR");
122 #endif
123 	if (opt) {
124 		m = ip_insertoptions(m, opt, &len);
125 		hlen = len;
126 	}
127 
128 	ip = mtod(m, struct ip *);
129 
130 	/*
131 	 * Fill in IP header.
132 	 */
133 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
134 		ip->ip_v = IPVERSION;
135 		ip->ip_off &= htons(IP_DF);
136 		ip->ip_id = htons(ip_randomid());
137 		ip->ip_hl = hlen >> 2;
138 		ipstat.ips_localout++;
139 	} else {
140 		hlen = ip->ip_hl << 2;
141 	}
142 
143 	/*
144 	 * We should not send traffic to 0/8 say both Stevens and RFCs
145 	 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6.
146 	 */
147 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) {
148 		error = ENETUNREACH;
149 		goto bad;
150 	}
151 
152 	/*
153 	 * If we're missing the IP source address, do a route lookup. We'll
154 	 * remember this result, in case we don't need to do any IPsec
155 	 * processing on the packet. We need the source address so we can
156 	 * do an SPD lookup in IPsec; for most packets, the source address
157 	 * is set at a higher level protocol. ICMPs and other packets
158 	 * though (e.g., traceroute) have a source address of zeroes.
159 	 */
160 	if (ip->ip_src.s_addr == INADDR_ANY) {
161 		if (flags & IP_ROUTETOETHER) {
162 			error = EINVAL;
163 			goto bad;
164 		}
165 		donerouting = 1;
166 
167 		if (ro == 0) {
168 			ro = &iproute;
169 			memset(ro, 0, sizeof(*ro));
170 		}
171 
172 		dst = satosin(&ro->ro_dst);
173 
174 		/*
175 		 * If there is a cached route, check that it is to the same
176 		 * destination and is still up.  If not, free it and try again.
177 		 */
178 		if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
179 		    dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
180 		    ro->ro_tableid != m->m_pkthdr.ph_rtableid)) {
181 			RTFREE(ro->ro_rt);
182 			ro->ro_rt = (struct rtentry *)0;
183 		}
184 
185 		if (ro->ro_rt == 0) {
186 			dst->sin_family = AF_INET;
187 			dst->sin_len = sizeof(*dst);
188 			dst->sin_addr = ip->ip_dst;
189 			ro->ro_tableid = m->m_pkthdr.ph_rtableid;
190 		}
191 
192 		if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
193 		    (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
194 		    imo != NULL && imo->imo_multicast_ifp != NULL) {
195 			ifp = imo->imo_multicast_ifp;
196 			mtu = ifp->if_mtu;
197 			IFP_TO_IA(ifp, ia);
198 		} else {
199 			if (ro->ro_rt == 0)
200 				rtalloc_mpath(ro, NULL);
201 
202 			if (ro->ro_rt == 0) {
203 				ipstat.ips_noroute++;
204 				error = EHOSTUNREACH;
205 				goto bad;
206 			}
207 
208 			ia = ifatoia(ro->ro_rt->rt_ifa);
209 			ifp = ro->ro_rt->rt_ifp;
210 			if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
211 				mtu = ifp->if_mtu;
212 			ro->ro_rt->rt_use++;
213 
214 			if (ro->ro_rt->rt_flags & RTF_GATEWAY)
215 				dst = satosin(ro->ro_rt->rt_gateway);
216 		}
217 
218 		/* Set the source IP address */
219 		if (!IN_MULTICAST(ip->ip_dst.s_addr))
220 			ip->ip_src = ia->ia_addr.sin_addr;
221 	}
222 
223 #if NPF > 0
224 reroute:
225 #endif
226 
227 #ifdef IPSEC
228 	if (!ipsec_in_use && inp == NULL)
229 		goto done_spd;
230 
231 	/* Do we have any pending SAs to apply ? */
232 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
233 	if (mtag != NULL) {
234 #ifdef DIAGNOSTIC
235 		if (mtag->m_tag_len != sizeof (struct tdb_ident))
236 			panic("ip_output: tag of length %hu (should be %zu",
237 			    mtag->m_tag_len, sizeof (struct tdb_ident));
238 #endif
239 		tdbi = (struct tdb_ident *)(mtag + 1);
240 		tdb = gettdb(tdbi->rdomain,
241 		    tdbi->spi, &tdbi->dst, tdbi->proto);
242 		if (tdb == NULL)
243 			error = -EINVAL;
244 		m_tag_delete(m, mtag);
245 	}
246 	else
247 		tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error,
248 		    IPSP_DIRECTION_OUT, NULL, inp, ipsecflowinfo);
249 
250 	if (tdb == NULL) {
251 		if (error == 0) {
252 			/*
253 			 * No IPsec processing required, we'll just send the
254 			 * packet out.
255 			 */
256 			sproto = 0;
257 
258 			/* Fall through to routing/multicast handling */
259 		} else {
260 			/*
261 			 * -EINVAL is used to indicate that the packet should
262 			 * be silently dropped, typically because we've asked
263 			 * key management for an SA.
264 			 */
265 			if (error == -EINVAL) /* Should silently drop packet */
266 			  error = 0;
267 
268 			m_freem(m);
269 			goto done;
270 		}
271 	} else {
272 		/* Loop detection */
273 		for (mtag = m_tag_first(m); mtag != NULL;
274 		    mtag = m_tag_next(m, mtag)) {
275 			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
276 			    mtag->m_tag_id !=
277 			    PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
278 				continue;
279 			tdbi = (struct tdb_ident *)(mtag + 1);
280 			if (tdbi->spi == tdb->tdb_spi &&
281 			    tdbi->proto == tdb->tdb_sproto &&
282 			    tdbi->rdomain == tdb->tdb_rdomain &&
283 			    !memcmp(&tdbi->dst, &tdb->tdb_dst,
284 			    sizeof(union sockaddr_union))) {
285 				sproto = 0; /* mark as no-IPsec-needed */
286 				goto done_spd;
287 			}
288 		}
289 
290 		/* We need to do IPsec */
291 		bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
292 		sspi = tdb->tdb_spi;
293 		sproto = tdb->tdb_sproto;
294 
295 		/*
296 		 * If it needs TCP/UDP hardware-checksumming, do the
297 		 * computation now.
298 		 */
299 		in_proto_cksum_out(m, NULL);
300 
301 		/* If it's not a multicast packet, try to fast-path */
302 		if (!IN_MULTICAST(ip->ip_dst.s_addr)) {
303 			goto sendit;
304 		}
305 	}
306 
307 	/* Fall through to the routing/multicast handling code */
308  done_spd:
309 #endif /* IPSEC */
310 
311 	if (flags & IP_ROUTETOETHER) {
312 		dst = satosin(&ro->ro_dst);
313 		ifp = ro->ro_rt->rt_ifp;
314 		mtu = ifp->if_mtu;
315 		ro->ro_rt = NULL;
316 	} else if (donerouting == 0) {
317 		if (ro == 0) {
318 			ro = &iproute;
319 			memset(ro, 0, sizeof(*ro));
320 		}
321 
322 		dst = satosin(&ro->ro_dst);
323 
324 		/*
325 		 * If there is a cached route, check that it is to the same
326 		 * destination and is still up.  If not, free it and try again.
327 		 */
328 		if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
329 		    dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
330 		    ro->ro_tableid != m->m_pkthdr.ph_rtableid)) {
331 			RTFREE(ro->ro_rt);
332 			ro->ro_rt = (struct rtentry *)0;
333 		}
334 
335 		if (ro->ro_rt == 0) {
336 			dst->sin_family = AF_INET;
337 			dst->sin_len = sizeof(*dst);
338 			dst->sin_addr = ip->ip_dst;
339 			ro->ro_tableid = m->m_pkthdr.ph_rtableid;
340 		}
341 
342 		if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
343 		    (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
344 		    imo != NULL && imo->imo_multicast_ifp != NULL) {
345 			ifp = imo->imo_multicast_ifp;
346 			mtu = ifp->if_mtu;
347 			IFP_TO_IA(ifp, ia);
348 		} else {
349 			if (ro->ro_rt == 0)
350 				rtalloc_mpath(ro, &ip->ip_src.s_addr);
351 
352 			if (ro->ro_rt == 0) {
353 				ipstat.ips_noroute++;
354 				error = EHOSTUNREACH;
355 				goto bad;
356 			}
357 
358 			ia = ifatoia(ro->ro_rt->rt_ifa);
359 			ifp = ro->ro_rt->rt_ifp;
360 			if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0)
361 				mtu = ifp->if_mtu;
362 			ro->ro_rt->rt_use++;
363 
364 			if (ro->ro_rt->rt_flags & RTF_GATEWAY)
365 				dst = satosin(ro->ro_rt->rt_gateway);
366 		}
367 
368 		/* Set the source IP address */
369 		if (ip->ip_src.s_addr == INADDR_ANY)
370 			ip->ip_src = ia->ia_addr.sin_addr;
371 	}
372 
373 	if (IN_MULTICAST(ip->ip_dst.s_addr) ||
374 	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
375 		struct in_multi *inm;
376 
377 		m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
378 			M_BCAST : M_MCAST;
379 
380 		/*
381 		 * IP destination address is multicast.  Make sure "dst"
382 		 * still points to the address in "ro".  (It may have been
383 		 * changed to point to a gateway address, above.)
384 		 */
385 		dst = satosin(&ro->ro_dst);
386 
387 		/*
388 		 * See if the caller provided any multicast options
389 		 */
390 		if (imo != NULL)
391 			ip->ip_ttl = imo->imo_multicast_ttl;
392 		else
393 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
394 
395 		/*
396 		 * if we don't know the outgoing ifp yet, we can't generate
397 		 * output
398 		 */
399 		if (!ifp) {
400 			ipstat.ips_noroute++;
401 			error = EHOSTUNREACH;
402 			goto bad;
403 		}
404 
405 		/*
406 		 * Confirm that the outgoing interface supports multicast,
407 		 * but only if the packet actually is going out on that
408 		 * interface (i.e., no IPsec is applied).
409 		 */
410 		if ((((m->m_flags & M_MCAST) &&
411 		      (ifp->if_flags & IFF_MULTICAST) == 0) ||
412 		     ((m->m_flags & M_BCAST) &&
413 		      (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0)) {
414 			ipstat.ips_noroute++;
415 			error = ENETUNREACH;
416 			goto bad;
417 		}
418 
419 		/*
420 		 * If source address not specified yet, use address
421 		 * of outgoing interface.
422 		 */
423 		if (ip->ip_src.s_addr == INADDR_ANY) {
424 			IFP_TO_IA(ifp, ia);
425 			if (ia != NULL)
426 				ip->ip_src = ia->ia_addr.sin_addr;
427 		}
428 
429 		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
430 		if (inm != NULL &&
431 		   (imo == NULL || imo->imo_multicast_loop)) {
432 			/*
433 			 * If we belong to the destination multicast group
434 			 * on the outgoing interface, and the caller did not
435 			 * forbid loopback, loop back a copy.
436 			 * Can't defer TCP/UDP checksumming, do the
437 			 * computation now.
438 			 */
439 			in_proto_cksum_out(m, NULL);
440 			ip_mloopback(ifp, m, dst);
441 		}
442 #ifdef MROUTING
443 		else {
444 			/*
445 			 * If we are acting as a multicast router, perform
446 			 * multicast forwarding as if the packet had just
447 			 * arrived on the interface to which we are about
448 			 * to send.  The multicast forwarding function
449 			 * recursively calls this function, using the
450 			 * IP_FORWARDING flag to prevent infinite recursion.
451 			 *
452 			 * Multicasts that are looped back by ip_mloopback(),
453 			 * above, will be forwarded by the ip_input() routine,
454 			 * if necessary.
455 			 */
456 			if (ipmforwarding && ip_mrouter &&
457 			    (flags & IP_FORWARDING) == 0) {
458 				if (ip_mforward(m, ifp) != 0) {
459 					m_freem(m);
460 					goto done;
461 				}
462 			}
463 		}
464 #endif
465 		/*
466 		 * Multicasts with a time-to-live of zero may be looped-
467 		 * back, above, but must not be transmitted on a network.
468 		 * Also, multicasts addressed to the loopback interface
469 		 * are not sent -- the above call to ip_mloopback() will
470 		 * loop back a copy if this host actually belongs to the
471 		 * destination group on the loopback interface.
472 		 */
473 		if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
474 			m_freem(m);
475 			goto done;
476 		}
477 
478 		goto sendit;
479 	}
480 
481 	/*
482 	 * Look for broadcast address and verify user is allowed to send
483 	 * such a packet; if the packet is going in an IPsec tunnel, skip
484 	 * this check.
485 	 */
486 	if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp,
487 	    m->m_pkthdr.ph_rtableid))) {
488 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
489 			error = EADDRNOTAVAIL;
490 			goto bad;
491 		}
492 		if ((flags & IP_ALLOWBROADCAST) == 0) {
493 			error = EACCES;
494 			goto bad;
495 		}
496 
497 		/* Don't allow broadcast messages to be fragmented */
498 		if (ntohs(ip->ip_len) > ifp->if_mtu) {
499 			error = EMSGSIZE;
500 			goto bad;
501 		}
502 		m->m_flags |= M_BCAST;
503 	} else
504 		m->m_flags &= ~M_BCAST;
505 
506 sendit:
507 	/*
508 	 * If we're doing Path MTU discovery, we need to set DF unless
509 	 * the route's MTU is locked.
510 	 */
511 	if ((flags & IP_MTUDISC) && ro && ro->ro_rt &&
512 	    (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
513 		ip->ip_off |= htons(IP_DF);
514 
515 #ifdef IPSEC
516 	/*
517 	 * Check if the packet needs encapsulation.
518 	 */
519 	if (sproto != 0) {
520 		tdb = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
521 		    sspi, &sdst, sproto);
522 		if (tdb == NULL) {
523 			DPRINTF(("ip_output: unknown TDB"));
524 			error = EHOSTUNREACH;
525 			m_freem(m);
526 			goto done;
527 		}
528 
529 		/*
530 		 * Packet filter
531 		 */
532 #if NPF > 0
533 		if ((encif = enc_getif(tdb->tdb_rdomain,
534 		    tdb->tdb_tap)) == NULL ||
535 		    pf_test(AF_INET, PF_OUT, encif, &m, NULL) != PF_PASS) {
536 			error = EACCES;
537 			m_freem(m);
538 			goto done;
539 		}
540 		if (m == NULL) {
541 			goto done;
542 		}
543 		ip = mtod(m, struct ip *);
544 		hlen = ip->ip_hl << 2;
545 		/*
546 		 * PF_TAG_REROUTE handling or not...
547 		 * Packet is entering IPsec so the routing is
548 		 * already overruled by the IPsec policy.
549 		 * Until now the change was not reconsidered.
550 		 * What's the behaviour?
551 		 */
552 #endif
553 		in_proto_cksum_out(m, encif);
554 
555 		/* Check if we are allowed to fragment */
556 		if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
557 		    ntohs(ip->ip_len) > tdb->tdb_mtu &&
558 		    tdb->tdb_mtutimeout > time_second) {
559 			struct rtentry *rt = NULL;
560 			int rt_mtucloned = 0;
561 			int transportmode = 0;
562 
563 			transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) &&
564 			    (tdb->tdb_dst.sin.sin_addr.s_addr ==
565 			    ip->ip_dst.s_addr);
566 			icmp_mtu = tdb->tdb_mtu;
567 
568 			/* Find a host route to store the mtu in */
569 			if (ro != NULL)
570 				rt = ro->ro_rt;
571 			/* but don't add a PMTU route for transport mode SAs */
572 			if (transportmode)
573 				rt = NULL;
574 			else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
575 				rt = icmp_mtudisc_clone(ip->ip_dst,
576 				    m->m_pkthdr.ph_rtableid);
577 				rt_mtucloned = 1;
578 			}
579 			DPRINTF(("ip_output: spi %08x mtu %d rt %p cloned %d\n",
580 			    ntohl(tdb->tdb_spi), icmp_mtu, rt, rt_mtucloned));
581 			if (rt != NULL) {
582 				rt->rt_rmx.rmx_mtu = icmp_mtu;
583 				if (ro && ro->ro_rt != NULL) {
584 					RTFREE(ro->ro_rt);
585 					ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT,
586 					    m->m_pkthdr.ph_rtableid);
587 				}
588 				if (rt_mtucloned)
589 					rtfree(rt);
590 			}
591 			error = EMSGSIZE;
592 			goto bad;
593 		}
594 
595 		/*
596 		 * Clear these -- they'll be set in the recursive invocation
597 		 * as needed.
598 		 */
599 		m->m_flags &= ~(M_MCAST | M_BCAST);
600 
601 		/* Callee frees mbuf */
602 		error = ipsp_process_packet(m, tdb, AF_INET, 0);
603 		return error;  /* Nothing more to be done */
604 	}
605 
606 	/*
607 	 * If we got here and IPsec crypto processing didn't happen, drop it.
608 	 */
609 	if (ipsec_in_use && (mtag = m_tag_find(m,
610 	    PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) != NULL) {
611 		/* Notify IPsec to do its own crypto. */
612 		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
613 		m_freem(m);
614 		error = EHOSTUNREACH;
615 		goto done;
616 	}
617 #endif /* IPSEC */
618 
619 	/*
620 	 * Packet filter
621 	 */
622 #if NPF > 0
623 	if (pf_test(AF_INET, PF_OUT, ifp, &m, NULL) != PF_PASS) {
624 		error = EHOSTUNREACH;
625 		m_freem(m);
626 		goto done;
627 	}
628 	if (m == NULL)
629 		goto done;
630 	ip = mtod(m, struct ip *);
631 	hlen = ip->ip_hl << 2;
632 	if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) ==
633 	    (PF_TAG_REROUTE | PF_TAG_GENERATED))
634 		/* already rerun the route lookup, go on */
635 		m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE);
636 	else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) {
637 		/* tag as generated to skip over pf_test on rerun */
638 		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
639 		ro = NULL;
640 		donerouting = 0;
641 		goto reroute;
642 	}
643 #endif
644 	in_proto_cksum_out(m, ifp);
645 
646 #ifdef IPSEC
647 	if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) &&
648 	    (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) {
649 		error = EHOSTUNREACH;
650 		m_freem(m);
651 		goto done;
652 	}
653 #endif
654 
655 	/*
656 	 * If small enough for interface, can just send directly.
657 	 */
658 	if (ntohs(ip->ip_len) <= mtu) {
659 		ip->ip_sum = 0;
660 		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
661 		    (ifp->if_bridgeport == NULL))
662 			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
663 		else {
664 			ipstat.ips_outswcsum++;
665 			ip->ip_sum = in_cksum(m, hlen);
666 		}
667 
668 		error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
669 		goto done;
670 	}
671 
672 	/*
673 	 * Too large for interface; fragment if possible.
674 	 * Must be able to put at least 8 bytes per fragment.
675 	 */
676 	if (ip->ip_off & htons(IP_DF)) {
677 #ifdef IPSEC
678 		icmp_mtu = ifp->if_mtu;
679 #endif
680 		error = EMSGSIZE;
681 		/*
682 		 * This case can happen if the user changed the MTU
683 		 * of an interface after enabling IP on it.  Because
684 		 * most netifs don't keep track of routes pointing to
685 		 * them, there is no way for one to update all its
686 		 * routes when the MTU is changed.
687 		 */
688 		if (ro->ro_rt != NULL &&
689 		    (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
690 		    !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
691 		    (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
692 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
693 		}
694 		ipstat.ips_cantfrag++;
695 		goto bad;
696 	}
697 
698 	error = ip_fragment(m, ifp, mtu);
699 	if (error) {
700 		m = m0 = NULL;
701 		goto bad;
702 	}
703 
704 	for (; m; m = m0) {
705 		m0 = m->m_nextpkt;
706 		m->m_nextpkt = 0;
707 		if (error == 0)
708 			error = (*ifp->if_output)(ifp, m, sintosa(dst),
709 			    ro->ro_rt);
710 		else
711 			m_freem(m);
712 	}
713 
714 	if (error == 0)
715 		ipstat.ips_fragmented++;
716 
717 done:
718 	if (ro == &iproute && ro->ro_rt)
719 		RTFREE(ro->ro_rt);
720 	return (error);
721 bad:
722 #ifdef IPSEC
723 	if (error == EMSGSIZE && ip_mtudisc && icmp_mtu != 0 && m != NULL)
724 		ipsec_adjust_mtu(m, icmp_mtu);
725 #endif
726 	m_freem(m0);
727 	goto done;
728 }
729 
730 int
731 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu)
732 {
733 	struct ip *ip, *mhip;
734 	struct mbuf *m0;
735 	int len, hlen, off;
736 	int mhlen, firstlen;
737 	struct mbuf **mnext;
738 	int fragments = 0;
739 	int error = 0;
740 
741 	ip = mtod(m, struct ip *);
742 	hlen = ip->ip_hl << 2;
743 
744 	len = (mtu - hlen) &~ 7;
745 	if (len < 8) {
746 		m_freem(m);
747 		return (EMSGSIZE);
748 	}
749 
750 	/*
751 	 * If we are doing fragmentation, we can't defer TCP/UDP
752 	 * checksumming; compute the checksum and clear the flag.
753 	 */
754 	in_proto_cksum_out(m, NULL);
755 	firstlen = len;
756 	mnext = &m->m_nextpkt;
757 
758 	/*
759 	 * Loop through length of segment after first fragment,
760 	 * make new header and copy data of each part and link onto chain.
761 	 */
762 	m0 = m;
763 	mhlen = sizeof (struct ip);
764 	for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
765 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
766 		if (m == 0) {
767 			ipstat.ips_odropped++;
768 			error = ENOBUFS;
769 			goto sendorfree;
770 		}
771 		*mnext = m;
772 		mnext = &m->m_nextpkt;
773 		m->m_data += max_linkhdr;
774 		mhip = mtod(m, struct ip *);
775 		*mhip = *ip;
776 		/* we must inherit MCAST and BCAST flags and routing table */
777 		m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
778 		m->m_pkthdr.ph_rtableid = m0->m_pkthdr.ph_rtableid;
779 		if (hlen > sizeof (struct ip)) {
780 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
781 			mhip->ip_hl = mhlen >> 2;
782 		}
783 		m->m_len = mhlen;
784 		mhip->ip_off = ((off - hlen) >> 3) +
785 		    (ntohs(ip->ip_off) & ~IP_MF);
786 		if (ip->ip_off & htons(IP_MF))
787 			mhip->ip_off |= IP_MF;
788 		if (off + len >= ntohs(ip->ip_len))
789 			len = ntohs(ip->ip_len) - off;
790 		else
791 			mhip->ip_off |= IP_MF;
792 		mhip->ip_len = htons((u_int16_t)(len + mhlen));
793 		m->m_next = m_copy(m0, off, len);
794 		if (m->m_next == 0) {
795 			ipstat.ips_odropped++;
796 			error = ENOBUFS;
797 			goto sendorfree;
798 		}
799 		m->m_pkthdr.len = mhlen + len;
800 		m->m_pkthdr.rcvif = (struct ifnet *)0;
801 		mhip->ip_off = htons((u_int16_t)mhip->ip_off);
802 		mhip->ip_sum = 0;
803 		if ((ifp != NULL) &&
804 		    (ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
805 		    (ifp->if_bridgeport == NULL))
806 			m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
807 		else {
808 			ipstat.ips_outswcsum++;
809 			mhip->ip_sum = in_cksum(m, mhlen);
810 		}
811 		ipstat.ips_ofragments++;
812 		fragments++;
813 	}
814 	/*
815 	 * Update first fragment by trimming what's been copied out
816 	 * and updating header, then send each fragment (in order).
817 	 */
818 	m = m0;
819 	m_adj(m, hlen + firstlen - ntohs(ip->ip_len));
820 	m->m_pkthdr.len = hlen + firstlen;
821 	ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
822 	ip->ip_off |= htons(IP_MF);
823 	ip->ip_sum = 0;
824 	if ((ifp != NULL) &&
825 	    (ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
826 	    (ifp->if_bridgeport == NULL))
827 		m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
828 	else {
829 		ipstat.ips_outswcsum++;
830 		ip->ip_sum = in_cksum(m, hlen);
831 	}
832 sendorfree:
833 	if (error) {
834 		for (m = m0; m; m = m0) {
835 			m0 = m->m_nextpkt;
836 			m->m_nextpkt = NULL;
837 			m_freem(m);
838 		}
839 	}
840 
841 	return (error);
842 }
843 
844 /*
845  * Insert IP options into preformed packet.
846  * Adjust IP destination as required for IP source routing,
847  * as indicated by a non-zero in_addr at the start of the options.
848  */
849 struct mbuf *
850 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
851 {
852 	struct ipoption *p = mtod(opt, struct ipoption *);
853 	struct mbuf *n;
854 	struct ip *ip = mtod(m, struct ip *);
855 	unsigned int optlen;
856 
857 	optlen = opt->m_len - sizeof(p->ipopt_dst);
858 	if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
859 		return (m);		/* XXX should fail */
860 	if (p->ipopt_dst.s_addr)
861 		ip->ip_dst = p->ipopt_dst;
862 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
863 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
864 		if (n == 0)
865 			return (m);
866 		M_MOVE_HDR(n, m);
867 		n->m_pkthdr.len += optlen;
868 		m->m_len -= sizeof(struct ip);
869 		m->m_data += sizeof(struct ip);
870 		n->m_next = m;
871 		m = n;
872 		m->m_len = optlen + sizeof(struct ip);
873 		m->m_data += max_linkhdr;
874 		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
875 	} else {
876 		m->m_data -= optlen;
877 		m->m_len += optlen;
878 		m->m_pkthdr.len += optlen;
879 		memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip));
880 	}
881 	ip = mtod(m, struct ip *);
882 	bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), optlen);
883 	*phlen = sizeof(struct ip) + optlen;
884 	ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
885 	return (m);
886 }
887 
888 /*
889  * Copy options from ip to jp,
890  * omitting those not copied during fragmentation.
891  */
892 int
893 ip_optcopy(struct ip *ip, struct ip *jp)
894 {
895 	u_char *cp, *dp;
896 	int opt, optlen, cnt;
897 
898 	cp = (u_char *)(ip + 1);
899 	dp = (u_char *)(jp + 1);
900 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
901 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
902 		opt = cp[0];
903 		if (opt == IPOPT_EOL)
904 			break;
905 		if (opt == IPOPT_NOP) {
906 			/* Preserve for IP mcast tunnel's LSRR alignment. */
907 			*dp++ = IPOPT_NOP;
908 			optlen = 1;
909 			continue;
910 		}
911 #ifdef DIAGNOSTIC
912 		if (cnt < IPOPT_OLEN + sizeof(*cp))
913 			panic("malformed IPv4 option passed to ip_optcopy");
914 #endif
915 		optlen = cp[IPOPT_OLEN];
916 #ifdef DIAGNOSTIC
917 		if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
918 			panic("malformed IPv4 option passed to ip_optcopy");
919 #endif
920 		/* bogus lengths should have been caught by ip_dooptions */
921 		if (optlen > cnt)
922 			optlen = cnt;
923 		if (IPOPT_COPIED(opt)) {
924 			bcopy((caddr_t)cp, (caddr_t)dp, optlen);
925 			dp += optlen;
926 		}
927 	}
928 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
929 		*dp++ = IPOPT_EOL;
930 	return (optlen);
931 }
932 
933 /*
934  * IP socket option processing.
935  */
936 int
937 ip_ctloutput(int op, struct socket *so, int level, int optname,
938     struct mbuf **mp)
939 {
940 	struct inpcb *inp = sotoinpcb(so);
941 	struct mbuf *m = *mp;
942 	int optval = 0;
943 	struct proc *p = curproc; /* XXX */
944 #ifdef IPSEC
945 	struct ipsec_ref *ipr;
946 	u_int16_t opt16val;
947 #endif
948 	int error = 0;
949 	u_int rtid = 0;
950 
951 	if (level != IPPROTO_IP) {
952 		error = EINVAL;
953 		if (op == PRCO_SETOPT && *mp)
954 			(void) m_free(*mp);
955 	} else switch (op) {
956 	case PRCO_SETOPT:
957 		switch (optname) {
958 		case IP_OPTIONS:
959 			return (ip_pcbopts(&inp->inp_options, m));
960 
961 		case IP_TOS:
962 		case IP_TTL:
963 		case IP_MINTTL:
964 		case IP_RECVOPTS:
965 		case IP_RECVRETOPTS:
966 		case IP_RECVDSTADDR:
967 		case IP_RECVIF:
968 		case IP_RECVTTL:
969 		case IP_RECVDSTPORT:
970 		case IP_RECVRTABLE:
971 		case IP_IPSECFLOWINFO:
972 			if (m == NULL || m->m_len != sizeof(int))
973 				error = EINVAL;
974 			else {
975 				optval = *mtod(m, int *);
976 				switch (optname) {
977 
978 				case IP_TOS:
979 					inp->inp_ip.ip_tos = optval;
980 					break;
981 
982 				case IP_TTL:
983 					if (optval > 0 && optval <= MAXTTL)
984 						inp->inp_ip.ip_ttl = optval;
985 					else
986 						error = EINVAL;
987 					break;
988 
989 				case IP_MINTTL:
990 					if (optval > 0 && optval <= MAXTTL)
991 						inp->inp_ip_minttl = optval;
992 					else
993 						error = EINVAL;
994 					break;
995 #define	OPTSET(bit) \
996 	if (optval) \
997 		inp->inp_flags |= bit; \
998 	else \
999 		inp->inp_flags &= ~bit;
1000 
1001 				case IP_RECVOPTS:
1002 					OPTSET(INP_RECVOPTS);
1003 					break;
1004 
1005 				case IP_RECVRETOPTS:
1006 					OPTSET(INP_RECVRETOPTS);
1007 					break;
1008 
1009 				case IP_RECVDSTADDR:
1010 					OPTSET(INP_RECVDSTADDR);
1011 					break;
1012 				case IP_RECVIF:
1013 					OPTSET(INP_RECVIF);
1014 					break;
1015 				case IP_RECVTTL:
1016 					OPTSET(INP_RECVTTL);
1017 					break;
1018 				case IP_RECVDSTPORT:
1019 					OPTSET(INP_RECVDSTPORT);
1020 					break;
1021 				case IP_RECVRTABLE:
1022 					OPTSET(INP_RECVRTABLE);
1023 					break;
1024 				case IP_IPSECFLOWINFO:
1025 					OPTSET(INP_IPSECFLOWINFO);
1026 					break;
1027 				}
1028 			}
1029 			break;
1030 #undef OPTSET
1031 
1032 		case IP_MULTICAST_IF:
1033 		case IP_MULTICAST_TTL:
1034 		case IP_MULTICAST_LOOP:
1035 		case IP_ADD_MEMBERSHIP:
1036 		case IP_DROP_MEMBERSHIP:
1037 			error = ip_setmoptions(optname, &inp->inp_moptions, m,
1038 			    inp->inp_rtableid);
1039 			break;
1040 
1041 		case IP_PORTRANGE:
1042 			if (m == 0 || m->m_len != sizeof(int))
1043 				error = EINVAL;
1044 			else {
1045 				optval = *mtod(m, int *);
1046 
1047 				switch (optval) {
1048 
1049 				case IP_PORTRANGE_DEFAULT:
1050 					inp->inp_flags &= ~(INP_LOWPORT);
1051 					inp->inp_flags &= ~(INP_HIGHPORT);
1052 					break;
1053 
1054 				case IP_PORTRANGE_HIGH:
1055 					inp->inp_flags &= ~(INP_LOWPORT);
1056 					inp->inp_flags |= INP_HIGHPORT;
1057 					break;
1058 
1059 				case IP_PORTRANGE_LOW:
1060 					inp->inp_flags &= ~(INP_HIGHPORT);
1061 					inp->inp_flags |= INP_LOWPORT;
1062 					break;
1063 
1064 				default:
1065 
1066 					error = EINVAL;
1067 					break;
1068 				}
1069 			}
1070 			break;
1071 		case IP_AUTH_LEVEL:
1072 		case IP_ESP_TRANS_LEVEL:
1073 		case IP_ESP_NETWORK_LEVEL:
1074 		case IP_IPCOMP_LEVEL:
1075 #ifndef IPSEC
1076 			error = EOPNOTSUPP;
1077 #else
1078 			if (m == 0 || m->m_len != sizeof(int)) {
1079 				error = EINVAL;
1080 				break;
1081 			}
1082 			optval = *mtod(m, int *);
1083 
1084 			if (optval < IPSEC_LEVEL_BYPASS ||
1085 			    optval > IPSEC_LEVEL_UNIQUE) {
1086 				error = EINVAL;
1087 				break;
1088 			}
1089 
1090 			/* Unlink cached output TDB to force a re-search */
1091 			if (inp->inp_tdb_out) {
1092 				int s = splsoftnet();
1093 				TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1094 				    inp, inp_tdb_out_next);
1095 				splx(s);
1096 			}
1097 
1098 			if (inp->inp_tdb_in) {
1099 				int s = splsoftnet();
1100 				TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1101 				    inp, inp_tdb_in_next);
1102 				splx(s);
1103 			}
1104 
1105 			switch (optname) {
1106 			case IP_AUTH_LEVEL:
1107 				if (optval < IPSEC_AUTH_LEVEL_DEFAULT &&
1108 				    suser(p, 0)) {
1109 					error = EACCES;
1110 					break;
1111 				}
1112 				inp->inp_seclevel[SL_AUTH] = optval;
1113 				break;
1114 
1115 			case IP_ESP_TRANS_LEVEL:
1116 				if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT &&
1117 				    suser(p, 0)) {
1118 					error = EACCES;
1119 					break;
1120 				}
1121 				inp->inp_seclevel[SL_ESP_TRANS] = optval;
1122 				break;
1123 
1124 			case IP_ESP_NETWORK_LEVEL:
1125 				if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT &&
1126 				    suser(p, 0)) {
1127 					error = EACCES;
1128 					break;
1129 				}
1130 				inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1131 				break;
1132 			case IP_IPCOMP_LEVEL:
1133 				if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT &&
1134 				    suser(p, 0)) {
1135 					error = EACCES;
1136 					break;
1137 				}
1138 				inp->inp_seclevel[SL_IPCOMP] = optval;
1139 				break;
1140 			}
1141 			if (!error)
1142 				inp->inp_secrequire = get_sa_require(inp);
1143 #endif
1144 			break;
1145 
1146 		case IP_IPSEC_REMOTE_CRED:
1147 		case IP_IPSEC_REMOTE_AUTH:
1148 			/* Can't set the remote credential or key */
1149 			error = EOPNOTSUPP;
1150 			break;
1151 
1152 		case IP_IPSEC_LOCAL_ID:
1153 		case IP_IPSEC_REMOTE_ID:
1154 		case IP_IPSEC_LOCAL_CRED:
1155 		case IP_IPSEC_LOCAL_AUTH:
1156 #ifndef IPSEC
1157 			error = EOPNOTSUPP;
1158 #else
1159 			if (m == NULL || m->m_len < 2) {
1160 				error = EINVAL;
1161 				break;
1162 			}
1163 
1164 			m_copydata(m, 0, 2, (caddr_t) &opt16val);
1165 
1166 			/* If the type is 0, then we cleanup and return */
1167 			if (opt16val == 0) {
1168 				switch (optname) {
1169 				case IP_IPSEC_LOCAL_ID:
1170 					if (inp->inp_ipo != NULL &&
1171 					    inp->inp_ipo->ipo_srcid != NULL) {
1172 						ipsp_reffree(inp->inp_ipo->ipo_srcid);
1173 						inp->inp_ipo->ipo_srcid = NULL;
1174 					}
1175 					break;
1176 
1177 				case IP_IPSEC_REMOTE_ID:
1178 					if (inp->inp_ipo != NULL &&
1179 					    inp->inp_ipo->ipo_dstid != NULL) {
1180 						ipsp_reffree(inp->inp_ipo->ipo_dstid);
1181 						inp->inp_ipo->ipo_dstid = NULL;
1182 					}
1183 					break;
1184 
1185 				case IP_IPSEC_LOCAL_CRED:
1186 					if (inp->inp_ipo != NULL &&
1187 					    inp->inp_ipo->ipo_local_cred != NULL) {
1188 						ipsp_reffree(inp->inp_ipo->ipo_local_cred);
1189 						inp->inp_ipo->ipo_local_cred = NULL;
1190 					}
1191 					break;
1192 
1193 				case IP_IPSEC_LOCAL_AUTH:
1194 					if (inp->inp_ipo != NULL &&
1195 					    inp->inp_ipo->ipo_local_auth != NULL) {
1196 						ipsp_reffree(inp->inp_ipo->ipo_local_auth);
1197 						inp->inp_ipo->ipo_local_auth = NULL;
1198 					}
1199 					break;
1200 				}
1201 
1202 				error = 0;
1203 				break;
1204 			}
1205 
1206 			/* Can't have an empty payload */
1207 			if (m->m_len == 2) {
1208 				error = EINVAL;
1209 				break;
1210 			}
1211 
1212 			/* Allocate if needed */
1213 			if (inp->inp_ipo == NULL) {
1214 				inp->inp_ipo = ipsec_add_policy(inp,
1215 				    AF_INET, IPSP_DIRECTION_OUT);
1216 				if (inp->inp_ipo == NULL) {
1217 					error = ENOBUFS;
1218 					break;
1219 				}
1220 			}
1221 
1222 			ipr = malloc(sizeof(struct ipsec_ref) + m->m_len - 2,
1223 			       M_CREDENTIALS, M_NOWAIT);
1224 			if (ipr == NULL) {
1225 				error = ENOBUFS;
1226 				break;
1227 			}
1228 
1229 			ipr->ref_count = 1;
1230 			ipr->ref_malloctype = M_CREDENTIALS;
1231 			ipr->ref_len = m->m_len - 2;
1232 			ipr->ref_type = opt16val;
1233 			m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1));
1234 
1235 			switch (optname) {
1236 			case IP_IPSEC_LOCAL_ID:
1237 				/* Check valid types and NUL-termination */
1238 				if (ipr->ref_type < IPSP_IDENTITY_PREFIX ||
1239 				    ipr->ref_type > IPSP_IDENTITY_CONNECTION ||
1240 				    ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1241 					free(ipr, M_CREDENTIALS, 0);
1242 					error = EINVAL;
1243 				} else {
1244 					if (inp->inp_ipo->ipo_srcid != NULL)
1245 						ipsp_reffree(inp->inp_ipo->ipo_srcid);
1246 					inp->inp_ipo->ipo_srcid = ipr;
1247 				}
1248 				break;
1249 			case IP_IPSEC_REMOTE_ID:
1250 				/* Check valid types and NUL-termination */
1251 				if (ipr->ref_type < IPSP_IDENTITY_PREFIX ||
1252 				    ipr->ref_type > IPSP_IDENTITY_CONNECTION ||
1253 				    ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1254 					free(ipr, M_CREDENTIALS, 0);
1255 					error = EINVAL;
1256 				} else {
1257 					if (inp->inp_ipo->ipo_dstid != NULL)
1258 						ipsp_reffree(inp->inp_ipo->ipo_dstid);
1259 					inp->inp_ipo->ipo_dstid = ipr;
1260 				}
1261 				break;
1262 			case IP_IPSEC_LOCAL_CRED:
1263 				if (ipr->ref_type < IPSP_CRED_KEYNOTE ||
1264 				    ipr->ref_type > IPSP_CRED_X509) {
1265 					free(ipr, M_CREDENTIALS, 0);
1266 					error = EINVAL;
1267 				} else {
1268 					if (inp->inp_ipo->ipo_local_cred != NULL)
1269 						ipsp_reffree(inp->inp_ipo->ipo_local_cred);
1270 					inp->inp_ipo->ipo_local_cred = ipr;
1271 				}
1272 				break;
1273 			case IP_IPSEC_LOCAL_AUTH:
1274 				if (ipr->ref_type < IPSP_AUTH_PASSPHRASE ||
1275 				    ipr->ref_type > IPSP_AUTH_RSA) {
1276 					free(ipr, M_CREDENTIALS, 0);
1277 					error = EINVAL;
1278 				} else {
1279 					if (inp->inp_ipo->ipo_local_auth != NULL)
1280 						ipsp_reffree(inp->inp_ipo->ipo_local_auth);
1281 					inp->inp_ipo->ipo_local_auth = ipr;
1282 				}
1283 				break;
1284 			}
1285 
1286 			/* Unlink cached output TDB to force a re-search */
1287 			if (inp->inp_tdb_out) {
1288 				int s = splsoftnet();
1289 				TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1290 				    inp, inp_tdb_out_next);
1291 				splx(s);
1292 			}
1293 
1294 			if (inp->inp_tdb_in) {
1295 				int s = splsoftnet();
1296 				TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1297 				    inp, inp_tdb_in_next);
1298 				splx(s);
1299 			}
1300 #endif
1301 			break;
1302 		case SO_RTABLE:
1303 			if (m == NULL || m->m_len < sizeof(u_int)) {
1304 				error = EINVAL;
1305 				break;
1306 			}
1307 			rtid = *mtod(m, u_int *);
1308 			if (inp->inp_rtableid == rtid)
1309 				break;
1310 			/* needs priviledges to switch when already set */
1311 			if (p->p_p->ps_rtableid != rtid &&
1312 			    p->p_p->ps_rtableid != 0 &&
1313 			    (error = suser(p, 0)) != 0)
1314 				break;
1315 			/* table must exist */
1316 			if (!rtable_exists(rtid)) {
1317 				error = EINVAL;
1318 				break;
1319 			}
1320 			inp->inp_rtableid = rtid;
1321 			break;
1322 		case IP_PIPEX:
1323 			if (m != NULL && m->m_len == sizeof(int))
1324 				inp->inp_pipex = *mtod(m, int *);
1325 			else
1326 				error = EINVAL;
1327 			break;
1328 
1329 		default:
1330 			error = ENOPROTOOPT;
1331 			break;
1332 		}
1333 		if (m)
1334 			(void)m_free(m);
1335 		break;
1336 
1337 	case PRCO_GETOPT:
1338 		switch (optname) {
1339 		case IP_OPTIONS:
1340 		case IP_RETOPTS:
1341 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1342 			if (inp->inp_options) {
1343 				m->m_len = inp->inp_options->m_len;
1344 				bcopy(mtod(inp->inp_options, caddr_t),
1345 				    mtod(m, caddr_t), m->m_len);
1346 			} else
1347 				m->m_len = 0;
1348 			break;
1349 
1350 		case IP_TOS:
1351 		case IP_TTL:
1352 		case IP_MINTTL:
1353 		case IP_RECVOPTS:
1354 		case IP_RECVRETOPTS:
1355 		case IP_RECVDSTADDR:
1356 		case IP_RECVIF:
1357 		case IP_RECVTTL:
1358 		case IP_RECVDSTPORT:
1359 		case IP_RECVRTABLE:
1360 		case IP_IPSECFLOWINFO:
1361 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1362 			m->m_len = sizeof(int);
1363 			switch (optname) {
1364 
1365 			case IP_TOS:
1366 				optval = inp->inp_ip.ip_tos;
1367 				break;
1368 
1369 			case IP_TTL:
1370 				optval = inp->inp_ip.ip_ttl;
1371 				break;
1372 
1373 			case IP_MINTTL:
1374 				optval = inp->inp_ip_minttl;
1375 				break;
1376 
1377 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1378 
1379 			case IP_RECVOPTS:
1380 				optval = OPTBIT(INP_RECVOPTS);
1381 				break;
1382 
1383 			case IP_RECVRETOPTS:
1384 				optval = OPTBIT(INP_RECVRETOPTS);
1385 				break;
1386 
1387 			case IP_RECVDSTADDR:
1388 				optval = OPTBIT(INP_RECVDSTADDR);
1389 				break;
1390 			case IP_RECVIF:
1391 				optval = OPTBIT(INP_RECVIF);
1392 				break;
1393 			case IP_RECVTTL:
1394 				optval = OPTBIT(INP_RECVTTL);
1395 				break;
1396 			case IP_RECVDSTPORT:
1397 				optval = OPTBIT(INP_RECVDSTPORT);
1398 				break;
1399 			case IP_RECVRTABLE:
1400 				optval = OPTBIT(INP_RECVRTABLE);
1401 				break;
1402 			case IP_IPSECFLOWINFO:
1403 				optval = OPTBIT(INP_IPSECFLOWINFO);
1404 				break;
1405 			}
1406 			*mtod(m, int *) = optval;
1407 			break;
1408 
1409 		case IP_MULTICAST_IF:
1410 		case IP_MULTICAST_TTL:
1411 		case IP_MULTICAST_LOOP:
1412 		case IP_ADD_MEMBERSHIP:
1413 		case IP_DROP_MEMBERSHIP:
1414 			error = ip_getmoptions(optname, inp->inp_moptions, mp);
1415 			break;
1416 
1417 		case IP_PORTRANGE:
1418 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1419 			m->m_len = sizeof(int);
1420 
1421 			if (inp->inp_flags & INP_HIGHPORT)
1422 				optval = IP_PORTRANGE_HIGH;
1423 			else if (inp->inp_flags & INP_LOWPORT)
1424 				optval = IP_PORTRANGE_LOW;
1425 			else
1426 				optval = 0;
1427 
1428 			*mtod(m, int *) = optval;
1429 			break;
1430 
1431 		case IP_AUTH_LEVEL:
1432 		case IP_ESP_TRANS_LEVEL:
1433 		case IP_ESP_NETWORK_LEVEL:
1434 		case IP_IPCOMP_LEVEL:
1435 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1436 #ifndef IPSEC
1437 			m->m_len = sizeof(int);
1438 			*mtod(m, int *) = IPSEC_LEVEL_NONE;
1439 #else
1440 			m->m_len = sizeof(int);
1441 			switch (optname) {
1442 			case IP_AUTH_LEVEL:
1443 				optval = inp->inp_seclevel[SL_AUTH];
1444 				break;
1445 
1446 			case IP_ESP_TRANS_LEVEL:
1447 				optval = inp->inp_seclevel[SL_ESP_TRANS];
1448 				break;
1449 
1450 			case IP_ESP_NETWORK_LEVEL:
1451 				optval = inp->inp_seclevel[SL_ESP_NETWORK];
1452 				break;
1453 			case IP_IPCOMP_LEVEL:
1454 				optval = inp->inp_seclevel[SL_IPCOMP];
1455 				break;
1456 			}
1457 			*mtod(m, int *) = optval;
1458 #endif
1459 			break;
1460 		case IP_IPSEC_LOCAL_ID:
1461 		case IP_IPSEC_REMOTE_ID:
1462 		case IP_IPSEC_LOCAL_CRED:
1463 		case IP_IPSEC_REMOTE_CRED:
1464 		case IP_IPSEC_LOCAL_AUTH:
1465 		case IP_IPSEC_REMOTE_AUTH:
1466 #ifndef IPSEC
1467 			error = EOPNOTSUPP;
1468 #else
1469 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1470 			m->m_len = sizeof(u_int16_t);
1471 			ipr = NULL;
1472 			switch (optname) {
1473 			case IP_IPSEC_LOCAL_ID:
1474 				if (inp->inp_ipo != NULL)
1475 					ipr = inp->inp_ipo->ipo_srcid;
1476 				opt16val = IPSP_IDENTITY_NONE;
1477 				break;
1478 			case IP_IPSEC_REMOTE_ID:
1479 				if (inp->inp_ipo != NULL)
1480 					ipr = inp->inp_ipo->ipo_dstid;
1481 				opt16val = IPSP_IDENTITY_NONE;
1482 				break;
1483 			case IP_IPSEC_LOCAL_CRED:
1484 				if (inp->inp_ipo != NULL)
1485 					ipr = inp->inp_ipo->ipo_local_cred;
1486 				opt16val = IPSP_CRED_NONE;
1487 				break;
1488 			case IP_IPSEC_REMOTE_CRED:
1489 				ipr = inp->inp_ipsec_remotecred;
1490 				opt16val = IPSP_CRED_NONE;
1491 				break;
1492 			case IP_IPSEC_LOCAL_AUTH:
1493 				if (inp->inp_ipo != NULL)
1494 					ipr = inp->inp_ipo->ipo_local_auth;
1495 				opt16val = IPSP_AUTH_NONE;
1496 				break;
1497 			case IP_IPSEC_REMOTE_AUTH:
1498 				ipr = inp->inp_ipsec_remoteauth;
1499 				opt16val = IPSP_AUTH_NONE;
1500 				break;
1501 			}
1502 			if (ipr == NULL)
1503 				*mtod(m, u_int16_t *) = opt16val;
1504 			else {
1505 				size_t len;
1506 
1507 				len = m->m_len + ipr->ref_len;
1508 				if (len > MCLBYTES) {
1509 					 m_free(m);
1510 					 error = EINVAL;
1511 					 break;
1512 				}
1513 				/* allocate mbuf cluster for larger option */
1514 				if (len > MLEN) {
1515 					 MCLGET(m, M_WAITOK);
1516 					 if ((m->m_flags & M_EXT) == 0) {
1517 						 m_free(m);
1518 						 error = ENOBUFS;
1519 						 break;
1520 					 }
1521 
1522 				}
1523 				m->m_len = len;
1524 				*mtod(m, u_int16_t *) = ipr->ref_type;
1525 				m_copyback(m, sizeof(u_int16_t), ipr->ref_len,
1526 				    ipr + 1, M_NOWAIT);
1527 			}
1528 #endif
1529 			break;
1530 		case SO_RTABLE:
1531 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1532 			m->m_len = sizeof(u_int);
1533 			*mtod(m, u_int *) = inp->inp_rtableid;
1534 			break;
1535 		case IP_PIPEX:
1536 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1537 			m->m_len = sizeof(int);
1538 			*mtod(m, int *) = inp->inp_pipex;
1539 			break;
1540 		default:
1541 			error = ENOPROTOOPT;
1542 			break;
1543 		}
1544 		break;
1545 	}
1546 	return (error);
1547 }
1548 
1549 /*
1550  * Set up IP options in pcb for insertion in output packets.
1551  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1552  * with destination address if source routed.
1553  */
1554 int
1555 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m)
1556 {
1557 	int cnt, optlen;
1558 	u_char *cp;
1559 	u_char opt;
1560 
1561 	/* turn off any old options */
1562 	if (*pcbopt)
1563 		(void)m_free(*pcbopt);
1564 	*pcbopt = 0;
1565 	if (m == (struct mbuf *)0 || m->m_len == 0) {
1566 		/*
1567 		 * Only turning off any previous options.
1568 		 */
1569 		if (m)
1570 			(void)m_free(m);
1571 		return (0);
1572 	}
1573 
1574 	if (m->m_len % sizeof(int32_t))
1575 		goto bad;
1576 
1577 	/*
1578 	 * IP first-hop destination address will be stored before
1579 	 * actual options; move other options back
1580 	 * and clear it when none present.
1581 	 */
1582 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1583 		goto bad;
1584 	cnt = m->m_len;
1585 	m->m_len += sizeof(struct in_addr);
1586 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1587 	memmove((caddr_t)cp, mtod(m, caddr_t), (unsigned)cnt);
1588 	memset(mtod(m, caddr_t), 0, sizeof(struct in_addr));
1589 
1590 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1591 		opt = cp[IPOPT_OPTVAL];
1592 		if (opt == IPOPT_EOL)
1593 			break;
1594 		if (opt == IPOPT_NOP)
1595 			optlen = 1;
1596 		else {
1597 			if (cnt < IPOPT_OLEN + sizeof(*cp))
1598 				goto bad;
1599 			optlen = cp[IPOPT_OLEN];
1600 			if (optlen < IPOPT_OLEN  + sizeof(*cp) || optlen > cnt)
1601 				goto bad;
1602 		}
1603 		switch (opt) {
1604 
1605 		default:
1606 			break;
1607 
1608 		case IPOPT_LSRR:
1609 		case IPOPT_SSRR:
1610 			/*
1611 			 * user process specifies route as:
1612 			 *	->A->B->C->D
1613 			 * D must be our final destination (but we can't
1614 			 * check that since we may not have connected yet).
1615 			 * A is first hop destination, which doesn't appear in
1616 			 * actual IP option, but is stored before the options.
1617 			 */
1618 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1619 				goto bad;
1620 			m->m_len -= sizeof(struct in_addr);
1621 			cnt -= sizeof(struct in_addr);
1622 			optlen -= sizeof(struct in_addr);
1623 			cp[IPOPT_OLEN] = optlen;
1624 			/*
1625 			 * Move first hop before start of options.
1626 			 */
1627 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1628 			    sizeof(struct in_addr));
1629 			/*
1630 			 * Then copy rest of options back
1631 			 * to close up the deleted entry.
1632 			 */
1633 			memmove((caddr_t)&cp[IPOPT_OFFSET+1],
1634 			    (caddr_t)(&cp[IPOPT_OFFSET+1] +
1635 			    sizeof(struct in_addr)),
1636 			    (unsigned)cnt - (IPOPT_OFFSET+1));
1637 			break;
1638 		}
1639 	}
1640 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1641 		goto bad;
1642 	*pcbopt = m;
1643 	return (0);
1644 
1645 bad:
1646 	(void)m_free(m);
1647 	return (EINVAL);
1648 }
1649 
1650 /*
1651  * Set the IP multicast options in response to user setsockopt().
1652  */
1653 int
1654 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m,
1655     u_int rtableid)
1656 {
1657 	int error = 0;
1658 	u_char loop;
1659 	int i;
1660 	struct in_addr addr;
1661 	struct in_ifaddr *ia;
1662 	struct ip_mreq *mreq;
1663 	struct ifnet *ifp = NULL;
1664 	struct ip_moptions *imo = *imop;
1665 	struct in_multi **immp;
1666 	struct route ro;
1667 	struct sockaddr_in *dst;
1668 
1669 	if (imo == NULL) {
1670 		/*
1671 		 * No multicast option buffer attached to the pcb;
1672 		 * allocate one and initialize to default values.
1673 		 */
1674 		imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1675 		    M_WAITOK|M_ZERO);
1676 		immp = (struct in_multi **)malloc(
1677 		    (sizeof(*immp) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
1678 		    M_WAITOK|M_ZERO);
1679 		*imop = imo;
1680 		imo->imo_multicast_ifp = NULL;
1681 		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1682 		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1683 		imo->imo_num_memberships = 0;
1684 		imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1685 		imo->imo_membership = immp;
1686 	}
1687 
1688 	switch (optname) {
1689 
1690 	case IP_MULTICAST_IF:
1691 		/*
1692 		 * Select the interface for outgoing multicast packets.
1693 		 */
1694 		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1695 			error = EINVAL;
1696 			break;
1697 		}
1698 		addr = *(mtod(m, struct in_addr *));
1699 		/*
1700 		 * INADDR_ANY is used to remove a previous selection.
1701 		 * When no interface is selected, a default one is
1702 		 * chosen every time a multicast packet is sent.
1703 		 */
1704 		if (addr.s_addr == INADDR_ANY) {
1705 			imo->imo_multicast_ifp = NULL;
1706 			break;
1707 		}
1708 		/*
1709 		 * The selected interface is identified by its local
1710 		 * IP address.  Find the interface and confirm that
1711 		 * it supports multicasting.
1712 		 */
1713 		ia = in_iawithaddr(addr, rtableid);
1714 		if (ia)
1715 			ifp = ia->ia_ifp;
1716 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1717 			error = EADDRNOTAVAIL;
1718 			break;
1719 		}
1720 		imo->imo_multicast_ifp = ifp;
1721 		break;
1722 
1723 	case IP_MULTICAST_TTL:
1724 		/*
1725 		 * Set the IP time-to-live for outgoing multicast packets.
1726 		 */
1727 		if (m == NULL || m->m_len != 1) {
1728 			error = EINVAL;
1729 			break;
1730 		}
1731 		imo->imo_multicast_ttl = *(mtod(m, u_char *));
1732 		break;
1733 
1734 	case IP_MULTICAST_LOOP:
1735 		/*
1736 		 * Set the loopback flag for outgoing multicast packets.
1737 		 * Must be zero or one.
1738 		 */
1739 		if (m == NULL || m->m_len != 1 ||
1740 		   (loop = *(mtod(m, u_char *))) > 1) {
1741 			error = EINVAL;
1742 			break;
1743 		}
1744 		imo->imo_multicast_loop = loop;
1745 		break;
1746 
1747 	case IP_ADD_MEMBERSHIP:
1748 		/*
1749 		 * Add a multicast group membership.
1750 		 * Group must be a valid IP multicast address.
1751 		 */
1752 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1753 			error = EINVAL;
1754 			break;
1755 		}
1756 		mreq = mtod(m, struct ip_mreq *);
1757 		if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1758 			error = EINVAL;
1759 			break;
1760 		}
1761 		/*
1762 		 * If no interface address was provided, use the interface of
1763 		 * the route to the given multicast address.
1764 		 */
1765 		if (mreq->imr_interface.s_addr == INADDR_ANY) {
1766 			ro.ro_rt = NULL;
1767 			dst = satosin(&ro.ro_dst);
1768 			dst->sin_len = sizeof(*dst);
1769 			dst->sin_family = AF_INET;
1770 			dst->sin_addr = mreq->imr_multiaddr;
1771 			if (!(ro.ro_rt && ro.ro_rt->rt_ifp &&
1772 			    (ro.ro_rt->rt_flags & RTF_UP)))
1773 				ro.ro_rt = rtalloc1(&ro.ro_dst, RT_REPORT,
1774 				    rtableid);
1775 			if (ro.ro_rt == NULL) {
1776 				error = EADDRNOTAVAIL;
1777 				break;
1778 			}
1779 			ifp = ro.ro_rt->rt_ifp;
1780 			rtfree(ro.ro_rt);
1781 		} else {
1782 			ia = in_iawithaddr(mreq->imr_interface, rtableid);
1783 			if (ia)
1784 				ifp = ia->ia_ifp;
1785 		}
1786 		/*
1787 		 * See if we found an interface, and confirm that it
1788 		 * supports multicast.
1789 		 */
1790 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1791 			error = EADDRNOTAVAIL;
1792 			break;
1793 		}
1794 		/*
1795 		 * See if the membership already exists or if all the
1796 		 * membership slots are full.
1797 		 */
1798 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1799 			if (imo->imo_membership[i]->inm_ifidx
1800 						== ifp->if_index &&
1801 			    imo->imo_membership[i]->inm_addr.s_addr
1802 						== mreq->imr_multiaddr.s_addr)
1803 				break;
1804 		}
1805 		if (i < imo->imo_num_memberships) {
1806 			error = EADDRINUSE;
1807 			break;
1808 		}
1809 		if (imo->imo_num_memberships == imo->imo_max_memberships) {
1810 			struct in_multi **nmships, **omships;
1811 			size_t newmax;
1812 			/*
1813 			 * Resize the vector to next power-of-two minus 1. If the
1814 			 * size would exceed the maximum then we know we've really
1815 			 * run out of entries. Otherwise, we reallocate the vector.
1816 			 */
1817 			nmships = NULL;
1818 			omships = imo->imo_membership;
1819 			newmax = ((imo->imo_max_memberships + 1) * 2) - 1;
1820 			if (newmax <= IP_MAX_MEMBERSHIPS) {
1821 				nmships = (struct in_multi **)malloc(
1822 				    sizeof(*nmships) * newmax, M_IPMOPTS,
1823 				    M_NOWAIT|M_ZERO);
1824 				if (nmships != NULL) {
1825 					bcopy(omships, nmships,
1826 					    sizeof(*omships) *
1827 					    imo->imo_max_memberships);
1828 					free(omships, M_IPMOPTS, 0);
1829 					imo->imo_membership = nmships;
1830 					imo->imo_max_memberships = newmax;
1831 				}
1832 			}
1833 			if (nmships == NULL) {
1834 				error = ETOOMANYREFS;
1835 				break;
1836 			}
1837 		}
1838 		/*
1839 		 * Everything looks good; add a new record to the multicast
1840 		 * address list for the given interface.
1841 		 */
1842 		if ((imo->imo_membership[i] =
1843 		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1844 			error = ENOBUFS;
1845 			break;
1846 		}
1847 		++imo->imo_num_memberships;
1848 		break;
1849 
1850 	case IP_DROP_MEMBERSHIP:
1851 		/*
1852 		 * Drop a multicast group membership.
1853 		 * Group must be a valid IP multicast address.
1854 		 */
1855 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1856 			error = EINVAL;
1857 			break;
1858 		}
1859 		mreq = mtod(m, struct ip_mreq *);
1860 		if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1861 			error = EINVAL;
1862 			break;
1863 		}
1864 		/*
1865 		 * If an interface address was specified, get a pointer
1866 		 * to its ifnet structure.
1867 		 */
1868 		if (mreq->imr_interface.s_addr == INADDR_ANY)
1869 			ifp = NULL;
1870 		else {
1871 			ia = in_iawithaddr(mreq->imr_interface, rtableid);
1872 			if (ia == NULL) {
1873 				error = EADDRNOTAVAIL;
1874 				break;
1875 			}
1876 			ifp = ia->ia_ifp;
1877 		}
1878 		/*
1879 		 * Find the membership in the membership array.
1880 		 */
1881 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1882 			if ((ifp == NULL ||
1883 			    imo->imo_membership[i]->inm_ifidx ==
1884 			        ifp->if_index) &&
1885 			     imo->imo_membership[i]->inm_addr.s_addr ==
1886 			     mreq->imr_multiaddr.s_addr)
1887 				break;
1888 		}
1889 		if (i == imo->imo_num_memberships) {
1890 			error = EADDRNOTAVAIL;
1891 			break;
1892 		}
1893 		/*
1894 		 * Give up the multicast address record to which the
1895 		 * membership points.
1896 		 */
1897 		in_delmulti(imo->imo_membership[i]);
1898 		/*
1899 		 * Remove the gap in the membership array.
1900 		 */
1901 		for (++i; i < imo->imo_num_memberships; ++i)
1902 			imo->imo_membership[i-1] = imo->imo_membership[i];
1903 		--imo->imo_num_memberships;
1904 		break;
1905 
1906 	default:
1907 		error = EOPNOTSUPP;
1908 		break;
1909 	}
1910 
1911 	/*
1912 	 * If all options have default values, no need to keep the data.
1913 	 */
1914 	if (imo->imo_multicast_ifp == NULL &&
1915 	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1916 	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1917 	    imo->imo_num_memberships == 0) {
1918 		free(imo->imo_membership , M_IPMOPTS, 0);
1919 		free(*imop, M_IPMOPTS, 0);
1920 		*imop = NULL;
1921 	}
1922 
1923 	return (error);
1924 }
1925 
1926 /*
1927  * Return the IP multicast options in response to user getsockopt().
1928  */
1929 int
1930 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf **mp)
1931 {
1932 	u_char *ttl;
1933 	u_char *loop;
1934 	struct in_addr *addr;
1935 	struct in_ifaddr *ia;
1936 
1937 	*mp = m_get(M_WAIT, MT_SOOPTS);
1938 
1939 	switch (optname) {
1940 
1941 	case IP_MULTICAST_IF:
1942 		addr = mtod(*mp, struct in_addr *);
1943 		(*mp)->m_len = sizeof(struct in_addr);
1944 		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1945 			addr->s_addr = INADDR_ANY;
1946 		else {
1947 			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1948 			addr->s_addr = (ia == NULL) ? INADDR_ANY
1949 					: ia->ia_addr.sin_addr.s_addr;
1950 		}
1951 		return (0);
1952 
1953 	case IP_MULTICAST_TTL:
1954 		ttl = mtod(*mp, u_char *);
1955 		(*mp)->m_len = 1;
1956 		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1957 				     : imo->imo_multicast_ttl;
1958 		return (0);
1959 
1960 	case IP_MULTICAST_LOOP:
1961 		loop = mtod(*mp, u_char *);
1962 		(*mp)->m_len = 1;
1963 		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1964 				      : imo->imo_multicast_loop;
1965 		return (0);
1966 
1967 	default:
1968 		return (EOPNOTSUPP);
1969 	}
1970 }
1971 
1972 /*
1973  * Discard the IP multicast options.
1974  */
1975 void
1976 ip_freemoptions(struct ip_moptions *imo)
1977 {
1978 	int i;
1979 
1980 	if (imo != NULL) {
1981 		for (i = 0; i < imo->imo_num_memberships; ++i)
1982 			in_delmulti(imo->imo_membership[i]);
1983 		free(imo->imo_membership, M_IPMOPTS, 0);
1984 		free(imo, M_IPMOPTS, 0);
1985 	}
1986 }
1987 
1988 /*
1989  * Routine called from ip_output() to loop back a copy of an IP multicast
1990  * packet to the input queue of a specified interface.  Note that this
1991  * calls the output routine of the loopback "driver", but with an interface
1992  * pointer that might NOT be &loif -- easier than replicating that code here.
1993  */
1994 void
1995 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst)
1996 {
1997 	struct ip *ip;
1998 	struct mbuf *copym;
1999 
2000 	copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
2001 	if (copym != NULL) {
2002 		/*
2003 		 * We don't bother to fragment if the IP length is greater
2004 		 * than the interface's MTU.  Can this possibly matter?
2005 		 */
2006 		ip = mtod(copym, struct ip *);
2007 		ip->ip_sum = 0;
2008 		ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
2009 		(void) looutput(ifp, copym, sintosa(dst), NULL);
2010 	}
2011 }
2012 
/*
 *	Compute significant parts of the IPv4 checksum pseudo-header
 *	for use in a delayed TCP/UDP checksum calculation.
 *
 *	src, dst and lenproto are summed as 16-bit words in one's
 *	complement arithmetic; the folded 16-bit result is returned
 *	(not inverted).  All three arguments are split into halves
 *	before they are added, so the 32-bit accumulator cannot wrap
 *	even when the upper half of lenproto is large, as it is for an
 *	htonl() value on a little-endian host.
 */
static __inline u_int16_t __attribute__((__unused__))
in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto)
{
	u_int32_t sum;

	/* Six 16-bit terms: at most 0x5fffa, well inside 32 bits. */
	sum = (u_int16_t)(lenproto >> 16) +
	      (u_int16_t)(lenproto /*& 0xffff*/) +
	      (u_int16_t)(src >> 16) +
	      (u_int16_t)(src /*& 0xffff*/) +
	      (u_int16_t)(dst >> 16) +
	      (u_int16_t)(dst /*& 0xffff*/);

	/* Fold the carries back into the low 16 bits. */
	sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/);

	/* One more end-around carry may remain after the fold. */
	if (sum > 0xffff)
		sum -= 0xffff;

	return (sum);
}
2035 
2036 /*
2037  * Process a delayed payload checksum calculation.
2038  */
2039 void
2040 in_delayed_cksum(struct mbuf *m)
2041 {
2042 	struct ip *ip;
2043 	u_int16_t csum, offset;
2044 
2045 	ip = mtod(m, struct ip *);
2046 	offset = ip->ip_hl << 2;
2047 	csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset);
2048 	if (csum == 0 && ip->ip_p == IPPROTO_UDP)
2049 		csum = 0xffff;
2050 
2051 	switch (ip->ip_p) {
2052 	case IPPROTO_TCP:
2053 		offset += offsetof(struct tcphdr, th_sum);
2054 		break;
2055 
2056 	case IPPROTO_UDP:
2057 		offset += offsetof(struct udphdr, uh_sum);
2058 		break;
2059 
2060 	case IPPROTO_ICMP:
2061 		offset += offsetof(struct icmp, icmp_cksum);
2062 		break;
2063 
2064 	default:
2065 		return;
2066 	}
2067 
2068 	if ((offset + sizeof(u_int16_t)) > m->m_len)
2069 		m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT);
2070 	else
2071 		*(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
2072 }
2073 
2074 void
2075 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp)
2076 {
2077 	/* some hw and in_delayed_cksum need the pseudo header cksum */
2078 	if (m->m_pkthdr.csum_flags &
2079 	    (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) {
2080 		struct ip *ip;
2081 		u_int16_t csum = 0, offset;
2082 
2083 		ip  = mtod(m, struct ip *);
2084 		offset = ip->ip_hl << 2;
2085 		if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
2086 			csum = in_cksum_phdr(ip->ip_src.s_addr,
2087 			    ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) -
2088 			    offset + ip->ip_p));
2089 		if (ip->ip_p == IPPROTO_TCP)
2090 			offset += offsetof(struct tcphdr, th_sum);
2091 		else if (ip->ip_p == IPPROTO_UDP)
2092 			offset += offsetof(struct udphdr, uh_sum);
2093 		else if (ip->ip_p == IPPROTO_ICMP)
2094 			offset += offsetof(struct icmp, icmp_cksum);
2095 		if ((offset + sizeof(u_int16_t)) > m->m_len)
2096 			m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT);
2097 		else
2098 			*(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
2099 	}
2100 
2101 	if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
2102 		if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
2103 		    ifp->if_bridgeport != NULL) {
2104 			tcpstat.tcps_outswcsum++;
2105 			in_delayed_cksum(m);
2106 			m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
2107 		}
2108 	} else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
2109 		if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
2110 		    ifp->if_bridgeport != NULL) {
2111 			udpstat.udps_outswcsum++;
2112 			in_delayed_cksum(m);
2113 			m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */
2114 		}
2115 	} else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) {
2116 		in_delayed_cksum(m);
2117 		m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */
2118 	}
2119 }
2120