xref: /netbsd-src/sys/netinet6/ip6_output.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: ip6_output.c,v 1.211 2018/06/01 08:56:00 maxv Exp $	*/
2 /*	$KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62  */
63 
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.211 2018/06/01 08:56:00 maxv Exp $");
66 
67 #ifdef _KERNEL_OPT
68 #include "opt_inet.h"
69 #include "opt_inet6.h"
70 #include "opt_ipsec.h"
71 #endif
72 
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/errno.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/syslog.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/kauth.h>
83 
84 #include <net/if.h>
85 #include <net/route.h>
86 #include <net/pfil.h>
87 
88 #include <netinet/in.h>
89 #include <netinet/in_var.h>
90 #include <netinet/ip6.h>
91 #include <netinet/ip_var.h>
92 #include <netinet/icmp6.h>
93 #include <netinet/in_offload.h>
94 #include <netinet/portalgo.h>
95 #include <netinet6/in6_offload.h>
96 #include <netinet6/ip6_var.h>
97 #include <netinet6/ip6_private.h>
98 #include <netinet6/in6_pcb.h>
99 #include <netinet6/nd6.h>
100 #include <netinet6/ip6protosw.h>
101 #include <netinet6/scope6_var.h>
102 
103 #ifdef IPSEC
104 #include <netipsec/ipsec.h>
105 #include <netipsec/ipsec6.h>
106 #include <netipsec/key.h>
107 #endif
108 
109 extern pfil_head_t *inet6_pfil_hook;	/* XXX */
110 
/*
 * Scratch set of mbufs used by ip6_output() while it assembles the
 * header chain of an outgoing packet.  A slot is NULL when the
 * corresponding header is not present.
 */
struct ip6_exthdrs {
	struct mbuf	*ip6e_ip6;	/* IPv6 header, once split from payload */
	struct mbuf	*ip6e_hbh;	/* Hop-by-Hop options header */
	struct mbuf	*ip6e_dest1;	/* Destination options header (1st part) */
	struct mbuf	*ip6e_rthdr;	/* Routing header */
	struct mbuf	*ip6e_dest2;	/* Destination options header (2nd part) */
};
118 
119 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
120 	kauth_cred_t, int);
121 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
122 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, kauth_cred_t,
123 	int, int, int);
124 static int ip6_setmoptions(const struct sockopt *, struct in6pcb *);
125 static int ip6_getmoptions(struct sockopt *, struct in6pcb *);
126 static int ip6_copyexthdr(struct mbuf **, void *, int);
127 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
128 	struct ip6_frag **);
129 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
130 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
131 static int ip6_getpmtu(struct rtentry *, struct ifnet *, u_long *, int *);
132 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
133 static int ip6_ifaddrvalid(const struct in6_addr *, const struct in6_addr *);
134 static int ip6_handle_rthdr(struct ip6_rthdr *, struct ip6_hdr *);
135 
136 #ifdef RFC2292
137 static int ip6_pcbopts(struct ip6_pktopts **, struct socket *, struct sockopt *);
138 #endif
139 
/*
 * Check the routing header that ip6_output() is about to send.
 *
 * rh:	routing header to inspect; only its type field is read.
 * ip6:	IPv6 header of the packet; currently unused.
 *
 * No routing header type is accepted, so the result is always EINVAL.
 */
static int
ip6_handle_rthdr(struct ip6_rthdr *rh, struct ip6_hdr *ip6)
{
	if (rh->ip6r_type == IPV6_RTHDR_TYPE_0) {
		/* Dropped, RFC5095. */
		return EINVAL;
	}

	/* Any other type: is it possible? */
	return EINVAL;
}
154 
155 /*
156  * Send an IP packet to a host.
157  */
158 int
159 ip6_if_output(struct ifnet * const ifp, struct ifnet * const origifp,
160     struct mbuf * const m, const struct sockaddr_in6 * const dst,
161     const struct rtentry *rt)
162 {
163 	int error = 0;
164 
165 	if (rt != NULL) {
166 		error = rt_check_reject_route(rt, ifp);
167 		if (error != 0) {
168 			m_freem(m);
169 			return error;
170 		}
171 	}
172 
173 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
174 		error = if_output_lock(ifp, origifp, m, sin6tocsa(dst), rt);
175 	else
176 		error = if_output_lock(ifp, ifp, m, sin6tocsa(dst), rt);
177 	return error;
178 }
179 
180 /*
181  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
182  * header (with pri, len, nxt, hlim, src, dst).
183  *
184  * This function may modify ver and hlim only. The mbuf chain containing the
185  * packet will be freed. The mbuf opt, if present, will not be freed.
186  *
187  * Type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
188  * nd_ifinfo.linkmtu is u_int32_t. So we use u_long to hold largest one,
189  * which is rt_rmx.rmx_mtu.
190  */
191 int
192 ip6_output(
193     struct mbuf *m0,
194     struct ip6_pktopts *opt,
195     struct route *ro,
196     int flags,
197     struct ip6_moptions *im6o,
198     struct in6pcb *in6p,
199     struct ifnet **ifpp		/* XXX: just for statistics */
200 )
201 {
202 	struct ip6_hdr *ip6, *mhip6;
203 	struct ifnet *ifp = NULL, *origifp = NULL;
204 	struct mbuf *m = m0;
205 	int tlen, len, off;
206 	bool tso;
207 	struct route ip6route;
208 	struct rtentry *rt = NULL, *rt_pmtu;
209 	const struct sockaddr_in6 *dst;
210 	struct sockaddr_in6 src_sa, dst_sa;
211 	int error = 0;
212 	struct in6_ifaddr *ia = NULL;
213 	u_long mtu;
214 	int alwaysfrag, dontfrag;
215 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
216 	struct ip6_exthdrs exthdrs;
217 	struct in6_addr finaldst, src0, dst0;
218 	u_int32_t zone;
219 	struct route *ro_pmtu = NULL;
220 	int hdrsplit = 0;
221 	int needipsec = 0;
222 #ifdef IPSEC
223 	struct secpolicy *sp = NULL;
224 #endif
225 	struct psref psref, psref_ia;
226 	int bound = curlwp_bind();
227 	bool release_psref_ia = false;
228 
229 #ifdef DIAGNOSTIC
230 	if ((m->m_flags & M_PKTHDR) == 0)
231 		panic("ip6_output: no HDR");
232 	if ((m->m_pkthdr.csum_flags &
233 	    (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
234 		panic("ip6_output: IPv4 checksum offload flags: %d",
235 		    m->m_pkthdr.csum_flags);
236 	}
237 	if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
238 	    (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
239 		panic("ip6_output: conflicting checksum offload flags: %d",
240 		    m->m_pkthdr.csum_flags);
241 	}
242 #endif
243 
244 	M_CSUM_DATA_IPv6_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
245 
246 #define MAKE_EXTHDR(hp, mp)						\
247     do {								\
248 	if (hp) {							\
249 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
250 		error = ip6_copyexthdr((mp), (void *)(hp), 		\
251 		    ((eh)->ip6e_len + 1) << 3);				\
252 		if (error)						\
253 			goto freehdrs;					\
254 	}								\
255     } while (/*CONSTCOND*/ 0)
256 
257 	memset(&exthdrs, 0, sizeof(exthdrs));
258 	if (opt) {
259 		/* Hop-by-Hop options header */
260 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
261 		/* Destination options header (1st part) */
262 		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
263 		/* Routing header */
264 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
265 		/* Destination options header (2nd part) */
266 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
267 	}
268 
269 	/*
270 	 * Calculate the total length of the extension header chain.
271 	 * Keep the length of the unfragmentable part for fragmentation.
272 	 */
273 	optlen = 0;
274 	if (exthdrs.ip6e_hbh)
275 		optlen += exthdrs.ip6e_hbh->m_len;
276 	if (exthdrs.ip6e_dest1)
277 		optlen += exthdrs.ip6e_dest1->m_len;
278 	if (exthdrs.ip6e_rthdr)
279 		optlen += exthdrs.ip6e_rthdr->m_len;
280 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
281 	/* NOTE: we don't add AH/ESP length here. do that later. */
282 	if (exthdrs.ip6e_dest2)
283 		optlen += exthdrs.ip6e_dest2->m_len;
284 
285 #ifdef IPSEC
286 	if (ipsec_used) {
287 		/* Check the security policy (SP) for the packet */
288 		sp = ipsec6_check_policy(m, in6p, flags, &needipsec, &error);
289 		if (error != 0) {
290 			/*
291 			 * Hack: -EINVAL is used to signal that a packet
292 			 * should be silently discarded.  This is typically
293 			 * because we asked key management for an SA and
294 			 * it was delayed (e.g. kicked up to IKE).
295 			 */
296 			if (error == -EINVAL)
297 				error = 0;
298 			goto freehdrs;
299 		}
300 	}
301 #endif
302 
303 	if (needipsec &&
304 	    (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
305 		in6_delayed_cksum(m);
306 		m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
307 	}
308 
309 	/*
310 	 * If we need IPsec, or there is at least one extension header,
311 	 * separate IP6 header from the payload.
312 	 */
313 	if ((needipsec || optlen) && !hdrsplit) {
314 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
315 			m = NULL;
316 			goto freehdrs;
317 		}
318 		m = exthdrs.ip6e_ip6;
319 		hdrsplit++;
320 	}
321 
322 	/* adjust pointer */
323 	ip6 = mtod(m, struct ip6_hdr *);
324 
325 	/* adjust mbuf packet header length */
326 	m->m_pkthdr.len += optlen;
327 	plen = m->m_pkthdr.len - sizeof(*ip6);
328 
329 	/* If this is a jumbo payload, insert a jumbo payload option. */
330 	if (plen > IPV6_MAXPACKET) {
331 		if (!hdrsplit) {
332 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
333 				m = NULL;
334 				goto freehdrs;
335 			}
336 			m = exthdrs.ip6e_ip6;
337 			hdrsplit++;
338 		}
339 		/* adjust pointer */
340 		ip6 = mtod(m, struct ip6_hdr *);
341 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
342 			goto freehdrs;
343 		optlen += 8; /* XXX JUMBOOPTLEN */
344 		ip6->ip6_plen = 0;
345 	} else
346 		ip6->ip6_plen = htons(plen);
347 
348 	/*
349 	 * Concatenate headers and fill in next header fields.
350 	 * Here we have, on "m"
351 	 *	IPv6 payload
352 	 * and we insert headers accordingly.  Finally, we should be getting:
353 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
354 	 *
355 	 * during the header composing process, "m" points to IPv6 header.
356 	 * "mprev" points to an extension header prior to esp.
357 	 */
358 	{
359 		u_char *nexthdrp = &ip6->ip6_nxt;
360 		struct mbuf *mprev = m;
361 
362 		/*
363 		 * we treat dest2 specially.  this makes IPsec processing
364 		 * much easier.  the goal here is to make mprev point the
365 		 * mbuf prior to dest2.
366 		 *
367 		 * result: IPv6 dest2 payload
368 		 * m and mprev will point to IPv6 header.
369 		 */
370 		if (exthdrs.ip6e_dest2) {
371 			if (!hdrsplit)
372 				panic("assumption failed: hdr not split");
373 			exthdrs.ip6e_dest2->m_next = m->m_next;
374 			m->m_next = exthdrs.ip6e_dest2;
375 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
376 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
377 		}
378 
379 #define MAKE_CHAIN(m, mp, p, i)\
380     do {\
381 	if (m) {\
382 		if (!hdrsplit) \
383 			panic("assumption failed: hdr not split"); \
384 		*mtod((m), u_char *) = *(p);\
385 		*(p) = (i);\
386 		p = mtod((m), u_char *);\
387 		(m)->m_next = (mp)->m_next;\
388 		(mp)->m_next = (m);\
389 		(mp) = (m);\
390 	}\
391     } while (/*CONSTCOND*/ 0)
392 		/*
393 		 * result: IPv6 hbh dest1 rthdr dest2 payload
394 		 * m will point to IPv6 header.  mprev will point to the
395 		 * extension header prior to dest2 (rthdr in the above case).
396 		 */
397 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
398 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
399 		    IPPROTO_DSTOPTS);
400 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
401 		    IPPROTO_ROUTING);
402 
403 		M_CSUM_DATA_IPv6_SET(m->m_pkthdr.csum_data,
404 		    sizeof(struct ip6_hdr) + optlen);
405 	}
406 
407 	/* Need to save for pmtu */
408 	finaldst = ip6->ip6_dst;
409 
410 	/*
411 	 * If there is a routing header, replace destination address field
412 	 * with the first hop of the routing header.
413 	 */
414 	if (exthdrs.ip6e_rthdr) {
415 		struct ip6_rthdr *rh;
416 
417 		rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
418 
419 		error = ip6_handle_rthdr(rh, ip6);
420 		if (error != 0)
421 			goto bad;
422 	}
423 
424 	/* Source address validation */
425 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
426 	    (flags & IPV6_UNSPECSRC) == 0) {
427 		error = EOPNOTSUPP;
428 		IP6_STATINC(IP6_STAT_BADSCOPE);
429 		goto bad;
430 	}
431 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
432 		error = EOPNOTSUPP;
433 		IP6_STATINC(IP6_STAT_BADSCOPE);
434 		goto bad;
435 	}
436 
437 	IP6_STATINC(IP6_STAT_LOCALOUT);
438 
439 	/*
440 	 * Route packet.
441 	 */
442 	/* initialize cached route */
443 	if (ro == NULL) {
444 		memset(&ip6route, 0, sizeof(ip6route));
445 		ro = &ip6route;
446 	}
447 	ro_pmtu = ro;
448 	if (opt && opt->ip6po_rthdr)
449 		ro = &opt->ip6po_route;
450 
451 	/*
452 	 * if specified, try to fill in the traffic class field.
453 	 * do not override if a non-zero value is already set.
454 	 * we check the diffserv field and the ecn field separately.
455 	 */
456 	if (opt && opt->ip6po_tclass >= 0) {
457 		int mask = 0;
458 
459 		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
460 			mask |= 0xfc;
461 		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
462 			mask |= 0x03;
463 		if (mask != 0)
464 			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
465 	}
466 
467 	/* fill in or override the hop limit field, if necessary. */
468 	if (opt && opt->ip6po_hlim != -1)
469 		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
470 	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
471 		if (im6o != NULL)
472 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
473 		else
474 			ip6->ip6_hlim = ip6_defmcasthlim;
475 	}
476 
477 #ifdef IPSEC
478 	if (needipsec) {
479 		int s = splsoftnet();
480 		error = ipsec6_process_packet(m, sp->req);
481 		splx(s);
482 
483 		/*
484 		 * Preserve KAME behaviour: ENOENT can be returned
485 		 * when an SA acquire is in progress.  Don't propagate
486 		 * this to user-level; it confuses applications.
487 		 * XXX this will go away when the SADB is redone.
488 		 */
489 		if (error == ENOENT)
490 			error = 0;
491 
492 		goto done;
493 	}
494 #endif
495 
496 	/* adjust pointer */
497 	ip6 = mtod(m, struct ip6_hdr *);
498 
499 	sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
500 
501 	/* We do not need a route for multicast */
502 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
503 		struct in6_pktinfo *pi = NULL;
504 
505 		/*
506 		 * If the outgoing interface for the address is specified by
507 		 * the caller, use it.
508 		 */
509 		if (opt && (pi = opt->ip6po_pktinfo) != NULL) {
510 			/* XXX boundary check is assumed to be already done. */
511 			ifp = if_get_byindex(pi->ipi6_ifindex, &psref);
512 		} else if (im6o != NULL) {
513 			ifp = if_get_byindex(im6o->im6o_multicast_if_index,
514 			    &psref);
515 		}
516 	}
517 
518 	if (ifp == NULL) {
519 		error = in6_selectroute(&dst_sa, opt, &ro, &rt, true);
520 		if (error != 0)
521 			goto bad;
522 		ifp = if_get_byindex(rt->rt_ifp->if_index, &psref);
523 	}
524 
525 	if (rt == NULL) {
526 		/*
527 		 * If in6_selectroute() does not return a route entry,
528 		 * dst may not have been updated.
529 		 */
530 		error = rtcache_setdst(ro, sin6tosa(&dst_sa));
531 		if (error) {
532 			goto bad;
533 		}
534 	}
535 
536 	/*
537 	 * then rt (for unicast) and ifp must be non-NULL valid values.
538 	 */
539 	if ((flags & IPV6_FORWARDING) == 0) {
540 		/* XXX: the FORWARDING flag can be set for mrouting. */
541 		in6_ifstat_inc(ifp, ifs6_out_request);
542 	}
543 	if (rt != NULL) {
544 		ia = (struct in6_ifaddr *)(rt->rt_ifa);
545 		rt->rt_use++;
546 	}
547 
548 	/*
549 	 * The outgoing interface must be in the zone of source and
550 	 * destination addresses.  We should use ia_ifp to support the
551 	 * case of sending packets to an address of our own.
552 	 */
553 	if (ia != NULL && ia->ia_ifp) {
554 		origifp = ia->ia_ifp;
555 		if (if_is_deactivated(origifp))
556 			goto bad;
557 		if_acquire(origifp, &psref_ia);
558 		release_psref_ia = true;
559 	} else
560 		origifp = ifp;
561 
562 	src0 = ip6->ip6_src;
563 	if (in6_setscope(&src0, origifp, &zone))
564 		goto badscope;
565 	sockaddr_in6_init(&src_sa, &ip6->ip6_src, 0, 0, 0);
566 	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
567 		goto badscope;
568 
569 	dst0 = ip6->ip6_dst;
570 	if (in6_setscope(&dst0, origifp, &zone))
571 		goto badscope;
572 	/* re-initialize to be sure */
573 	sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
574 	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
575 		goto badscope;
576 
577 	/* scope check is done. */
578 
579 	/* Ensure we only send from a valid address. */
580 	if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
581 	    (error = ip6_ifaddrvalid(&src0, &dst0)) != 0)
582 	{
583 		char ip6buf[INET6_ADDRSTRLEN];
584 		nd6log(LOG_ERR,
585 		    "refusing to send from invalid address %s (pid %d)\n",
586 		    IN6_PRINT(ip6buf, &src0), curproc->p_pid);
587 		IP6_STATINC(IP6_STAT_ODROPPED);
588 		in6_ifstat_inc(origifp, ifs6_out_discard);
589 		if (error == 1)
590 			/*
591 			 * Address exists, but is tentative or detached.
592 			 * We can't send from it because it's invalid,
593 			 * so we drop the packet.
594 			 */
595 			error = 0;
596 		else
597 			error = EADDRNOTAVAIL;
598 		goto bad;
599 	}
600 
601 	if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) &&
602 	    !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
603 		dst = satocsin6(rt->rt_gateway);
604 	else
605 		dst = satocsin6(rtcache_getdst(ro));
606 
607 	/*
608 	 * XXXXXX: original code follows:
609 	 */
610 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
611 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
612 	else {
613 		bool ingroup;
614 
615 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
616 
617 		in6_ifstat_inc(ifp, ifs6_out_mcast);
618 
619 		/*
620 		 * Confirm that the outgoing interface supports multicast.
621 		 */
622 		if (!(ifp->if_flags & IFF_MULTICAST)) {
623 			IP6_STATINC(IP6_STAT_NOROUTE);
624 			in6_ifstat_inc(ifp, ifs6_out_discard);
625 			error = ENETUNREACH;
626 			goto bad;
627 		}
628 
629 		ingroup = in6_multi_group(&ip6->ip6_dst, ifp);
630 		if (ingroup && (im6o == NULL || im6o->im6o_multicast_loop)) {
631 			/*
632 			 * If we belong to the destination multicast group
633 			 * on the outgoing interface, and the caller did not
634 			 * forbid loopback, loop back a copy.
635 			 */
636 			KASSERT(dst != NULL);
637 			ip6_mloopback(ifp, m, dst);
638 		} else {
639 			/*
640 			 * If we are acting as a multicast router, perform
641 			 * multicast forwarding as if the packet had just
642 			 * arrived on the interface to which we are about
643 			 * to send.  The multicast forwarding function
644 			 * recursively calls this function, using the
645 			 * IPV6_FORWARDING flag to prevent infinite recursion.
646 			 *
647 			 * Multicasts that are looped back by ip6_mloopback(),
648 			 * above, will be forwarded by the ip6_input() routine,
649 			 * if necessary.
650 			 */
651 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
652 				if (ip6_mforward(ip6, ifp, m) != 0) {
653 					m_freem(m);
654 					goto done;
655 				}
656 			}
657 		}
658 		/*
659 		 * Multicasts with a hoplimit of zero may be looped back,
660 		 * above, but must not be transmitted on a network.
661 		 * Also, multicasts addressed to the loopback interface
662 		 * are not sent -- the above call to ip6_mloopback() will
663 		 * loop back a copy if this host actually belongs to the
664 		 * destination group on the loopback interface.
665 		 */
666 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
667 		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
668 			m_freem(m);
669 			goto done;
670 		}
671 	}
672 
673 	/*
674 	 * Fill the outgoing inteface to tell the upper layer
675 	 * to increment per-interface statistics.
676 	 */
677 	if (ifpp)
678 		*ifpp = ifp;
679 
680 	/* Determine path MTU. */
681 	/*
682 	 * ro_pmtu represent final destination while
683 	 * ro might represent immediate destination.
684 	 * Use ro_pmtu destination since MTU might differ.
685 	 */
686 	if (ro_pmtu != ro) {
687 		union {
688 			struct sockaddr		dst;
689 			struct sockaddr_in6	dst6;
690 		} u;
691 
692 		/* ro_pmtu may not have a cache */
693 		sockaddr_in6_init(&u.dst6, &finaldst, 0, 0, 0);
694 		rt_pmtu = rtcache_lookup(ro_pmtu, &u.dst);
695 	} else
696 		rt_pmtu = rt;
697 	error = ip6_getpmtu(rt_pmtu, ifp, &mtu, &alwaysfrag);
698 	if (rt_pmtu != NULL && rt_pmtu != rt)
699 		rtcache_unref(rt_pmtu, ro_pmtu);
700 	if (error != 0)
701 		goto bad;
702 
703 	/*
704 	 * The caller of this function may specify to use the minimum MTU
705 	 * in some cases.
706 	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
707 	 * setting.  The logic is a bit complicated; by default, unicast
708 	 * packets will follow path MTU while multicast packets will be sent at
709 	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
710 	 * including unicast ones will be sent at the minimum MTU.  Multicast
711 	 * packets will always be sent at the minimum MTU unless
712 	 * IP6PO_MINMTU_DISABLE is explicitly specified.
713 	 * See RFC 3542 for more details.
714 	 */
715 	if (mtu > IPV6_MMTU) {
716 		if ((flags & IPV6_MINMTU))
717 			mtu = IPV6_MMTU;
718 		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
719 			mtu = IPV6_MMTU;
720 		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
721 			 (opt == NULL ||
722 			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
723 			mtu = IPV6_MMTU;
724 		}
725 	}
726 
727 	/*
728 	 * clear embedded scope identifiers if necessary.
729 	 * in6_clearscope will touch the addresses only when necessary.
730 	 */
731 	in6_clearscope(&ip6->ip6_src);
732 	in6_clearscope(&ip6->ip6_dst);
733 
734 	/*
735 	 * If the outgoing packet contains a hop-by-hop options header,
736 	 * it must be examined and processed even by the source node.
737 	 * (RFC 2460, section 4.)
738 	 *
739 	 * XXX Is this really necessary?
740 	 */
741 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
742 		u_int32_t dummy1; /* XXX unused */
743 		u_int32_t dummy2; /* XXX unused */
744 		int hoff = sizeof(struct ip6_hdr);
745 
746 		if (ip6_hopopts_input(&dummy1, &dummy2, &m, &hoff)) {
747 			/* m was already freed at this point */
748 			error = EINVAL;
749 			goto done;
750 		}
751 
752 		ip6 = mtod(m, struct ip6_hdr *);
753 	}
754 
755 	/*
756 	 * Run through list of hooks for output packets.
757 	 */
758 	if ((error = pfil_run_hooks(inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
759 		goto done;
760 	if (m == NULL)
761 		goto done;
762 	ip6 = mtod(m, struct ip6_hdr *);
763 
764 	/*
765 	 * Send the packet to the outgoing interface.
766 	 * If necessary, do IPv6 fragmentation before sending.
767 	 *
768 	 * the logic here is rather complex:
769 	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
770 	 * 1-a:	send as is if tlen <= path mtu
771 	 * 1-b:	fragment if tlen > path mtu
772 	 *
773 	 * 2: if user asks us not to fragment (dontfrag == 1)
774 	 * 2-a:	send as is if tlen <= interface mtu
775 	 * 2-b:	error if tlen > interface mtu
776 	 *
777 	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
778 	 *	always fragment
779 	 *
780 	 * 4: if dontfrag == 1 && alwaysfrag == 1
781 	 *	error, as we cannot handle this conflicting request
782 	 */
783 	tlen = m->m_pkthdr.len;
784 	tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
785 	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
786 		dontfrag = 1;
787 	else
788 		dontfrag = 0;
789 
790 	if (dontfrag && alwaysfrag) {	/* case 4 */
791 		/* conflicting request - can't transmit */
792 		error = EMSGSIZE;
793 		goto bad;
794 	}
795 	if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) {	/* case 2-b */
796 		/*
797 		 * Even if the DONTFRAG option is specified, we cannot send the
798 		 * packet when the data length is larger than the MTU of the
799 		 * outgoing interface.
800 		 * Notify the error by sending IPV6_PATHMTU ancillary data as
801 		 * well as returning an error code (the latter is not described
802 		 * in the API spec.)
803 		 */
804 		u_int32_t mtu32;
805 		struct ip6ctlparam ip6cp;
806 
807 		mtu32 = (u_int32_t)mtu;
808 		memset(&ip6cp, 0, sizeof(ip6cp));
809 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
810 		pfctlinput2(PRC_MSGSIZE,
811 		    rtcache_getdst(ro_pmtu), &ip6cp);
812 
813 		error = EMSGSIZE;
814 		goto bad;
815 	}
816 
817 	/*
818 	 * transmit packet without fragmentation
819 	 */
820 	if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
821 		/* case 1-a and 2-a */
822 		struct in6_ifaddr *ia6;
823 		int sw_csum;
824 		int s;
825 
826 		ip6 = mtod(m, struct ip6_hdr *);
827 		s = pserialize_read_enter();
828 		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
829 		if (ia6) {
830 			/* Record statistics for this interface address. */
831 			ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
832 		}
833 		pserialize_read_exit(s);
834 
835 		sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
836 		if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
837 			if (IN6_NEED_CHECKSUM(ifp,
838 			    sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
839 				in6_delayed_cksum(m);
840 			}
841 			m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
842 		}
843 
844 		KASSERT(dst != NULL);
845 		if (__predict_true(!tso ||
846 		    (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
847 			error = ip6_if_output(ifp, origifp, m, dst, rt);
848 		} else {
849 			error = ip6_tso_output(ifp, origifp, m, dst, rt);
850 		}
851 		goto done;
852 	}
853 
854 	if (tso) {
855 		error = EINVAL; /* XXX */
856 		goto bad;
857 	}
858 
859 	/*
860 	 * try to fragment the packet.  case 1-b and 3
861 	 */
862 	if (mtu < IPV6_MMTU) {
863 		/* path MTU cannot be less than IPV6_MMTU */
864 		error = EMSGSIZE;
865 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
866 		goto bad;
867 	} else if (ip6->ip6_plen == 0) {
868 		/* jumbo payload cannot be fragmented */
869 		error = EMSGSIZE;
870 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
871 		goto bad;
872 	} else {
873 		const u_int32_t id = htonl(ip6_randomid());
874 		struct mbuf **mnext, *m_frgpart;
875 		const int hlen = unfragpartlen;
876 		struct ip6_frag *ip6f;
877 		u_char nextproto;
878 
879 		if (mtu > IPV6_MAXPACKET)
880 			mtu = IPV6_MAXPACKET;
881 
882 		/*
883 		 * Must be able to put at least 8 bytes per fragment.
884 		 */
885 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
886 		if (len < 8) {
887 			error = EMSGSIZE;
888 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
889 			goto bad;
890 		}
891 
892 		mnext = &m->m_nextpkt;
893 
894 		/*
895 		 * Change the next header field of the last header in the
896 		 * unfragmentable part.
897 		 */
898 		if (exthdrs.ip6e_rthdr) {
899 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
900 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
901 		} else if (exthdrs.ip6e_dest1) {
902 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
903 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
904 		} else if (exthdrs.ip6e_hbh) {
905 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
906 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
907 		} else {
908 			nextproto = ip6->ip6_nxt;
909 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
910 		}
911 
912 		if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
913 		    != 0) {
914 			if (IN6_NEED_CHECKSUM(ifp,
915 			    m->m_pkthdr.csum_flags &
916 			    (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
917 				in6_delayed_cksum(m);
918 			}
919 			m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
920 		}
921 
922 		/*
923 		 * Loop through length of segment after first fragment,
924 		 * make new header and copy data of each part and link onto
925 		 * chain.
926 		 */
927 		m0 = m;
928 		for (off = hlen; off < tlen; off += len) {
929 			struct mbuf *mlast;
930 
931 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
932 			if (!m) {
933 				error = ENOBUFS;
934 				IP6_STATINC(IP6_STAT_ODROPPED);
935 				goto sendorfree;
936 			}
937 			m_reset_rcvif(m);
938 			m->m_flags = m0->m_flags & M_COPYFLAGS;
939 			*mnext = m;
940 			mnext = &m->m_nextpkt;
941 			m->m_data += max_linkhdr;
942 			mhip6 = mtod(m, struct ip6_hdr *);
943 			*mhip6 = *ip6;
944 			m->m_len = sizeof(*mhip6);
945 
946 			ip6f = NULL;
947 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
948 			if (error) {
949 				IP6_STATINC(IP6_STAT_ODROPPED);
950 				goto sendorfree;
951 			}
952 
953 			/* Fill in the Frag6 Header */
954 			ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
955 			if (off + len >= tlen)
956 				len = tlen - off;
957 			else
958 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
959 			ip6f->ip6f_reserved = 0;
960 			ip6f->ip6f_ident = id;
961 			ip6f->ip6f_nxt = nextproto;
962 
963 			mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
964 			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
965 			if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL) {
966 				error = ENOBUFS;
967 				IP6_STATINC(IP6_STAT_ODROPPED);
968 				goto sendorfree;
969 			}
970 			for (mlast = m; mlast->m_next; mlast = mlast->m_next)
971 				;
972 			mlast->m_next = m_frgpart;
973 
974 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
975 			m_reset_rcvif(m);
976 			IP6_STATINC(IP6_STAT_OFRAGMENTS);
977 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
978 		}
979 
980 		in6_ifstat_inc(ifp, ifs6_out_fragok);
981 	}
982 
983 sendorfree:
984 	m = m0->m_nextpkt;
985 	m0->m_nextpkt = 0;
986 	m_freem(m0);
987 	for (m0 = m; m; m = m0) {
988 		m0 = m->m_nextpkt;
989 		m->m_nextpkt = 0;
990 		if (error == 0) {
991 			struct in6_ifaddr *ia6;
992 			int s;
993 			ip6 = mtod(m, struct ip6_hdr *);
994 			s = pserialize_read_enter();
995 			ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
996 			if (ia6) {
997 				/*
998 				 * Record statistics for this interface
999 				 * address.
1000 				 */
1001 				ia6->ia_ifa.ifa_data.ifad_outbytes +=
1002 				    m->m_pkthdr.len;
1003 			}
1004 			pserialize_read_exit(s);
1005 			KASSERT(dst != NULL);
1006 			error = ip6_if_output(ifp, origifp, m, dst, rt);
1007 		} else
1008 			m_freem(m);
1009 	}
1010 
1011 	if (error == 0)
1012 		IP6_STATINC(IP6_STAT_FRAGMENTED);
1013 
1014 done:
1015 	rtcache_unref(rt, ro);
1016 	if (ro == &ip6route)
1017 		rtcache_free(&ip6route);
1018 #ifdef IPSEC
1019 	if (sp != NULL)
1020 		KEY_SP_UNREF(&sp);
1021 #endif
1022 	if_put(ifp, &psref);
1023 	if (release_psref_ia)
1024 		if_put(origifp, &psref_ia);
1025 	curlwp_bindx(bound);
1026 
1027 	return error;
1028 
1029 freehdrs:
1030 	m_freem(exthdrs.ip6e_hbh);
1031 	m_freem(exthdrs.ip6e_dest1);
1032 	m_freem(exthdrs.ip6e_rthdr);
1033 	m_freem(exthdrs.ip6e_dest2);
1034 	/* FALLTHROUGH */
1035 bad:
1036 	m_freem(m);
1037 	goto done;
1038 
1039 badscope:
1040 	IP6_STATINC(IP6_STAT_BADSCOPE);
1041 	in6_ifstat_inc(origifp, ifs6_out_discard);
1042 	if (error == 0)
1043 		error = EHOSTUNREACH; /* XXX */
1044 	goto bad;
1045 }
1046 
1047 static int
1048 ip6_copyexthdr(struct mbuf **mp, void *hdr, int hlen)
1049 {
1050 	struct mbuf *m;
1051 
1052 	if (hlen > MCLBYTES)
1053 		return ENOBUFS; /* XXX */
1054 
1055 	MGET(m, M_DONTWAIT, MT_DATA);
1056 	if (!m)
1057 		return ENOBUFS;
1058 
1059 	if (hlen > MLEN) {
1060 		MCLGET(m, M_DONTWAIT);
1061 		if ((m->m_flags & M_EXT) == 0) {
1062 			m_free(m);
1063 			return ENOBUFS;
1064 		}
1065 	}
1066 	m->m_len = hlen;
1067 	if (hdr)
1068 		memcpy(mtod(m, void *), hdr, hlen);
1069 
1070 	*mp = m;
1071 	return 0;
1072 }
1073 
1074 /*
1075  * Process a delayed payload checksum calculation.
1076  */
1077 void
1078 in6_delayed_cksum(struct mbuf *m)
1079 {
1080 	uint16_t csum, offset;
1081 
1082 	KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1083 	KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1084 	KASSERT((m->m_pkthdr.csum_flags
1085 	    & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
1086 
1087 	offset = M_CSUM_DATA_IPv6_IPHL(m->m_pkthdr.csum_data);
1088 	csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1089 	if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
1090 		csum = 0xffff;
1091 	}
1092 
1093 	offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
1094 	if ((offset + sizeof(csum)) > m->m_len) {
1095 		m_copyback(m, offset, sizeof(csum), &csum);
1096 	} else {
1097 		*(uint16_t *)(mtod(m, char *) + offset) = csum;
1098 	}
1099 }
1100 
1101 /*
1102  * Insert jumbo payload option.
1103  */
1104 static int
1105 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1106 {
1107 	struct mbuf *mopt;
1108 	u_int8_t *optbuf;
1109 	u_int32_t v;
1110 
1111 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1112 
1113 	/*
1114 	 * If there is no hop-by-hop options header, allocate new one.
1115 	 * If there is one but it doesn't have enough space to store the
1116 	 * jumbo payload option, allocate a cluster to store the whole options.
1117 	 * Otherwise, use it to store the options.
1118 	 */
1119 	if (exthdrs->ip6e_hbh == NULL) {
1120 		MGET(mopt, M_DONTWAIT, MT_DATA);
1121 		if (mopt == 0)
1122 			return (ENOBUFS);
1123 		mopt->m_len = JUMBOOPTLEN;
1124 		optbuf = mtod(mopt, u_int8_t *);
1125 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1126 		exthdrs->ip6e_hbh = mopt;
1127 	} else {
1128 		struct ip6_hbh *hbh;
1129 
1130 		mopt = exthdrs->ip6e_hbh;
1131 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1132 			const int oldoptlen = mopt->m_len;
1133 			struct mbuf *n;
1134 
1135 			/*
1136 			 * Assumptions:
1137 			 * - exthdrs->ip6e_hbh is not referenced from places
1138 			 *   other than exthdrs.
1139 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1140 			 */
1141 			KASSERT(mopt->m_next == NULL);
1142 
1143 			/*
1144 			 * Give up if the whole (new) hbh header does not fit
1145 			 * even in an mbuf cluster.
1146 			 */
1147 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1148 				return ENOBUFS;
1149 
1150 			/*
1151 			 * At this point, we must always prepare a cluster.
1152 			 */
1153 			MGET(n, M_DONTWAIT, MT_DATA);
1154 			if (n) {
1155 				MCLGET(n, M_DONTWAIT);
1156 				if ((n->m_flags & M_EXT) == 0) {
1157 					m_freem(n);
1158 					n = NULL;
1159 				}
1160 			}
1161 			if (!n)
1162 				return ENOBUFS;
1163 
1164 			n->m_len = oldoptlen + JUMBOOPTLEN;
1165 			bcopy(mtod(mopt, void *), mtod(n, void *),
1166 			    oldoptlen);
1167 			optbuf = mtod(n, u_int8_t *) + oldoptlen;
1168 			m_freem(mopt);
1169 			mopt = exthdrs->ip6e_hbh = n;
1170 		} else {
1171 			optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1172 			mopt->m_len += JUMBOOPTLEN;
1173 		}
1174 		optbuf[0] = IP6OPT_PADN;
1175 		optbuf[1] = 0;
1176 
1177 		/*
1178 		 * Adjust the header length according to the pad and
1179 		 * the jumbo payload option.
1180 		 */
1181 		hbh = mtod(mopt, struct ip6_hbh *);
1182 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1183 	}
1184 
1185 	/* fill in the option. */
1186 	optbuf[2] = IP6OPT_JUMBO;
1187 	optbuf[3] = 4;
1188 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1189 	memcpy(&optbuf[4], &v, sizeof(u_int32_t));
1190 
1191 	/* finally, adjust the packet header length */
1192 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1193 
1194 	return 0;
1195 #undef JUMBOOPTLEN
1196 }
1197 
1198 /*
1199  * Insert fragment header and copy unfragmentable header portions.
1200  *
1201  * *frghdrp will not be read, and it is guaranteed that either an
1202  * error is returned or that *frghdrp will point to space allocated
1203  * for the fragment header.
1204  *
1205  * On entry, m contains:
1206  *     IPv6 Header
1207  * On exit, it contains:
1208  *     IPv6 Header -> Unfragmentable Part -> Frag6 Header
1209  */
1210 static int
1211 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1212 	struct ip6_frag **frghdrp)
1213 {
1214 	struct mbuf *n, *mlast;
1215 
1216 	if (hlen > sizeof(struct ip6_hdr)) {
1217 		n = m_copym(m0, sizeof(struct ip6_hdr),
1218 		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1219 		if (n == NULL)
1220 			return ENOBUFS;
1221 		m->m_next = n;
1222 	} else
1223 		n = m;
1224 
1225 	/* Search for the last mbuf of unfragmentable part. */
1226 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1227 		;
1228 
1229 	if ((mlast->m_flags & M_EXT) == 0 &&
1230 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1231 		/* use the trailing space of the last mbuf for the fragment hdr */
1232 		*frghdrp = (struct ip6_frag *)(mtod(mlast, char *) +
1233 		    mlast->m_len);
1234 		mlast->m_len += sizeof(struct ip6_frag);
1235 	} else {
1236 		/* allocate a new mbuf for the fragment header */
1237 		struct mbuf *mfrg;
1238 
1239 		MGET(mfrg, M_DONTWAIT, MT_DATA);
1240 		if (mfrg == NULL)
1241 			return ENOBUFS;
1242 		mfrg->m_len = sizeof(struct ip6_frag);
1243 		*frghdrp = mtod(mfrg, struct ip6_frag *);
1244 		mlast->m_next = mfrg;
1245 	}
1246 
1247 	return 0;
1248 }
1249 
1250 static int
1251 ip6_getpmtu(struct rtentry *rt, struct ifnet *ifp, u_long *mtup,
1252     int *alwaysfragp)
1253 {
1254 	u_int32_t mtu = 0;
1255 	int alwaysfrag = 0;
1256 	int error = 0;
1257 
1258 	if (rt != NULL) {
1259 		u_int32_t ifmtu;
1260 
1261 		if (ifp == NULL)
1262 			ifp = rt->rt_ifp;
1263 		ifmtu = IN6_LINKMTU(ifp);
1264 		mtu = rt->rt_rmx.rmx_mtu;
1265 		if (mtu == 0)
1266 			mtu = ifmtu;
1267 		else if (mtu < IPV6_MMTU) {
1268 			/*
1269 			 * RFC2460 section 5, last paragraph:
1270 			 * if we record ICMPv6 too big message with
1271 			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1272 			 * or smaller, with fragment header attached.
1273 			 * (fragment header is needed regardless from the
1274 			 * packet size, for translators to identify packets)
1275 			 */
1276 			alwaysfrag = 1;
1277 			mtu = IPV6_MMTU;
1278 		} else if (mtu > ifmtu) {
1279 			/*
1280 			 * The MTU on the route is larger than the MTU on
1281 			 * the interface!  This shouldn't happen, unless the
1282 			 * MTU of the interface has been changed after the
1283 			 * interface was brought up.  Change the MTU in the
1284 			 * route to match the interface MTU (as long as the
1285 			 * field isn't locked).
1286 			 */
1287 			mtu = ifmtu;
1288 			if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1289 				rt->rt_rmx.rmx_mtu = mtu;
1290 		}
1291 	} else if (ifp) {
1292 		mtu = IN6_LINKMTU(ifp);
1293 	} else
1294 		error = EHOSTUNREACH; /* XXX */
1295 
1296 	*mtup = mtu;
1297 	if (alwaysfragp)
1298 		*alwaysfragp = alwaysfrag;
1299 	return (error);
1300 }
1301 
/*
 * IP6 socket option processing.
 *
 * op:   PRCO_SETOPT or PRCO_GETOPT.
 * so:   the socket; its lock must be held (asserted).
 * sopt: the option being set or fetched; level and name are taken from
 *       sopt_level and sopt_name.
 *
 * Level IPPROTO_IP accepts only the IPv4 multicast options listed
 * below, which are forwarded to ip_getmoptions()/ip_setmoptions() on
 * the PCB's in6p_v4moptions.  Level IPPROTO_IPV6 is handled by the large
 * switch that follows.  Any other level yields ENOPROTOOPT.
 *
 * Returns 0 or an errno value.  Note that at level IPPROTO_IPV6 an op
 * that is neither PRCO_SETOPT nor PRCO_GETOPT matches no case and the
 * function returns 0.
 */
int
ip6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
{
	int optdatalen, uproto;
	void *optdata;
	struct in6pcb *in6p = sotoin6pcb(so);
	struct ip_moptions **mopts;
	int error, optval;
	int level, optname;

	KASSERT(solocked(so));
	KASSERT(sopt != NULL);

	level = sopt->sopt_level;
	optname = sopt->sopt_name;

	error = optval = 0;
	uproto = (int)so->so_proto->pr_protocol;

	/* Dispatch on level; only IPPROTO_IPV6 continues past this switch. */
	switch (level) {
	case IPPROTO_IP:
		switch (optname) {
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_MULTICAST_IF:
		case IP_MULTICAST_LOOP:
		case IP_MULTICAST_TTL:
			mopts = &in6p->in6p_v4moptions;
			switch (op) {
			case PRCO_GETOPT:
				return ip_getmoptions(*mopts, sopt);
			case PRCO_SETOPT:
				return ip_setmoptions(mopts, sopt);
			default:
				return EINVAL;
			}
		default:
			return ENOPROTOOPT;
		}
	case IPPROTO_IPV6:
		break;
	default:
		return ENOPROTOOPT;
	}
	switch (op) {
	case PRCO_SETOPT:
		switch (optname) {
#ifdef RFC2292
		case IPV6_2292PKTOPTIONS:
			error = ip6_pcbopts(&in6p->in6p_outputopts, so, sopt);
			break;
#endif

		/*
		 * Use of some Hop-by-Hop options or some
		 * Destination options, might require special
		 * privilege.  That is, normal applications
		 * (without special privilege) might be forbidden
		 * from setting certain options in outgoing packets,
		 * and might never see certain options in received
		 * packets. [RFC 2292 Section 6]
		 * KAME specific note:
		 *  KAME prevents non-privileged users from sending or
		 *  receiving ANY hbh/dst options in order to avoid
		 *  overhead of parsing options in the kernel.
		 */
		case IPV6_RECVHOPOPTS:
		case IPV6_RECVDSTOPTS:
		case IPV6_RECVRTHDRDSTOPTS:
			error = kauth_authorize_network(kauth_cred_get(),
			    KAUTH_NETWORK_IPV6, KAUTH_REQ_NETWORK_IPV6_HOPBYHOP,
			    NULL, NULL, NULL);
			if (error)
				break;
			/* FALLTHROUGH */
		case IPV6_UNICAST_HOPS:
		case IPV6_HOPLIMIT:
		case IPV6_FAITH:

		case IPV6_RECVPKTINFO:
		case IPV6_RECVHOPLIMIT:
		case IPV6_RECVRTHDR:
		case IPV6_RECVPATHMTU:
		case IPV6_RECVTCLASS:
		case IPV6_V6ONLY:
			/* All of these take a single int argument. */
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;
			switch (optname) {
			case IPV6_UNICAST_HOPS:
				if (optval < -1 || optval >= 256)
					error = EINVAL;
				else {
					/* -1 = kernel default */
					in6p->in6p_hops = optval;
				}
				break;
/*
 * Helper macros used by the cases below (they read optval and modify
 * in6p->in6p_flags):
 *   OPTSET(bit)     - set the bit when optval is non-zero, else clear it.
 *   OPTSET2292(bit) - same, but first marks the PCB with IN6P_RFC2292.
 *   OPTBIT(bit)     - 1 when any of the given bits is set, else 0.
 */
#define OPTSET(bit) \
do { \
if (optval) \
	in6p->in6p_flags |= (bit); \
else \
	in6p->in6p_flags &= ~(bit); \
} while (/*CONSTCOND*/ 0)

#ifdef RFC2292
#define OPTSET2292(bit) 			\
do { 						\
in6p->in6p_flags |= IN6P_RFC2292; 	\
if (optval) 				\
	in6p->in6p_flags |= (bit); 	\
else 					\
	in6p->in6p_flags &= ~(bit); 	\
} while (/*CONSTCOND*/ 0)
#endif

#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)

			case IPV6_RECVPKTINFO:
#ifdef RFC2292
				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				OPTSET(IN6P_PKTINFO);
				break;

			case IPV6_HOPLIMIT:
			{
				struct ip6_pktopts **optp;

#ifdef RFC2292
				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				/*
				 * Unlike its neighbours this is stored in
				 * the PCB's output options, not as a flag.
				 */
				optp = &in6p->in6p_outputopts;
				error = ip6_pcbopt(IPV6_HOPLIMIT,
						   (u_char *)&optval,
						   sizeof(optval),
						   optp,
						   kauth_cred_get(), uproto);
				break;
			}

			case IPV6_RECVHOPLIMIT:
#ifdef RFC2292
				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				OPTSET(IN6P_HOPLIMIT);
				break;

			case IPV6_RECVHOPOPTS:
#ifdef RFC2292
				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				OPTSET(IN6P_HOPOPTS);
				break;

			case IPV6_RECVDSTOPTS:
#ifdef RFC2292
				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				OPTSET(IN6P_DSTOPTS);
				break;

			case IPV6_RECVRTHDRDSTOPTS:
#ifdef RFC2292
				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				OPTSET(IN6P_RTHDRDSTOPTS);
				break;

			case IPV6_RECVRTHDR:
#ifdef RFC2292
				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				OPTSET(IN6P_RTHDR);
				break;

			case IPV6_FAITH:
				OPTSET(IN6P_FAITH);
				break;

			case IPV6_RECVPATHMTU:
				/*
				 * We ignore this option for TCP
				 * sockets.
				 * (RFC3542 leaves this case
				 * unspecified.)
				 */
				if (uproto != IPPROTO_TCP)
					OPTSET(IN6P_MTU);
				break;

			case IPV6_V6ONLY:
				/*
				 * make setsockopt(IPV6_V6ONLY)
				 * available only prior to bind(2).
				 * see ipng mailing list, Jun 22 2001.
				 */
				if (in6p->in6p_lport ||
				    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
					error = EINVAL;
					break;
				}
#ifdef INET6_BINDV6ONLY
				/* Turning the option off is refused here. */
				if (!optval)
					error = EINVAL;
#else
				OPTSET(IN6P_IPV6_V6ONLY);
#endif
				break;
			case IPV6_RECVTCLASS:
#ifdef RFC2292
				/* cannot mix with RFC2292 XXX */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
#endif
				OPTSET(IN6P_TCLASS);
				break;

			}
			break;

		case IPV6_OTCLASS:
		{
			struct ip6_pktopts **optp;
			u_int8_t tclass;

			/* The value is read as one byte, not as an int. */
			error = sockopt_get(sopt, &tclass, sizeof(tclass));
			if (error)
				break;
			optp = &in6p->in6p_outputopts;
			error = ip6_pcbopt(optname,
					   (u_char *)&tclass,
					   sizeof(tclass),
					   optp,
					   kauth_cred_get(), uproto);
			break;
		}

		case IPV6_TCLASS:
		case IPV6_DONTFRAG:
		case IPV6_USE_MIN_MTU:
		case IPV6_PREFER_TEMPADDR:
			/* int-valued options kept in the output options. */
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;
			{
				struct ip6_pktopts **optp;
				optp = &in6p->in6p_outputopts;
				error = ip6_pcbopt(optname,
						   (u_char *)&optval,
						   sizeof(optval),
						   optp,
						   kauth_cred_get(), uproto);
				break;
			}

#ifdef RFC2292
		case IPV6_2292PKTINFO:
		case IPV6_2292HOPLIMIT:
		case IPV6_2292HOPOPTS:
		case IPV6_2292DSTOPTS:
		case IPV6_2292RTHDR:
			/* RFC 2292 */
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;

			switch (optname) {
			case IPV6_2292PKTINFO:
				OPTSET2292(IN6P_PKTINFO);
				break;
			case IPV6_2292HOPLIMIT:
				OPTSET2292(IN6P_HOPLIMIT);
				break;
			case IPV6_2292HOPOPTS:
				/*
				 * Check super-user privilege.
				 * See comments for IPV6_RECVHOPOPTS.
				 */
				error =
				    kauth_authorize_network(kauth_cred_get(),
				    KAUTH_NETWORK_IPV6,
				    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
				    NULL, NULL);
				if (error)
					return (error);
				OPTSET2292(IN6P_HOPOPTS);
				break;
			case IPV6_2292DSTOPTS:
				/* Same privilege check as for hop-by-hop. */
				error =
				    kauth_authorize_network(kauth_cred_get(),
				    KAUTH_NETWORK_IPV6,
				    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
				    NULL, NULL);
				if (error)
					return (error);
				OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
				break;
			case IPV6_2292RTHDR:
				OPTSET2292(IN6P_RTHDR);
				break;
			}
			break;
#endif
		case IPV6_PKTINFO:
		case IPV6_HOPOPTS:
		case IPV6_RTHDR:
		case IPV6_DSTOPTS:
		case IPV6_RTHDRDSTOPTS:
		case IPV6_NEXTHOP: {
			/* new advanced API (RFC3542) */
			void *optbuf;
			int optbuflen;
			struct ip6_pktopts **optp;

#ifdef RFC2292
			/* cannot mix with RFC2292 */
			if (OPTBIT(IN6P_RFC2292)) {
				error = EINVAL;
				break;
			}
#endif

			/*
			 * Copy the option value into a temporary buffer of
			 * sopt_size bytes, hand it to ip6_pcbopt() and
			 * release the buffer again on every path.
			 */
			optbuflen = sopt->sopt_size;
			optbuf = malloc(optbuflen, M_IP6OPT, M_NOWAIT);
			if (optbuf == NULL) {
				error = ENOBUFS;
				break;
			}

			error = sockopt_get(sopt, optbuf, optbuflen);
			if (error) {
				free(optbuf, M_IP6OPT);
				break;
			}
			optp = &in6p->in6p_outputopts;
			error = ip6_pcbopt(optname, optbuf, optbuflen,
			    optp, kauth_cred_get(), uproto);

			free(optbuf, M_IP6OPT);
			break;
			}
#undef OPTSET

		case IPV6_MULTICAST_IF:
		case IPV6_MULTICAST_HOPS:
		case IPV6_MULTICAST_LOOP:
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
			error = ip6_setmoptions(sopt, in6p);
			break;

		case IPV6_PORTRANGE:
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;

			/* IN6P_LOWPORT and IN6P_HIGHPORT are never both set. */
			switch (optval) {
			case IPV6_PORTRANGE_DEFAULT:
				in6p->in6p_flags &= ~(IN6P_LOWPORT);
				in6p->in6p_flags &= ~(IN6P_HIGHPORT);
				break;

			case IPV6_PORTRANGE_HIGH:
				in6p->in6p_flags &= ~(IN6P_LOWPORT);
				in6p->in6p_flags |= IN6P_HIGHPORT;
				break;

			case IPV6_PORTRANGE_LOW:
				in6p->in6p_flags &= ~(IN6P_HIGHPORT);
				in6p->in6p_flags |= IN6P_LOWPORT;
				break;

			default:
				error = EINVAL;
				break;
			}
			break;

		case IPV6_PORTALGO:
			error = sockopt_getint(sopt, &optval);
			if (error)
				break;

			error = portalgo_algo_index_select(
			    (struct inpcb_hdr *)in6p, optval);
			break;

#if defined(IPSEC)
		case IPV6_IPSEC_POLICY:
			if (ipsec_enabled) {
				error = ipsec_set_policy(in6p,
				    sopt->sopt_data, sopt->sopt_size,
				    kauth_cred_get());
				break;
			}
			/*FALLTHROUGH*/
#endif /* IPSEC */

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	case PRCO_GETOPT:
		switch (optname) {
#ifdef RFC2292
		case IPV6_2292PKTOPTIONS:
			/*
			 * RFC3542 (effectively) deprecated the
			 * semantics of the 2292-style pktoptions.
			 * Since it was not reliable in nature (i.e.,
			 * applications had to expect the lack of some
			 * information after all), it would make sense
			 * to simplify this part by always returning
			 * empty data.
			 */
			break;
#endif

		case IPV6_RECVHOPOPTS:
		case IPV6_RECVDSTOPTS:
		case IPV6_RECVRTHDRDSTOPTS:
		case IPV6_UNICAST_HOPS:
		case IPV6_RECVPKTINFO:
		case IPV6_RECVHOPLIMIT:
		case IPV6_RECVRTHDR:
		case IPV6_RECVPATHMTU:

		case IPV6_FAITH:
		case IPV6_V6ONLY:
		case IPV6_PORTRANGE:
		case IPV6_RECVTCLASS:
			/* Compute the int result in optval, then return it. */
			switch (optname) {

			case IPV6_RECVHOPOPTS:
				optval = OPTBIT(IN6P_HOPOPTS);
				break;

			case IPV6_RECVDSTOPTS:
				optval = OPTBIT(IN6P_DSTOPTS);
				break;

			case IPV6_RECVRTHDRDSTOPTS:
				optval = OPTBIT(IN6P_RTHDRDSTOPTS);
				break;

			case IPV6_UNICAST_HOPS:
				optval = in6p->in6p_hops;
				break;

			case IPV6_RECVPKTINFO:
				optval = OPTBIT(IN6P_PKTINFO);
				break;

			case IPV6_RECVHOPLIMIT:
				optval = OPTBIT(IN6P_HOPLIMIT);
				break;

			case IPV6_RECVRTHDR:
				optval = OPTBIT(IN6P_RTHDR);
				break;

			case IPV6_RECVPATHMTU:
				optval = OPTBIT(IN6P_MTU);
				break;

			case IPV6_FAITH:
				optval = OPTBIT(IN6P_FAITH);
				break;

			case IPV6_V6ONLY:
				optval = OPTBIT(IN6P_IPV6_V6ONLY);
				break;

			case IPV6_PORTRANGE:
			    {
				int flags;
				flags = in6p->in6p_flags;
				if (flags & IN6P_HIGHPORT)
					optval = IPV6_PORTRANGE_HIGH;
				else if (flags & IN6P_LOWPORT)
					optval = IPV6_PORTRANGE_LOW;
				else
					optval = 0;
				break;
			    }
			case IPV6_RECVTCLASS:
				optval = OPTBIT(IN6P_TCLASS);
				break;

			}
			/* None of the cases above assigns error. */
			if (error)
				break;
			error = sockopt_setint(sopt, optval);
			break;

		case IPV6_PATHMTU:
		    {
			u_long pmtu = 0;
			struct ip6_mtuinfo mtuinfo;
			struct route *ro = &in6p->in6p_route;
			struct rtentry *rt;
			union {
				struct sockaddr		dst;
				struct sockaddr_in6	dst6;
			} u;

			if (!(so->so_state & SS_ISCONNECTED))
				return (ENOTCONN);
			/*
			 * XXX: we do not consider the case of source
			 * routing, or optional information to specify
			 * the outgoing interface.
			 */
			sockaddr_in6_init(&u.dst6, &in6p->in6p_faddr, 0, 0, 0);
			rt = rtcache_lookup(ro, &u.dst);
			error = ip6_getpmtu(rt, NULL, &pmtu, NULL);
			rtcache_unref(rt, ro);
			if (error)
				break;
			/* Never report more than IPV6_MAXPACKET. */
			if (pmtu > IPV6_MAXPACKET)
				pmtu = IPV6_MAXPACKET;

			/* Only ip6m_mtu is filled in; the rest stays zero. */
			memset(&mtuinfo, 0, sizeof(mtuinfo));
			mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
			optdata = (void *)&mtuinfo;
			optdatalen = sizeof(mtuinfo);
			if (optdatalen > MCLBYTES)
				return (EMSGSIZE); /* XXX */
			error = sockopt_set(sopt, optdata, optdatalen);
			break;
		    }

#ifdef RFC2292
		case IPV6_2292PKTINFO:
		case IPV6_2292HOPLIMIT:
		case IPV6_2292HOPOPTS:
		case IPV6_2292RTHDR:
		case IPV6_2292DSTOPTS:
			switch (optname) {
			case IPV6_2292PKTINFO:
				optval = OPTBIT(IN6P_PKTINFO);
				break;
			case IPV6_2292HOPLIMIT:
				optval = OPTBIT(IN6P_HOPLIMIT);
				break;
			case IPV6_2292HOPOPTS:
				optval = OPTBIT(IN6P_HOPOPTS);
				break;
			case IPV6_2292RTHDR:
				optval = OPTBIT(IN6P_RTHDR);
				break;
			case IPV6_2292DSTOPTS:
				/* 1 when either of the two flags is set. */
				optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
				break;
			}
			error = sockopt_setint(sopt, optval);
			break;
#endif
		case IPV6_PKTINFO:
		case IPV6_HOPOPTS:
		case IPV6_RTHDR:
		case IPV6_DSTOPTS:
		case IPV6_RTHDRDSTOPTS:
		case IPV6_NEXTHOP:
		case IPV6_OTCLASS:
		case IPV6_TCLASS:
		case IPV6_DONTFRAG:
		case IPV6_USE_MIN_MTU:
		case IPV6_PREFER_TEMPADDR:
			/* Values kept in the PCB's output options. */
			error = ip6_getpcbopt(in6p->in6p_outputopts,
			    optname, sopt);
			break;

		case IPV6_MULTICAST_IF:
		case IPV6_MULTICAST_HOPS:
		case IPV6_MULTICAST_LOOP:
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
			error = ip6_getmoptions(sopt, in6p);
			break;

		case IPV6_PORTALGO:
			optval = ((struct inpcb_hdr *)in6p)->inph_portalgo;
			error = sockopt_setint(sopt, optval);
			break;

#if defined(IPSEC)
		case IPV6_IPSEC_POLICY:
			if (ipsec_used) {
				struct mbuf *m = NULL;

				/*
				 * XXX: this will return EINVAL as sopt is
				 * empty
				 */
				error = ipsec_get_policy(in6p, sopt->sopt_data,
				    sopt->sopt_size, &m);
				if (!error)
					error = sockopt_setmbuf(sopt, m);
				break;
			}
			/*FALLTHROUGH*/
#endif /* IPSEC */

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
	return (error);
}
1950 
1951 int
1952 ip6_raw_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1953 {
1954 	int error = 0, optval;
1955 	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
1956 	struct in6pcb *in6p = sotoin6pcb(so);
1957 	int level, optname;
1958 
1959 	KASSERT(sopt != NULL);
1960 
1961 	level = sopt->sopt_level;
1962 	optname = sopt->sopt_name;
1963 
1964 	if (level != IPPROTO_IPV6) {
1965 		return ENOPROTOOPT;
1966 	}
1967 
1968 	switch (optname) {
1969 	case IPV6_CHECKSUM:
1970 		/*
1971 		 * For ICMPv6 sockets, no modification allowed for checksum
1972 		 * offset, permit "no change" values to help existing apps.
1973 		 *
1974 		 * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
1975 		 * for an ICMPv6 socket will fail."  The current
1976 		 * behavior does not meet RFC3542.
1977 		 */
1978 		switch (op) {
1979 		case PRCO_SETOPT:
1980 			error = sockopt_getint(sopt, &optval);
1981 			if (error)
1982 				break;
1983 			if ((optval % 2) != 0) {
1984 				/* the API assumes even offset values */
1985 				error = EINVAL;
1986 			} else if (so->so_proto->pr_protocol ==
1987 			    IPPROTO_ICMPV6) {
1988 				if (optval != icmp6off)
1989 					error = EINVAL;
1990 			} else
1991 				in6p->in6p_cksum = optval;
1992 			break;
1993 
1994 		case PRCO_GETOPT:
1995 			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
1996 				optval = icmp6off;
1997 			else
1998 				optval = in6p->in6p_cksum;
1999 
2000 			error = sockopt_setint(sopt, optval);
2001 			break;
2002 
2003 		default:
2004 			error = EINVAL;
2005 			break;
2006 		}
2007 		break;
2008 
2009 	default:
2010 		error = ENOPROTOOPT;
2011 		break;
2012 	}
2013 
2014 	return (error);
2015 }
2016 
#ifdef RFC2292
/*
 * Set up IP6 options in pcb for insertion in output packets or
 * specifying behavior of outgoing packets.
 *
 * Any options currently held in *pktopt are cleared first.  When sopt
 * is NULL or empty that is all that happens: the structure is freed and
 * *pktopt becomes NULL.  Otherwise the options carried by sopt are
 * parsed with ip6_setpktopts() and, on success, stored in *pktopt.
 *
 * Returns 0 or an errno value; on any failure *pktopt is left NULL and
 * the options structure has been freed.  The socket lock must be held.
 */
static int
ip6_pcbopts(struct ip6_pktopts **pktopt, struct socket *so,
    struct sockopt *sopt)
{
	struct ip6_pktopts *po = *pktopt;
	struct mbuf *ctl;
	int rv;

	KASSERT(solocked(so));

	if (po == NULL) {
		/* Nothing stored yet: get a structure to fill in. */
		po = malloc(sizeof(*po), M_IP6OPT, M_NOWAIT);
		if (po == NULL)
			return (ENOBUFS);
	} else {
		/* turn off any old options. */
#ifdef DIAGNOSTIC
		if (po->ip6po_pktinfo || po->ip6po_nexthop ||
		    po->ip6po_hbh || po->ip6po_dest1 || po->ip6po_dest2 ||
		    po->ip6po_rhinfo.ip6po_rhi_rthdr)
			printf("ip6_pcbopts: all specified options are cleared.\n");
#endif
		ip6_clearpktopts(po, -1);
	}
	*pktopt = NULL;

	if (sopt == NULL || sopt->sopt_size == 0) {
		/*
		 * The caller only wanted the previous options gone; this
		 * applies whether po was just allocated or passed in.
		 */
		free(po, M_IP6OPT);
		return (0);
	}

	/* set options specified by user. */
	ctl = sockopt_getmbuf(sopt);
	if (ctl == NULL) {
		free(po, M_IP6OPT);
		return (ENOBUFS);
	}

	rv = ip6_setpktopts(ctl, po, NULL, kauth_cred_get(),
	    so->so_proto->pr_protocol);
	m_freem(ctl);
	if (rv == 0) {
		*pktopt = po;
		return (0);
	}

	ip6_clearpktopts(po, -1); /* XXX: discard all options */
	free(po, M_IP6OPT);
	return (rv);
}
#endif
2076 
2077 /*
2078  * initialize ip6_pktopts.  beware that there are non-zero default values in
2079  * the struct.
2080  */
2081 void
2082 ip6_initpktopts(struct ip6_pktopts *opt)
2083 {
2084 
2085 	memset(opt, 0, sizeof(*opt));
2086 	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2087 	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2088 	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2089 	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2090 }
2091 
2092 #define sin6tosa(sin6)	((struct sockaddr *)(sin6)) /* XXX */
2093 static int
2094 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2095     kauth_cred_t cred, int uproto)
2096 {
2097 	struct ip6_pktopts *opt;
2098 
2099 	if (*pktopt == NULL) {
2100 		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2101 		    M_NOWAIT);
2102 		if (*pktopt == NULL)
2103 			return (ENOBUFS);
2104 
2105 		ip6_initpktopts(*pktopt);
2106 	}
2107 	opt = *pktopt;
2108 
2109 	return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2110 }
2111 
2112 static int
2113 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2114 {
2115 	void *optdata = NULL;
2116 	int optdatalen = 0;
2117 	struct ip6_ext *ip6e;
2118 	int error = 0;
2119 	struct in6_pktinfo null_pktinfo;
2120 	int deftclass = 0, on;
2121 	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2122 	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2123 
2124 	switch (optname) {
2125 	case IPV6_PKTINFO:
2126 		if (pktopt && pktopt->ip6po_pktinfo)
2127 			optdata = (void *)pktopt->ip6po_pktinfo;
2128 		else {
2129 			/* XXX: we don't have to do this every time... */
2130 			memset(&null_pktinfo, 0, sizeof(null_pktinfo));
2131 			optdata = (void *)&null_pktinfo;
2132 		}
2133 		optdatalen = sizeof(struct in6_pktinfo);
2134 		break;
2135 	case IPV6_OTCLASS:
2136 		/* XXX */
2137 		return (EINVAL);
2138 	case IPV6_TCLASS:
2139 		if (pktopt && pktopt->ip6po_tclass >= 0)
2140 			optdata = (void *)&pktopt->ip6po_tclass;
2141 		else
2142 			optdata = (void *)&deftclass;
2143 		optdatalen = sizeof(int);
2144 		break;
2145 	case IPV6_HOPOPTS:
2146 		if (pktopt && pktopt->ip6po_hbh) {
2147 			optdata = (void *)pktopt->ip6po_hbh;
2148 			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2149 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2150 		}
2151 		break;
2152 	case IPV6_RTHDR:
2153 		if (pktopt && pktopt->ip6po_rthdr) {
2154 			optdata = (void *)pktopt->ip6po_rthdr;
2155 			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2156 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2157 		}
2158 		break;
2159 	case IPV6_RTHDRDSTOPTS:
2160 		if (pktopt && pktopt->ip6po_dest1) {
2161 			optdata = (void *)pktopt->ip6po_dest1;
2162 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2163 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2164 		}
2165 		break;
2166 	case IPV6_DSTOPTS:
2167 		if (pktopt && pktopt->ip6po_dest2) {
2168 			optdata = (void *)pktopt->ip6po_dest2;
2169 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2170 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2171 		}
2172 		break;
2173 	case IPV6_NEXTHOP:
2174 		if (pktopt && pktopt->ip6po_nexthop) {
2175 			optdata = (void *)pktopt->ip6po_nexthop;
2176 			optdatalen = pktopt->ip6po_nexthop->sa_len;
2177 		}
2178 		break;
2179 	case IPV6_USE_MIN_MTU:
2180 		if (pktopt)
2181 			optdata = (void *)&pktopt->ip6po_minmtu;
2182 		else
2183 			optdata = (void *)&defminmtu;
2184 		optdatalen = sizeof(int);
2185 		break;
2186 	case IPV6_DONTFRAG:
2187 		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2188 			on = 1;
2189 		else
2190 			on = 0;
2191 		optdata = (void *)&on;
2192 		optdatalen = sizeof(on);
2193 		break;
2194 	case IPV6_PREFER_TEMPADDR:
2195 		if (pktopt)
2196 			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2197 		else
2198 			optdata = (void *)&defpreftemp;
2199 		optdatalen = sizeof(int);
2200 		break;
2201 	default:		/* should not happen */
2202 #ifdef DIAGNOSTIC
2203 		panic("ip6_getpcbopt: unexpected option\n");
2204 #endif
2205 		return (ENOPROTOOPT);
2206 	}
2207 
2208 	error = sockopt_set(sopt, optdata, optdatalen);
2209 
2210 	return (error);
2211 }
2212 
2213 void
2214 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2215 {
2216 	if (optname == -1 || optname == IPV6_PKTINFO) {
2217 		if (pktopt->ip6po_pktinfo)
2218 			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2219 		pktopt->ip6po_pktinfo = NULL;
2220 	}
2221 	if (optname == -1 || optname == IPV6_HOPLIMIT)
2222 		pktopt->ip6po_hlim = -1;
2223 	if (optname == -1 || optname == IPV6_TCLASS)
2224 		pktopt->ip6po_tclass = -1;
2225 	if (optname == -1 || optname == IPV6_NEXTHOP) {
2226 		rtcache_free(&pktopt->ip6po_nextroute);
2227 		if (pktopt->ip6po_nexthop)
2228 			free(pktopt->ip6po_nexthop, M_IP6OPT);
2229 		pktopt->ip6po_nexthop = NULL;
2230 	}
2231 	if (optname == -1 || optname == IPV6_HOPOPTS) {
2232 		if (pktopt->ip6po_hbh)
2233 			free(pktopt->ip6po_hbh, M_IP6OPT);
2234 		pktopt->ip6po_hbh = NULL;
2235 	}
2236 	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2237 		if (pktopt->ip6po_dest1)
2238 			free(pktopt->ip6po_dest1, M_IP6OPT);
2239 		pktopt->ip6po_dest1 = NULL;
2240 	}
2241 	if (optname == -1 || optname == IPV6_RTHDR) {
2242 		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2243 			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2244 		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2245 		rtcache_free(&pktopt->ip6po_route);
2246 	}
2247 	if (optname == -1 || optname == IPV6_DSTOPTS) {
2248 		if (pktopt->ip6po_dest2)
2249 			free(pktopt->ip6po_dest2, M_IP6OPT);
2250 		pktopt->ip6po_dest2 = NULL;
2251 	}
2252 }
2253 
2254 #define PKTOPT_EXTHDRCPY(type) 					\
2255 do {								\
2256 	if (src->type) {					\
2257 		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2258 		dst->type = malloc(hlen, M_IP6OPT, canwait);	\
2259 		if (dst->type == NULL)				\
2260 			goto bad;				\
2261 		memcpy(dst->type, src->type, hlen);		\
2262 	}							\
2263 } while (/*CONSTCOND*/ 0)
2264 
2265 static int
2266 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2267 {
2268 	dst->ip6po_hlim = src->ip6po_hlim;
2269 	dst->ip6po_tclass = src->ip6po_tclass;
2270 	dst->ip6po_flags = src->ip6po_flags;
2271 	dst->ip6po_minmtu = src->ip6po_minmtu;
2272 	dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2273 	if (src->ip6po_pktinfo) {
2274 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2275 		    M_IP6OPT, canwait);
2276 		if (dst->ip6po_pktinfo == NULL)
2277 			goto bad;
2278 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2279 	}
2280 	if (src->ip6po_nexthop) {
2281 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2282 		    M_IP6OPT, canwait);
2283 		if (dst->ip6po_nexthop == NULL)
2284 			goto bad;
2285 		memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
2286 		    src->ip6po_nexthop->sa_len);
2287 	}
2288 	PKTOPT_EXTHDRCPY(ip6po_hbh);
2289 	PKTOPT_EXTHDRCPY(ip6po_dest1);
2290 	PKTOPT_EXTHDRCPY(ip6po_dest2);
2291 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2292 	return (0);
2293 
2294   bad:
2295 	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2296 	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2297 	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2298 	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2299 	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2300 	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2301 
2302 	return (ENOBUFS);
2303 }
2304 #undef PKTOPT_EXTHDRCPY
2305 
2306 struct ip6_pktopts *
2307 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2308 {
2309 	int error;
2310 	struct ip6_pktopts *dst;
2311 
2312 	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2313 	if (dst == NULL)
2314 		return (NULL);
2315 	ip6_initpktopts(dst);
2316 
2317 	if ((error = copypktopts(dst, src, canwait)) != 0) {
2318 		free(dst, M_IP6OPT);
2319 		return (NULL);
2320 	}
2321 
2322 	return (dst);
2323 }
2324 
2325 void
2326 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2327 {
2328 	if (pktopt == NULL)
2329 		return;
2330 
2331 	ip6_clearpktopts(pktopt, -1);
2332 
2333 	free(pktopt, M_IP6OPT);
2334 }
2335 
2336 int
2337 ip6_get_membership(const struct sockopt *sopt, struct ifnet **ifp,
2338     struct psref *psref, void *v, size_t l)
2339 {
2340 	struct ipv6_mreq mreq;
2341 	int error;
2342 	struct in6_addr *ia = &mreq.ipv6mr_multiaddr;
2343 	struct in_addr *ia4 = (void *)&ia->s6_addr32[3];
2344 
2345 	error = sockopt_get(sopt, &mreq, sizeof(mreq));
2346 	if (error != 0)
2347 		return error;
2348 
2349 	if (IN6_IS_ADDR_UNSPECIFIED(ia)) {
2350 		/*
2351 		 * We use the unspecified address to specify to accept
2352 		 * all multicast addresses. Only super user is allowed
2353 		 * to do this.
2354 		 */
2355 		if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_IPV6,
2356 		    KAUTH_REQ_NETWORK_IPV6_JOIN_MULTICAST, NULL, NULL, NULL))
2357 			return EACCES;
2358 	} else if (IN6_IS_ADDR_V4MAPPED(ia)) {
2359 		// Don't bother if we are not going to use ifp.
2360 		if (l == sizeof(*ia)) {
2361 			memcpy(v, ia, l);
2362 			return 0;
2363 		}
2364 	} else if (!IN6_IS_ADDR_MULTICAST(ia)) {
2365 		return EINVAL;
2366 	}
2367 
2368 	/*
2369 	 * If no interface was explicitly specified, choose an
2370 	 * appropriate one according to the given multicast address.
2371 	 */
2372 	if (mreq.ipv6mr_interface == 0) {
2373 		struct rtentry *rt;
2374 		union {
2375 			struct sockaddr		dst;
2376 			struct sockaddr_in	dst4;
2377 			struct sockaddr_in6	dst6;
2378 		} u;
2379 		struct route ro;
2380 
2381 		/*
2382 		 * Look up the routing table for the
2383 		 * address, and choose the outgoing interface.
2384 		 *   XXX: is it a good approach?
2385 		 */
2386 		memset(&ro, 0, sizeof(ro));
2387 		if (IN6_IS_ADDR_V4MAPPED(ia))
2388 			sockaddr_in_init(&u.dst4, ia4, 0);
2389 		else
2390 			sockaddr_in6_init(&u.dst6, ia, 0, 0, 0);
2391 		error = rtcache_setdst(&ro, &u.dst);
2392 		if (error != 0)
2393 			return error;
2394 		rt = rtcache_init(&ro);
2395 		*ifp = rt != NULL ?
2396 		    if_get_byindex(rt->rt_ifp->if_index, psref) : NULL;
2397 		rtcache_unref(rt, &ro);
2398 		rtcache_free(&ro);
2399 	} else {
2400 		/*
2401 		 * If the interface is specified, validate it.
2402 		 */
2403 		*ifp = if_get_byindex(mreq.ipv6mr_interface, psref);
2404 		if (*ifp == NULL)
2405 			return ENXIO;	/* XXX EINVAL? */
2406 	}
2407 	if (sizeof(*ia) == l)
2408 		memcpy(v, ia, l);
2409 	else
2410 		memcpy(v, ia4, l);
2411 	return 0;
2412 }
2413 
2414 /*
2415  * Set the IP6 multicast options in response to user setsockopt().
2416  */
2417 static int
2418 ip6_setmoptions(const struct sockopt *sopt, struct in6pcb *in6p)
2419 {
2420 	int error = 0;
2421 	u_int loop, ifindex;
2422 	struct ipv6_mreq mreq;
2423 	struct in6_addr ia;
2424 	struct ifnet *ifp;
2425 	struct ip6_moptions *im6o = in6p->in6p_moptions;
2426 	struct in6_multi_mship *imm;
2427 
2428 	KASSERT(in6p_locked(in6p));
2429 
2430 	if (im6o == NULL) {
2431 		/*
2432 		 * No multicast option buffer attached to the pcb;
2433 		 * allocate one and initialize to default values.
2434 		 */
2435 		im6o = malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
2436 		if (im6o == NULL)
2437 			return (ENOBUFS);
2438 		in6p->in6p_moptions = im6o;
2439 		im6o->im6o_multicast_if_index = 0;
2440 		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2441 		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2442 		LIST_INIT(&im6o->im6o_memberships);
2443 	}
2444 
2445 	switch (sopt->sopt_name) {
2446 
2447 	case IPV6_MULTICAST_IF: {
2448 		int s;
2449 		/*
2450 		 * Select the interface for outgoing multicast packets.
2451 		 */
2452 		error = sockopt_get(sopt, &ifindex, sizeof(ifindex));
2453 		if (error != 0)
2454 			break;
2455 
2456 		s = pserialize_read_enter();
2457 		if (ifindex != 0) {
2458 			if ((ifp = if_byindex(ifindex)) == NULL) {
2459 				pserialize_read_exit(s);
2460 				error = ENXIO;	/* XXX EINVAL? */
2461 				break;
2462 			}
2463 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2464 				pserialize_read_exit(s);
2465 				error = EADDRNOTAVAIL;
2466 				break;
2467 			}
2468 		} else
2469 			ifp = NULL;
2470 		im6o->im6o_multicast_if_index = if_get_index(ifp);
2471 		pserialize_read_exit(s);
2472 		break;
2473 	    }
2474 
2475 	case IPV6_MULTICAST_HOPS:
2476 	    {
2477 		/*
2478 		 * Set the IP6 hoplimit for outgoing multicast packets.
2479 		 */
2480 		int optval;
2481 
2482 		error = sockopt_getint(sopt, &optval);
2483 		if (error != 0)
2484 			break;
2485 
2486 		if (optval < -1 || optval >= 256)
2487 			error = EINVAL;
2488 		else if (optval == -1)
2489 			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2490 		else
2491 			im6o->im6o_multicast_hlim = optval;
2492 		break;
2493 	    }
2494 
2495 	case IPV6_MULTICAST_LOOP:
2496 		/*
2497 		 * Set the loopback flag for outgoing multicast packets.
2498 		 * Must be zero or one.
2499 		 */
2500 		error = sockopt_get(sopt, &loop, sizeof(loop));
2501 		if (error != 0)
2502 			break;
2503 		if (loop > 1) {
2504 			error = EINVAL;
2505 			break;
2506 		}
2507 		im6o->im6o_multicast_loop = loop;
2508 		break;
2509 
2510 	case IPV6_JOIN_GROUP: {
2511 		int bound;
2512 		struct psref psref;
2513 		/*
2514 		 * Add a multicast group membership.
2515 		 * Group must be a valid IP6 multicast address.
2516 		 */
2517 		bound = curlwp_bind();
2518 		ifp = NULL;
2519 		error = ip6_get_membership(sopt, &ifp, &psref, &ia, sizeof(ia));
2520 		if (error != 0) {
2521 			KASSERT(ifp == NULL);
2522 			curlwp_bindx(bound);
2523 			return error;
2524 		}
2525 
2526 		if (IN6_IS_ADDR_V4MAPPED(&ia)) {
2527 			error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2528 			goto put_break;
2529 		}
2530 		/*
2531 		 * See if we found an interface, and confirm that it
2532 		 * supports multicast
2533 		 */
2534 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2535 			error = EADDRNOTAVAIL;
2536 			goto put_break;
2537 		}
2538 
2539 		if (in6_setscope(&ia, ifp, NULL)) {
2540 			error = EADDRNOTAVAIL; /* XXX: should not happen */
2541 			goto put_break;
2542 		}
2543 
2544 		/*
2545 		 * See if the membership already exists.
2546 		 */
2547 		LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2548 			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2549 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2550 			    &ia))
2551 				goto put_break;
2552 		}
2553 		if (imm != NULL) {
2554 			error = EADDRINUSE;
2555 			goto put_break;
2556 		}
2557 		/*
2558 		 * Everything looks good; add a new record to the multicast
2559 		 * address list for the given interface.
2560 		 */
2561 		IFNET_LOCK(ifp);
2562 		imm = in6_joingroup(ifp, &ia, &error, 0);
2563 		IFNET_UNLOCK(ifp);
2564 		if (imm == NULL)
2565 			goto put_break;
2566 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2567 	    put_break:
2568 		if_put(ifp, &psref);
2569 		curlwp_bindx(bound);
2570 		break;
2571 	    }
2572 
2573 	case IPV6_LEAVE_GROUP: {
2574 		struct ifnet *in6m_ifp;
2575 		/*
2576 		 * Drop a multicast group membership.
2577 		 * Group must be a valid IP6 multicast address.
2578 		 */
2579 		error = sockopt_get(sopt, &mreq, sizeof(mreq));
2580 		if (error != 0)
2581 			break;
2582 
2583 		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
2584 			error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2585 			break;
2586 		}
2587 		/*
2588 		 * If an interface address was specified, get a pointer
2589 		 * to its ifnet structure.
2590 		 */
2591 		if (mreq.ipv6mr_interface != 0) {
2592 			if ((ifp = if_byindex(mreq.ipv6mr_interface)) == NULL) {
2593 				error = ENXIO;	/* XXX EINVAL? */
2594 				break;
2595 			}
2596 		} else
2597 			ifp = NULL;
2598 
2599 		/* Fill in the scope zone ID */
2600 		if (ifp) {
2601 			if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
2602 				/* XXX: should not happen */
2603 				error = EADDRNOTAVAIL;
2604 				break;
2605 			}
2606 		} else if (mreq.ipv6mr_interface != 0) {
2607 			/*
2608 			 * XXX: This case would happens when the (positive)
2609 			 * index is in the valid range, but the corresponding
2610 			 * interface has been detached dynamically.  The above
2611 			 * check probably avoids such case to happen here, but
2612 			 * we check it explicitly for safety.
2613 			 */
2614 			error = EADDRNOTAVAIL;
2615 			break;
2616 		} else {	/* ipv6mr_interface == 0 */
2617 			struct sockaddr_in6 sa6_mc;
2618 
2619 			/*
2620 			 * The API spec says as follows:
2621 			 *  If the interface index is specified as 0, the
2622 			 *  system may choose a multicast group membership to
2623 			 *  drop by matching the multicast address only.
2624 			 * On the other hand, we cannot disambiguate the scope
2625 			 * zone unless an interface is provided.  Thus, we
2626 			 * check if there's ambiguity with the default scope
2627 			 * zone as the last resort.
2628 			 */
2629 			sockaddr_in6_init(&sa6_mc, &mreq.ipv6mr_multiaddr,
2630 			    0, 0, 0);
2631 			error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2632 			if (error != 0)
2633 				break;
2634 			mreq.ipv6mr_multiaddr = sa6_mc.sin6_addr;
2635 		}
2636 
2637 		/*
2638 		 * Find the membership in the membership list.
2639 		 */
2640 		LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2641 			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2642 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2643 			    &mreq.ipv6mr_multiaddr))
2644 				break;
2645 		}
2646 		if (imm == NULL) {
2647 			/* Unable to resolve interface */
2648 			error = EADDRNOTAVAIL;
2649 			break;
2650 		}
2651 		/*
2652 		 * Give up the multicast address record to which the
2653 		 * membership points.
2654 		 */
2655 		LIST_REMOVE(imm, i6mm_chain);
2656 		in6m_ifp = imm->i6mm_maddr->in6m_ifp;
2657 		IFNET_LOCK(in6m_ifp);
2658 		in6_leavegroup(imm);
2659 		/* in6m_ifp should not leave thanks to in6p_lock */
2660 		IFNET_UNLOCK(in6m_ifp);
2661 		break;
2662 	    }
2663 
2664 	default:
2665 		error = EOPNOTSUPP;
2666 		break;
2667 	}
2668 
2669 	/*
2670 	 * If all options have default values, no need to keep the mbuf.
2671 	 */
2672 	if (im6o->im6o_multicast_if_index == 0 &&
2673 	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2674 	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2675 	    LIST_EMPTY(&im6o->im6o_memberships)) {
2676 		free(in6p->in6p_moptions, M_IPMOPTS);
2677 		in6p->in6p_moptions = NULL;
2678 	}
2679 
2680 	return (error);
2681 }
2682 
2683 /*
2684  * Return the IP6 multicast options in response to user getsockopt().
2685  */
2686 static int
2687 ip6_getmoptions(struct sockopt *sopt, struct in6pcb *in6p)
2688 {
2689 	u_int optval;
2690 	int error;
2691 	struct ip6_moptions *im6o = in6p->in6p_moptions;
2692 
2693 	switch (sopt->sopt_name) {
2694 	case IPV6_MULTICAST_IF:
2695 		if (im6o == NULL || im6o->im6o_multicast_if_index == 0)
2696 			optval = 0;
2697 		else
2698 			optval = im6o->im6o_multicast_if_index;
2699 
2700 		error = sockopt_set(sopt, &optval, sizeof(optval));
2701 		break;
2702 
2703 	case IPV6_MULTICAST_HOPS:
2704 		if (im6o == NULL)
2705 			optval = ip6_defmcasthlim;
2706 		else
2707 			optval = im6o->im6o_multicast_hlim;
2708 
2709 		error = sockopt_set(sopt, &optval, sizeof(optval));
2710 		break;
2711 
2712 	case IPV6_MULTICAST_LOOP:
2713 		if (im6o == NULL)
2714 			optval = IPV6_DEFAULT_MULTICAST_LOOP;
2715 		else
2716 			optval = im6o->im6o_multicast_loop;
2717 
2718 		error = sockopt_set(sopt, &optval, sizeof(optval));
2719 		break;
2720 
2721 	default:
2722 		error = EOPNOTSUPP;
2723 	}
2724 
2725 	return (error);
2726 }
2727 
2728 /*
2729  * Discard the IP6 multicast options.
2730  */
2731 void
2732 ip6_freemoptions(struct ip6_moptions *im6o)
2733 {
2734 	struct in6_multi_mship *imm, *nimm;
2735 
2736 	if (im6o == NULL)
2737 		return;
2738 
2739 	/* The owner of im6o (in6p) should be protected by solock */
2740 	LIST_FOREACH_SAFE(imm, &im6o->im6o_memberships, i6mm_chain, nimm) {
2741 		struct ifnet *ifp;
2742 
2743 		LIST_REMOVE(imm, i6mm_chain);
2744 
2745 		ifp = imm->i6mm_maddr->in6m_ifp;
2746 		IFNET_LOCK(ifp);
2747 		in6_leavegroup(imm);
2748 		/* ifp should not leave thanks to solock */
2749 		IFNET_UNLOCK(ifp);
2750 	}
2751 	free(im6o, M_IPMOPTS);
2752 }
2753 
2754 /*
2755  * Set IPv6 outgoing packet options based on advanced API.
2756  */
2757 int
2758 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2759 	struct ip6_pktopts *stickyopt, kauth_cred_t cred, int uproto)
2760 {
2761 	struct cmsghdr *cm = 0;
2762 
2763 	if (control == NULL || opt == NULL)
2764 		return (EINVAL);
2765 
2766 	ip6_initpktopts(opt);
2767 	if (stickyopt) {
2768 		int error;
2769 
2770 		/*
2771 		 * If stickyopt is provided, make a local copy of the options
2772 		 * for this particular packet, then override them by ancillary
2773 		 * objects.
2774 		 * XXX: copypktopts() does not copy the cached route to a next
2775 		 * hop (if any).  This is not very good in terms of efficiency,
2776 		 * but we can allow this since this option should be rarely
2777 		 * used.
2778 		 */
2779 		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2780 			return (error);
2781 	}
2782 
2783 	/*
2784 	 * XXX: Currently, we assume all the optional information is stored
2785 	 * in a single mbuf.
2786 	 */
2787 	if (control->m_next)
2788 		return (EINVAL);
2789 
2790 	/* XXX if cm->cmsg_len is not aligned, control->m_len can become <0 */
2791 	for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2792 	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2793 		int error;
2794 
2795 		if (control->m_len < CMSG_LEN(0))
2796 			return (EINVAL);
2797 
2798 		cm = mtod(control, struct cmsghdr *);
2799 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2800 			return (EINVAL);
2801 		if (cm->cmsg_level != IPPROTO_IPV6)
2802 			continue;
2803 
2804 		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2805 		    cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2806 		if (error)
2807 			return (error);
2808 	}
2809 
2810 	return (0);
2811 }
2812 
2813 /*
2814  * Set a particular packet option, as a sticky option or an ancillary data
2815  * item.  "len" can be 0 only when it's a sticky option.
2816  * We have 4 cases of combination of "sticky" and "cmsg":
2817  * "sticky=0, cmsg=0": impossible
2818  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2819  * "sticky=1, cmsg=0": RFC3542 socket option
2820  * "sticky=1, cmsg=1": RFC2292 socket option
2821  */
2822 static int
2823 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2824     kauth_cred_t cred, int sticky, int cmsg, int uproto)
2825 {
2826 	int minmtupolicy;
2827 	int error;
2828 
2829 	if (!sticky && !cmsg) {
2830 #ifdef DIAGNOSTIC
2831 		printf("ip6_setpktopt: impossible case\n");
2832 #endif
2833 		return (EINVAL);
2834 	}
2835 
2836 	/*
2837 	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2838 	 * not be specified in the context of RFC3542.  Conversely,
2839 	 * RFC3542 types should not be specified in the context of RFC2292.
2840 	 */
2841 	if (!cmsg) {
2842 		switch (optname) {
2843 		case IPV6_2292PKTINFO:
2844 		case IPV6_2292HOPLIMIT:
2845 		case IPV6_2292NEXTHOP:
2846 		case IPV6_2292HOPOPTS:
2847 		case IPV6_2292DSTOPTS:
2848 		case IPV6_2292RTHDR:
2849 		case IPV6_2292PKTOPTIONS:
2850 			return (ENOPROTOOPT);
2851 		}
2852 	}
2853 	if (sticky && cmsg) {
2854 		switch (optname) {
2855 		case IPV6_PKTINFO:
2856 		case IPV6_HOPLIMIT:
2857 		case IPV6_NEXTHOP:
2858 		case IPV6_HOPOPTS:
2859 		case IPV6_DSTOPTS:
2860 		case IPV6_RTHDRDSTOPTS:
2861 		case IPV6_RTHDR:
2862 		case IPV6_USE_MIN_MTU:
2863 		case IPV6_DONTFRAG:
2864 		case IPV6_OTCLASS:
2865 		case IPV6_TCLASS:
2866 		case IPV6_PREFER_TEMPADDR: /* XXX not an RFC3542 option */
2867 			return (ENOPROTOOPT);
2868 		}
2869 	}
2870 
2871 	switch (optname) {
2872 #ifdef RFC2292
2873 	case IPV6_2292PKTINFO:
2874 #endif
2875 	case IPV6_PKTINFO:
2876 	{
2877 		struct in6_pktinfo *pktinfo;
2878 
2879 		if (len != sizeof(struct in6_pktinfo))
2880 			return (EINVAL);
2881 
2882 		pktinfo = (struct in6_pktinfo *)buf;
2883 
2884 		/*
2885 		 * An application can clear any sticky IPV6_PKTINFO option by
2886 		 * doing a "regular" setsockopt with ipi6_addr being
2887 		 * in6addr_any and ipi6_ifindex being zero.
2888 		 * [RFC 3542, Section 6]
2889 		 */
2890 		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2891 		    pktinfo->ipi6_ifindex == 0 &&
2892 		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2893 			ip6_clearpktopts(opt, optname);
2894 			break;
2895 		}
2896 
2897 		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2898 		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2899 			return (EINVAL);
2900 		}
2901 
2902 		/* Validate the interface index if specified. */
2903 		if (pktinfo->ipi6_ifindex) {
2904 			struct ifnet *ifp;
2905 			int s = pserialize_read_enter();
2906 			ifp = if_byindex(pktinfo->ipi6_ifindex);
2907 			if (ifp == NULL) {
2908 				pserialize_read_exit(s);
2909 				return ENXIO;
2910 			}
2911 			pserialize_read_exit(s);
2912 		}
2913 
2914 		/*
2915 		 * We store the address anyway, and let in6_selectsrc()
2916 		 * validate the specified address.  This is because ipi6_addr
2917 		 * may not have enough information about its scope zone, and
2918 		 * we may need additional information (such as outgoing
2919 		 * interface or the scope zone of a destination address) to
2920 		 * disambiguate the scope.
2921 		 * XXX: the delay of the validation may confuse the
2922 		 * application when it is used as a sticky option.
2923 		 */
2924 		if (opt->ip6po_pktinfo == NULL) {
2925 			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2926 			    M_IP6OPT, M_NOWAIT);
2927 			if (opt->ip6po_pktinfo == NULL)
2928 				return (ENOBUFS);
2929 		}
2930 		memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
2931 		break;
2932 	}
2933 
2934 #ifdef RFC2292
2935 	case IPV6_2292HOPLIMIT:
2936 #endif
2937 	case IPV6_HOPLIMIT:
2938 	{
2939 		int *hlimp;
2940 
2941 		/*
2942 		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2943 		 * to simplify the ordering among hoplimit options.
2944 		 */
2945 		if (optname == IPV6_HOPLIMIT && sticky)
2946 			return (ENOPROTOOPT);
2947 
2948 		if (len != sizeof(int))
2949 			return (EINVAL);
2950 		hlimp = (int *)buf;
2951 		if (*hlimp < -1 || *hlimp > 255)
2952 			return (EINVAL);
2953 
2954 		opt->ip6po_hlim = *hlimp;
2955 		break;
2956 	}
2957 
2958 	case IPV6_OTCLASS:
2959 		if (len != sizeof(u_int8_t))
2960 			return (EINVAL);
2961 
2962 		opt->ip6po_tclass = *(u_int8_t *)buf;
2963 		break;
2964 
2965 	case IPV6_TCLASS:
2966 	{
2967 		int tclass;
2968 
2969 		if (len != sizeof(int))
2970 			return (EINVAL);
2971 		tclass = *(int *)buf;
2972 		if (tclass < -1 || tclass > 255)
2973 			return (EINVAL);
2974 
2975 		opt->ip6po_tclass = tclass;
2976 		break;
2977 	}
2978 
2979 #ifdef RFC2292
2980 	case IPV6_2292NEXTHOP:
2981 #endif
2982 	case IPV6_NEXTHOP:
2983 		error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
2984 		    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
2985 		if (error)
2986 			return (error);
2987 
2988 		if (len == 0) {	/* just remove the option */
2989 			ip6_clearpktopts(opt, IPV6_NEXTHOP);
2990 			break;
2991 		}
2992 
2993 		/* check if cmsg_len is large enough for sa_len */
2994 		if (len < sizeof(struct sockaddr) || len < *buf)
2995 			return (EINVAL);
2996 
2997 		switch (((struct sockaddr *)buf)->sa_family) {
2998 		case AF_INET6:
2999 		{
3000 			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3001 
3002 			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3003 				return (EINVAL);
3004 
3005 			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3006 			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3007 				return (EINVAL);
3008 			}
3009 			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3010 			    != 0) {
3011 				return (error);
3012 			}
3013 			break;
3014 		}
3015 		case AF_LINK:	/* eventually be supported? */
3016 		default:
3017 			return (EAFNOSUPPORT);
3018 		}
3019 
3020 		/* turn off the previous option, then set the new option. */
3021 		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3022 		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3023 		if (opt->ip6po_nexthop == NULL)
3024 			return (ENOBUFS);
3025 		memcpy(opt->ip6po_nexthop, buf, *buf);
3026 		break;
3027 
3028 #ifdef RFC2292
3029 	case IPV6_2292HOPOPTS:
3030 #endif
3031 	case IPV6_HOPOPTS:
3032 	{
3033 		struct ip6_hbh *hbh;
3034 		int hbhlen;
3035 
3036 		/*
3037 		 * XXX: We don't allow a non-privileged user to set ANY HbH
3038 		 * options, since per-option restriction has too much
3039 		 * overhead.
3040 		 */
3041 		error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3042 		    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3043 		if (error)
3044 			return (error);
3045 
3046 		if (len == 0) {
3047 			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3048 			break;	/* just remove the option */
3049 		}
3050 
3051 		/* message length validation */
3052 		if (len < sizeof(struct ip6_hbh))
3053 			return (EINVAL);
3054 		hbh = (struct ip6_hbh *)buf;
3055 		hbhlen = (hbh->ip6h_len + 1) << 3;
3056 		if (len != hbhlen)
3057 			return (EINVAL);
3058 
3059 		/* turn off the previous option, then set the new option. */
3060 		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3061 		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3062 		if (opt->ip6po_hbh == NULL)
3063 			return (ENOBUFS);
3064 		memcpy(opt->ip6po_hbh, hbh, hbhlen);
3065 
3066 		break;
3067 	}
3068 
3069 #ifdef RFC2292
3070 	case IPV6_2292DSTOPTS:
3071 #endif
3072 	case IPV6_DSTOPTS:
3073 	case IPV6_RTHDRDSTOPTS:
3074 	{
3075 		struct ip6_dest *dest, **newdest = NULL;
3076 		int destlen;
3077 
3078 		/* XXX: see the comment for IPV6_HOPOPTS */
3079 		error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3080 		    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3081 		if (error)
3082 			return (error);
3083 
3084 		if (len == 0) {
3085 			ip6_clearpktopts(opt, optname);
3086 			break;	/* just remove the option */
3087 		}
3088 
3089 		/* message length validation */
3090 		if (len < sizeof(struct ip6_dest))
3091 			return (EINVAL);
3092 		dest = (struct ip6_dest *)buf;
3093 		destlen = (dest->ip6d_len + 1) << 3;
3094 		if (len != destlen)
3095 			return (EINVAL);
3096 		/*
3097 		 * Determine the position that the destination options header
3098 		 * should be inserted; before or after the routing header.
3099 		 */
3100 		switch (optname) {
3101 		case IPV6_2292DSTOPTS:
3102 			/*
3103 			 * The old advanced API is ambiguous on this point.
3104 			 * Our approach is to determine the position based
3105 			 * according to the existence of a routing header.
3106 			 * Note, however, that this depends on the order of the
3107 			 * extension headers in the ancillary data; the 1st
3108 			 * part of the destination options header must appear
3109 			 * before the routing header in the ancillary data,
3110 			 * too.
3111 			 * RFC3542 solved the ambiguity by introducing
3112 			 * separate ancillary data or option types.
3113 			 */
3114 			if (opt->ip6po_rthdr == NULL)
3115 				newdest = &opt->ip6po_dest1;
3116 			else
3117 				newdest = &opt->ip6po_dest2;
3118 			break;
3119 		case IPV6_RTHDRDSTOPTS:
3120 			newdest = &opt->ip6po_dest1;
3121 			break;
3122 		case IPV6_DSTOPTS:
3123 			newdest = &opt->ip6po_dest2;
3124 			break;
3125 		}
3126 
3127 		/* turn off the previous option, then set the new option. */
3128 		ip6_clearpktopts(opt, optname);
3129 		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3130 		if (*newdest == NULL)
3131 			return (ENOBUFS);
3132 		memcpy(*newdest, dest, destlen);
3133 
3134 		break;
3135 	}
3136 
3137 #ifdef RFC2292
3138 	case IPV6_2292RTHDR:
3139 #endif
3140 	case IPV6_RTHDR:
3141 	{
3142 		struct ip6_rthdr *rth;
3143 		int rthlen;
3144 
3145 		if (len == 0) {
3146 			ip6_clearpktopts(opt, IPV6_RTHDR);
3147 			break;	/* just remove the option */
3148 		}
3149 
3150 		/* message length validation */
3151 		if (len < sizeof(struct ip6_rthdr))
3152 			return (EINVAL);
3153 		rth = (struct ip6_rthdr *)buf;
3154 		rthlen = (rth->ip6r_len + 1) << 3;
3155 		if (len != rthlen)
3156 			return (EINVAL);
3157 		switch (rth->ip6r_type) {
3158 		case IPV6_RTHDR_TYPE_0:
3159 			/* Dropped, RFC5095. */
3160 		default:
3161 			return (EINVAL);	/* not supported */
3162 		}
3163 		/* turn off the previous option */
3164 		ip6_clearpktopts(opt, IPV6_RTHDR);
3165 		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3166 		if (opt->ip6po_rthdr == NULL)
3167 			return (ENOBUFS);
3168 		memcpy(opt->ip6po_rthdr, rth, rthlen);
3169 		break;
3170 	}
3171 
3172 	case IPV6_USE_MIN_MTU:
3173 		if (len != sizeof(int))
3174 			return (EINVAL);
3175 		minmtupolicy = *(int *)buf;
3176 		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3177 		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3178 		    minmtupolicy != IP6PO_MINMTU_ALL) {
3179 			return (EINVAL);
3180 		}
3181 		opt->ip6po_minmtu = minmtupolicy;
3182 		break;
3183 
3184 	case IPV6_DONTFRAG:
3185 		if (len != sizeof(int))
3186 			return (EINVAL);
3187 
3188 		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3189 			/*
3190 			 * we ignore this option for TCP sockets.
3191 			 * (RFC3542 leaves this case unspecified.)
3192 			 */
3193 			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3194 		} else
3195 			opt->ip6po_flags |= IP6PO_DONTFRAG;
3196 		break;
3197 
3198 	case IPV6_PREFER_TEMPADDR:
3199 	{
3200 		int preftemp;
3201 
3202 		if (len != sizeof(int))
3203 			return (EINVAL);
3204 		preftemp = *(int *)buf;
3205 		switch (preftemp) {
3206 		case IP6PO_TEMPADDR_SYSTEM:
3207 		case IP6PO_TEMPADDR_NOTPREFER:
3208 		case IP6PO_TEMPADDR_PREFER:
3209 			break;
3210 		default:
3211 			return (EINVAL);
3212 		}
3213 		opt->ip6po_prefer_tempaddr = preftemp;
3214 		break;
3215 	}
3216 
3217 	default:
3218 		return (ENOPROTOOPT);
3219 	} /* end of switch */
3220 
3221 	return (0);
3222 }
3223 
3224 /*
3225  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3226  * packet to the input queue of a specified interface.  Note that this
3227  * calls the output routine of the loopback "driver", but with an interface
3228  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3229  */
3230 void
3231 ip6_mloopback(struct ifnet *ifp, struct mbuf *m,
3232 	const struct sockaddr_in6 *dst)
3233 {
3234 	struct mbuf *copym;
3235 	struct ip6_hdr *ip6;
3236 
3237 	copym = m_copypacket(m, M_DONTWAIT);
3238 	if (copym == NULL)
3239 		return;
3240 
3241 	/*
3242 	 * Make sure to deep-copy IPv6 header portion in case the data
3243 	 * is in an mbuf cluster, so that we can safely override the IPv6
3244 	 * header portion later.
3245 	 */
3246 	if ((copym->m_flags & M_EXT) != 0 ||
3247 	    copym->m_len < sizeof(struct ip6_hdr)) {
3248 		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3249 		if (copym == NULL)
3250 			return;
3251 	}
3252 
3253 #ifdef DIAGNOSTIC
3254 	if (copym->m_len < sizeof(*ip6)) {
3255 		m_freem(copym);
3256 		return;
3257 	}
3258 #endif
3259 
3260 	ip6 = mtod(copym, struct ip6_hdr *);
3261 	/*
3262 	 * clear embedded scope identifiers if necessary.
3263 	 * in6_clearscope will touch the addresses only when necessary.
3264 	 */
3265 	in6_clearscope(&ip6->ip6_src);
3266 	in6_clearscope(&ip6->ip6_dst);
3267 
3268 	(void)looutput(ifp, copym, (const struct sockaddr *)dst, NULL);
3269 }
3270 
3271 /*
3272  * Chop IPv6 header off from the payload.
3273  */
3274 static int
3275 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3276 {
3277 	struct mbuf *mh;
3278 	struct ip6_hdr *ip6;
3279 
3280 	ip6 = mtod(m, struct ip6_hdr *);
3281 	if (m->m_len > sizeof(*ip6)) {
3282 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3283 		if (mh == NULL) {
3284 			m_freem(m);
3285 			return ENOBUFS;
3286 		}
3287 		M_MOVE_PKTHDR(mh, m);
3288 		MH_ALIGN(mh, sizeof(*ip6));
3289 		m->m_len -= sizeof(*ip6);
3290 		m->m_data += sizeof(*ip6);
3291 		mh->m_next = m;
3292 		mh->m_len = sizeof(*ip6);
3293 		memcpy(mtod(mh, void *), (void *)ip6, sizeof(*ip6));
3294 		m = mh;
3295 	}
3296 	exthdrs->ip6e_ip6 = m;
3297 	return 0;
3298 }
3299 
3300 /*
3301  * Compute IPv6 extension header length.
3302  */
3303 int
3304 ip6_optlen(struct in6pcb *in6p)
3305 {
3306 	int len;
3307 
3308 	if (!in6p->in6p_outputopts)
3309 		return 0;
3310 
3311 	len = 0;
3312 #define elen(x) \
3313     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3314 
3315 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3316 	len += elen(in6p->in6p_outputopts->ip6po_dest1);
3317 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3318 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3319 	return len;
3320 #undef elen
3321 }
3322 
3323 /*
3324  * Ensure sending address is valid.
3325  * Returns 0 on success, -1 if an error should be sent back or 1
3326  * if the packet could be dropped without error (protocol dependent).
3327  */
3328 static int
3329 ip6_ifaddrvalid(const struct in6_addr *src, const struct in6_addr *dst)
3330 {
3331 	struct sockaddr_in6 sin6;
3332 	int s, error;
3333 	struct ifaddr *ifa;
3334 	struct in6_ifaddr *ia6;
3335 
3336 	if (IN6_IS_ADDR_UNSPECIFIED(src))
3337 		return 0;
3338 
3339 	memset(&sin6, 0, sizeof(sin6));
3340 	sin6.sin6_family = AF_INET6;
3341 	sin6.sin6_len = sizeof(sin6);
3342 	sin6.sin6_addr = *src;
3343 
3344 	s = pserialize_read_enter();
3345 	ifa = ifa_ifwithaddr(sin6tosa(&sin6));
3346 	if ((ia6 = ifatoia6(ifa)) == NULL ||
3347 	    ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_DUPLICATED))
3348 		error = -1;
3349 	else if (ia6->ia6_flags & IN6_IFF_TENTATIVE)
3350 		error = 1;
3351 	else if (ia6->ia6_flags & IN6_IFF_DETACHED &&
3352 	    (sin6.sin6_addr = *dst, ifa_ifwithaddr(sin6tosa(&sin6)) == NULL))
3353 		/* Allow internal traffic to DETACHED addresses */
3354 		error = 1;
3355 	else
3356 		error = 0;
3357 	pserialize_read_exit(s);
3358 
3359 	return error;
3360 }
3361