xref: /netbsd-src/sys/netinet6/ip6_output.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /*	$NetBSD: ip6_output.c,v 1.208 2018/05/01 07:21:39 maxv Exp $	*/
2 /*	$KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
62  */
63 
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: ip6_output.c,v 1.208 2018/05/01 07:21:39 maxv Exp $");
66 
67 #ifdef _KERNEL_OPT
68 #include "opt_inet.h"
69 #include "opt_inet6.h"
70 #include "opt_ipsec.h"
71 #endif
72 
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/errno.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/syslog.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/kauth.h>
83 
84 #include <net/if.h>
85 #include <net/route.h>
86 #include <net/pfil.h>
87 
88 #include <netinet/in.h>
89 #include <netinet/in_var.h>
90 #include <netinet/ip6.h>
91 #include <netinet/ip_var.h>
92 #include <netinet/icmp6.h>
93 #include <netinet/in_offload.h>
94 #include <netinet/portalgo.h>
95 #include <netinet6/in6_offload.h>
96 #include <netinet6/ip6_var.h>
97 #include <netinet6/ip6_private.h>
98 #include <netinet6/in6_pcb.h>
99 #include <netinet6/nd6.h>
100 #include <netinet6/ip6protosw.h>
101 #include <netinet6/scope6_var.h>
102 
103 #ifdef IPSEC
104 #include <netipsec/ipsec.h>
105 #include <netipsec/ipsec6.h>
106 #include <netipsec/key.h>
107 #endif
108 
109 extern pfil_head_t *inet6_pfil_hook;	/* XXX */
110 
/*
 * Mbufs for the IPv6 header and the extension headers that ip6_output()
 * assembles for an outgoing packet.  ip6_output() zeroes the structure
 * first, so a header that is not present is represented by a NULL slot.
 */
struct ip6_exthdrs {
	struct mbuf *ip6e_ip6;		/* IPv6 header */
	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
	struct mbuf *ip6e_dest1;	/* Destination options header (1st part) */
	struct mbuf *ip6e_rthdr;	/* Routing header */
	struct mbuf *ip6e_dest2;	/* Destination options header (2nd part) */
};
118 
119 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **,
120 	kauth_cred_t, int);
121 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
122 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, kauth_cred_t,
123 	int, int, int);
124 static int ip6_setmoptions(const struct sockopt *, struct in6pcb *);
125 static int ip6_getmoptions(struct sockopt *, struct in6pcb *);
126 static int ip6_copyexthdr(struct mbuf **, void *, int);
127 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
128 	struct ip6_frag **);
129 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
130 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
131 static int ip6_getpmtu(struct rtentry *, struct ifnet *, u_long *, int *);
132 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
133 static int ip6_ifaddrvalid(const struct in6_addr *, const struct in6_addr *);
134 static int ip6_handle_rthdr(struct ip6_rthdr *, struct ip6_hdr *);
135 
136 #ifdef RFC2292
137 static int ip6_pcbopts(struct ip6_pktopts **, struct socket *, struct sockopt *);
138 #endif
139 
140 static int
141 ip6_handle_rthdr(struct ip6_rthdr *rh, struct ip6_hdr *ip6)
142 {
143 	int error = 0;
144 
145 	switch (rh->ip6r_type) {
146 	case IPV6_RTHDR_TYPE_0:
147 		/* Dropped, RFC5095. */
148 	default:	/* is it possible? */
149 		error = EINVAL;
150 	}
151 
152 	return error;
153 }
154 
155 /*
156  * Send an IP packet to a host.
157  */
158 int
159 ip6_if_output(struct ifnet * const ifp, struct ifnet * const origifp,
160     struct mbuf * const m, const struct sockaddr_in6 * const dst,
161     const struct rtentry *rt)
162 {
163 	int error = 0;
164 
165 	if (rt != NULL) {
166 		error = rt_check_reject_route(rt, ifp);
167 		if (error != 0) {
168 			m_freem(m);
169 			return error;
170 		}
171 	}
172 
173 	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
174 		error = if_output_lock(ifp, origifp, m, sin6tocsa(dst), rt);
175 	else
176 		error = if_output_lock(ifp, ifp, m, sin6tocsa(dst), rt);
177 	return error;
178 }
179 
180 /*
181  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
182  * header (with pri, len, nxt, hlim, src, dst).
183  *
184  * This function may modify ver and hlim only. The mbuf chain containing the
185  * packet will be freed. The mbuf opt, if present, will not be freed.
186  *
187  * Type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
188  * nd_ifinfo.linkmtu is u_int32_t. So we use u_long to hold largest one,
189  * which is rt_rmx.rmx_mtu.
190  */
191 int
192 ip6_output(
193     struct mbuf *m0,
194     struct ip6_pktopts *opt,
195     struct route *ro,
196     int flags,
197     struct ip6_moptions *im6o,
198     struct in6pcb *in6p,
199     struct ifnet **ifpp		/* XXX: just for statistics */
200 )
201 {
202 	struct ip6_hdr *ip6, *mhip6;
203 	struct ifnet *ifp = NULL, *origifp = NULL;
204 	struct mbuf *m = m0;
205 	int tlen, len, off;
206 	bool tso;
207 	struct route ip6route;
208 	struct rtentry *rt = NULL, *rt_pmtu;
209 	const struct sockaddr_in6 *dst;
210 	struct sockaddr_in6 src_sa, dst_sa;
211 	int error = 0;
212 	struct in6_ifaddr *ia = NULL;
213 	u_long mtu;
214 	int alwaysfrag, dontfrag;
215 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
216 	struct ip6_exthdrs exthdrs;
217 	struct in6_addr finaldst, src0, dst0;
218 	u_int32_t zone;
219 	struct route *ro_pmtu = NULL;
220 	int hdrsplit = 0;
221 	int needipsec = 0;
222 #ifdef IPSEC
223 	struct secpolicy *sp = NULL;
224 #endif
225 	struct psref psref, psref_ia;
226 	int bound = curlwp_bind();
227 	bool release_psref_ia = false;
228 
229 #ifdef DIAGNOSTIC
230 	if ((m->m_flags & M_PKTHDR) == 0)
231 		panic("ip6_output: no HDR");
232 	if ((m->m_pkthdr.csum_flags &
233 	    (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_TSOv4)) != 0) {
234 		panic("ip6_output: IPv4 checksum offload flags: %d",
235 		    m->m_pkthdr.csum_flags);
236 	}
237 	if ((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) ==
238 	    (M_CSUM_TCPv6|M_CSUM_UDPv6)) {
239 		panic("ip6_output: conflicting checksum offload flags: %d",
240 		    m->m_pkthdr.csum_flags);
241 	}
242 #endif
243 
244 	M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data, sizeof(struct ip6_hdr));
245 
246 #define MAKE_EXTHDR(hp, mp)						\
247     do {								\
248 	if (hp) {							\
249 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
250 		error = ip6_copyexthdr((mp), (void *)(hp), 		\
251 		    ((eh)->ip6e_len + 1) << 3);				\
252 		if (error)						\
253 			goto freehdrs;					\
254 	}								\
255     } while (/*CONSTCOND*/ 0)
256 
257 	memset(&exthdrs, 0, sizeof(exthdrs));
258 	if (opt) {
259 		/* Hop-by-Hop options header */
260 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
261 		/* Destination options header (1st part) */
262 		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
263 		/* Routing header */
264 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
265 		/* Destination options header (2nd part) */
266 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
267 	}
268 
269 	/*
270 	 * Calculate the total length of the extension header chain.
271 	 * Keep the length of the unfragmentable part for fragmentation.
272 	 */
273 	optlen = 0;
274 	if (exthdrs.ip6e_hbh)
275 		optlen += exthdrs.ip6e_hbh->m_len;
276 	if (exthdrs.ip6e_dest1)
277 		optlen += exthdrs.ip6e_dest1->m_len;
278 	if (exthdrs.ip6e_rthdr)
279 		optlen += exthdrs.ip6e_rthdr->m_len;
280 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
281 	/* NOTE: we don't add AH/ESP length here. do that later. */
282 	if (exthdrs.ip6e_dest2)
283 		optlen += exthdrs.ip6e_dest2->m_len;
284 
285 #ifdef IPSEC
286 	if (ipsec_used) {
287 		/* Check the security policy (SP) for the packet */
288 		sp = ipsec6_check_policy(m, in6p, flags, &needipsec, &error);
289 		if (error != 0) {
290 			/*
291 			 * Hack: -EINVAL is used to signal that a packet
292 			 * should be silently discarded.  This is typically
293 			 * because we asked key management for an SA and
294 			 * it was delayed (e.g. kicked up to IKE).
295 			 */
296 			if (error == -EINVAL)
297 				error = 0;
298 			goto freehdrs;
299 		}
300 	}
301 #endif
302 
303 	if (needipsec &&
304 	    (m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
305 		in6_delayed_cksum(m);
306 		m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
307 	}
308 
309 	/*
310 	 * If we need IPsec, or there is at least one extension header,
311 	 * separate IP6 header from the payload.
312 	 */
313 	if ((needipsec || optlen) && !hdrsplit) {
314 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
315 			m = NULL;
316 			goto freehdrs;
317 		}
318 		m = exthdrs.ip6e_ip6;
319 		hdrsplit++;
320 	}
321 
322 	/* adjust pointer */
323 	ip6 = mtod(m, struct ip6_hdr *);
324 
325 	/* adjust mbuf packet header length */
326 	m->m_pkthdr.len += optlen;
327 	plen = m->m_pkthdr.len - sizeof(*ip6);
328 
329 	/* If this is a jumbo payload, insert a jumbo payload option. */
330 	if (plen > IPV6_MAXPACKET) {
331 		if (!hdrsplit) {
332 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
333 				m = NULL;
334 				goto freehdrs;
335 			}
336 			m = exthdrs.ip6e_ip6;
337 			hdrsplit++;
338 		}
339 		/* adjust pointer */
340 		ip6 = mtod(m, struct ip6_hdr *);
341 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
342 			goto freehdrs;
343 		optlen += 8; /* XXX JUMBOOPTLEN */
344 		ip6->ip6_plen = 0;
345 	} else
346 		ip6->ip6_plen = htons(plen);
347 
348 	/*
349 	 * Concatenate headers and fill in next header fields.
350 	 * Here we have, on "m"
351 	 *	IPv6 payload
352 	 * and we insert headers accordingly.  Finally, we should be getting:
353 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
354 	 *
355 	 * during the header composing process, "m" points to IPv6 header.
356 	 * "mprev" points to an extension header prior to esp.
357 	 */
358 	{
359 		u_char *nexthdrp = &ip6->ip6_nxt;
360 		struct mbuf *mprev = m;
361 
362 		/*
363 		 * we treat dest2 specially.  this makes IPsec processing
364 		 * much easier.  the goal here is to make mprev point the
365 		 * mbuf prior to dest2.
366 		 *
367 		 * result: IPv6 dest2 payload
368 		 * m and mprev will point to IPv6 header.
369 		 */
370 		if (exthdrs.ip6e_dest2) {
371 			if (!hdrsplit)
372 				panic("assumption failed: hdr not split");
373 			exthdrs.ip6e_dest2->m_next = m->m_next;
374 			m->m_next = exthdrs.ip6e_dest2;
375 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
376 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
377 		}
378 
379 #define MAKE_CHAIN(m, mp, p, i)\
380     do {\
381 	if (m) {\
382 		if (!hdrsplit) \
383 			panic("assumption failed: hdr not split"); \
384 		*mtod((m), u_char *) = *(p);\
385 		*(p) = (i);\
386 		p = mtod((m), u_char *);\
387 		(m)->m_next = (mp)->m_next;\
388 		(mp)->m_next = (m);\
389 		(mp) = (m);\
390 	}\
391     } while (/*CONSTCOND*/ 0)
392 		/*
393 		 * result: IPv6 hbh dest1 rthdr dest2 payload
394 		 * m will point to IPv6 header.  mprev will point to the
395 		 * extension header prior to dest2 (rthdr in the above case).
396 		 */
397 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
398 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
399 		    IPPROTO_DSTOPTS);
400 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
401 		    IPPROTO_ROUTING);
402 
403 		M_CSUM_DATA_IPv6_HL_SET(m->m_pkthdr.csum_data,
404 		    sizeof(struct ip6_hdr) + optlen);
405 	}
406 
407 	/* Need to save for pmtu */
408 	finaldst = ip6->ip6_dst;
409 
410 	/*
411 	 * If there is a routing header, replace destination address field
412 	 * with the first hop of the routing header.
413 	 */
414 	if (exthdrs.ip6e_rthdr) {
415 		struct ip6_rthdr *rh;
416 
417 		rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
418 
419 		error = ip6_handle_rthdr(rh, ip6);
420 		if (error != 0)
421 			goto bad;
422 	}
423 
424 	/* Source address validation */
425 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
426 	    (flags & IPV6_UNSPECSRC) == 0) {
427 		error = EOPNOTSUPP;
428 		IP6_STATINC(IP6_STAT_BADSCOPE);
429 		goto bad;
430 	}
431 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
432 		error = EOPNOTSUPP;
433 		IP6_STATINC(IP6_STAT_BADSCOPE);
434 		goto bad;
435 	}
436 
437 	IP6_STATINC(IP6_STAT_LOCALOUT);
438 
439 	/*
440 	 * Route packet.
441 	 */
442 	/* initialize cached route */
443 	if (ro == NULL) {
444 		memset(&ip6route, 0, sizeof(ip6route));
445 		ro = &ip6route;
446 	}
447 	ro_pmtu = ro;
448 	if (opt && opt->ip6po_rthdr)
449 		ro = &opt->ip6po_route;
450 
451 	/*
452 	 * if specified, try to fill in the traffic class field.
453 	 * do not override if a non-zero value is already set.
454 	 * we check the diffserv field and the ecn field separately.
455 	 */
456 	if (opt && opt->ip6po_tclass >= 0) {
457 		int mask = 0;
458 
459 		if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
460 			mask |= 0xfc;
461 		if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
462 			mask |= 0x03;
463 		if (mask != 0)
464 			ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
465 	}
466 
467 	/* fill in or override the hop limit field, if necessary. */
468 	if (opt && opt->ip6po_hlim != -1)
469 		ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
470 	else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
471 		if (im6o != NULL)
472 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
473 		else
474 			ip6->ip6_hlim = ip6_defmcasthlim;
475 	}
476 
477 #ifdef IPSEC
478 	if (needipsec) {
479 		int s = splsoftnet();
480 		error = ipsec6_process_packet(m, sp->req);
481 		splx(s);
482 
483 		/*
484 		 * Preserve KAME behaviour: ENOENT can be returned
485 		 * when an SA acquire is in progress.  Don't propagate
486 		 * this to user-level; it confuses applications.
487 		 * XXX this will go away when the SADB is redone.
488 		 */
489 		if (error == ENOENT)
490 			error = 0;
491 
492 		goto done;
493 	}
494 #endif
495 
496 	/* adjust pointer */
497 	ip6 = mtod(m, struct ip6_hdr *);
498 
499 	sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
500 
501 	/* We do not need a route for multicast */
502 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
503 		struct in6_pktinfo *pi = NULL;
504 
505 		/*
506 		 * If the outgoing interface for the address is specified by
507 		 * the caller, use it.
508 		 */
509 		if (opt && (pi = opt->ip6po_pktinfo) != NULL) {
510 			/* XXX boundary check is assumed to be already done. */
511 			ifp = if_get_byindex(pi->ipi6_ifindex, &psref);
512 		} else if (im6o != NULL) {
513 			ifp = if_get_byindex(im6o->im6o_multicast_if_index,
514 			    &psref);
515 		}
516 	}
517 
518 	if (ifp == NULL) {
519 		error = in6_selectroute(&dst_sa, opt, &ro, &rt, true);
520 		if (error != 0)
521 			goto bad;
522 		ifp = if_get_byindex(rt->rt_ifp->if_index, &psref);
523 	}
524 
525 	if (rt == NULL) {
526 		/*
527 		 * If in6_selectroute() does not return a route entry,
528 		 * dst may not have been updated.
529 		 */
530 		error = rtcache_setdst(ro, sin6tosa(&dst_sa));
531 		if (error) {
532 			goto bad;
533 		}
534 	}
535 
536 	/*
537 	 * then rt (for unicast) and ifp must be non-NULL valid values.
538 	 */
539 	if ((flags & IPV6_FORWARDING) == 0) {
540 		/* XXX: the FORWARDING flag can be set for mrouting. */
541 		in6_ifstat_inc(ifp, ifs6_out_request);
542 	}
543 	if (rt != NULL) {
544 		ia = (struct in6_ifaddr *)(rt->rt_ifa);
545 		rt->rt_use++;
546 	}
547 
548 	/*
549 	 * The outgoing interface must be in the zone of source and
550 	 * destination addresses.  We should use ia_ifp to support the
551 	 * case of sending packets to an address of our own.
552 	 */
553 	if (ia != NULL && ia->ia_ifp) {
554 		origifp = ia->ia_ifp;
555 		if (if_is_deactivated(origifp))
556 			goto bad;
557 		if_acquire(origifp, &psref_ia);
558 		release_psref_ia = true;
559 	} else
560 		origifp = ifp;
561 
562 	src0 = ip6->ip6_src;
563 	if (in6_setscope(&src0, origifp, &zone))
564 		goto badscope;
565 	sockaddr_in6_init(&src_sa, &ip6->ip6_src, 0, 0, 0);
566 	if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
567 		goto badscope;
568 
569 	dst0 = ip6->ip6_dst;
570 	if (in6_setscope(&dst0, origifp, &zone))
571 		goto badscope;
572 	/* re-initialize to be sure */
573 	sockaddr_in6_init(&dst_sa, &ip6->ip6_dst, 0, 0, 0);
574 	if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id)
575 		goto badscope;
576 
577 	/* scope check is done. */
578 
579 	/* Ensure we only send from a valid address. */
580 	if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
581 	    (error = ip6_ifaddrvalid(&src0, &dst0)) != 0)
582 	{
583 		char ip6buf[INET6_ADDRSTRLEN];
584 		nd6log(LOG_ERR,
585 		    "refusing to send from invalid address %s (pid %d)\n",
586 		    IN6_PRINT(ip6buf, &src0), curproc->p_pid);
587 		IP6_STATINC(IP6_STAT_ODROPPED);
588 		in6_ifstat_inc(origifp, ifs6_out_discard);
589 		if (error == 1)
590 			/*
591 			 * Address exists, but is tentative or detached.
592 			 * We can't send from it because it's invalid,
593 			 * so we drop the packet.
594 			 */
595 			error = 0;
596 		else
597 			error = EADDRNOTAVAIL;
598 		goto bad;
599 	}
600 
601 	if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) &&
602 	    !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
603 		dst = satocsin6(rt->rt_gateway);
604 	else
605 		dst = satocsin6(rtcache_getdst(ro));
606 
607 	/*
608 	 * XXXXXX: original code follows:
609 	 */
610 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
611 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
612 	else {
613 		bool ingroup;
614 
615 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
616 
617 		in6_ifstat_inc(ifp, ifs6_out_mcast);
618 
619 		/*
620 		 * Confirm that the outgoing interface supports multicast.
621 		 */
622 		if (!(ifp->if_flags & IFF_MULTICAST)) {
623 			IP6_STATINC(IP6_STAT_NOROUTE);
624 			in6_ifstat_inc(ifp, ifs6_out_discard);
625 			error = ENETUNREACH;
626 			goto bad;
627 		}
628 
629 		ingroup = in6_multi_group(&ip6->ip6_dst, ifp);
630 		if (ingroup && (im6o == NULL || im6o->im6o_multicast_loop)) {
631 			/*
632 			 * If we belong to the destination multicast group
633 			 * on the outgoing interface, and the caller did not
634 			 * forbid loopback, loop back a copy.
635 			 */
636 			KASSERT(dst != NULL);
637 			ip6_mloopback(ifp, m, dst);
638 		} else {
639 			/*
640 			 * If we are acting as a multicast router, perform
641 			 * multicast forwarding as if the packet had just
642 			 * arrived on the interface to which we are about
643 			 * to send.  The multicast forwarding function
644 			 * recursively calls this function, using the
645 			 * IPV6_FORWARDING flag to prevent infinite recursion.
646 			 *
647 			 * Multicasts that are looped back by ip6_mloopback(),
648 			 * above, will be forwarded by the ip6_input() routine,
649 			 * if necessary.
650 			 */
651 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
652 				if (ip6_mforward(ip6, ifp, m) != 0) {
653 					m_freem(m);
654 					goto done;
655 				}
656 			}
657 		}
658 		/*
659 		 * Multicasts with a hoplimit of zero may be looped back,
660 		 * above, but must not be transmitted on a network.
661 		 * Also, multicasts addressed to the loopback interface
662 		 * are not sent -- the above call to ip6_mloopback() will
663 		 * loop back a copy if this host actually belongs to the
664 		 * destination group on the loopback interface.
665 		 */
666 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
667 		    IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
668 			m_freem(m);
669 			goto done;
670 		}
671 	}
672 
673 	/*
674 	 * Fill the outgoing interface to tell the upper layer
675 	 * to increment per-interface statistics.
676 	 */
677 	if (ifpp)
678 		*ifpp = ifp;
679 
680 	/* Determine path MTU. */
681 	/*
682 	 * ro_pmtu represent final destination while
683 	 * ro might represent immediate destination.
684 	 * Use ro_pmtu destination since MTU might differ.
685 	 */
686 	if (ro_pmtu != ro) {
687 		union {
688 			struct sockaddr		dst;
689 			struct sockaddr_in6	dst6;
690 		} u;
691 
692 		/* ro_pmtu may not have a cache */
693 		sockaddr_in6_init(&u.dst6, &finaldst, 0, 0, 0);
694 		rt_pmtu = rtcache_lookup(ro_pmtu, &u.dst);
695 	} else
696 		rt_pmtu = rt;
697 	error = ip6_getpmtu(rt_pmtu, ifp, &mtu, &alwaysfrag);
698 	if (rt_pmtu != NULL && rt_pmtu != rt)
699 		rtcache_unref(rt_pmtu, ro_pmtu);
700 	if (error != 0)
701 		goto bad;
702 
703 	/*
704 	 * The caller of this function may specify to use the minimum MTU
705 	 * in some cases.
706 	 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
707 	 * setting.  The logic is a bit complicated; by default, unicast
708 	 * packets will follow path MTU while multicast packets will be sent at
709 	 * the minimum MTU.  If IP6PO_MINMTU_ALL is specified, all packets
710 	 * including unicast ones will be sent at the minimum MTU.  Multicast
711 	 * packets will always be sent at the minimum MTU unless
712 	 * IP6PO_MINMTU_DISABLE is explicitly specified.
713 	 * See RFC 3542 for more details.
714 	 */
715 	if (mtu > IPV6_MMTU) {
716 		if ((flags & IPV6_MINMTU))
717 			mtu = IPV6_MMTU;
718 		else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
719 			mtu = IPV6_MMTU;
720 		else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
721 			 (opt == NULL ||
722 			  opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
723 			mtu = IPV6_MMTU;
724 		}
725 	}
726 
727 	/*
728 	 * clear embedded scope identifiers if necessary.
729 	 * in6_clearscope will touch the addresses only when necessary.
730 	 */
731 	in6_clearscope(&ip6->ip6_src);
732 	in6_clearscope(&ip6->ip6_dst);
733 
734 	/*
735 	 * If the outgoing packet contains a hop-by-hop options header,
736 	 * it must be examined and processed even by the source node.
737 	 * (RFC 2460, section 4.)
738 	 *
739 	 * XXX Is this really necessary?
740 	 */
741 	if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
742 		u_int32_t dummy1; /* XXX unused */
743 		u_int32_t dummy2; /* XXX unused */
744 		int hoff = sizeof(struct ip6_hdr);
745 
746 		if (ip6_hopopts_input(&dummy1, &dummy2, &m, &hoff)) {
747 			/* m was already freed at this point */
748 			error = EINVAL;
749 			goto done;
750 		}
751 
752 		ip6 = mtod(m, struct ip6_hdr *);
753 	}
754 
755 	/*
756 	 * Run through list of hooks for output packets.
757 	 */
758 	if ((error = pfil_run_hooks(inet6_pfil_hook, &m, ifp, PFIL_OUT)) != 0)
759 		goto done;
760 	if (m == NULL)
761 		goto done;
762 	ip6 = mtod(m, struct ip6_hdr *);
763 
764 	/*
765 	 * Send the packet to the outgoing interface.
766 	 * If necessary, do IPv6 fragmentation before sending.
767 	 *
768 	 * the logic here is rather complex:
769 	 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
770 	 * 1-a:	send as is if tlen <= path mtu
771 	 * 1-b:	fragment if tlen > path mtu
772 	 *
773 	 * 2: if user asks us not to fragment (dontfrag == 1)
774 	 * 2-a:	send as is if tlen <= interface mtu
775 	 * 2-b:	error if tlen > interface mtu
776 	 *
777 	 * 3: if we always need to attach fragment header (alwaysfrag == 1)
778 	 *	always fragment
779 	 *
780 	 * 4: if dontfrag == 1 && alwaysfrag == 1
781 	 *	error, as we cannot handle this conflicting request
782 	 */
783 	tlen = m->m_pkthdr.len;
784 	tso = (m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0;
785 	if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
786 		dontfrag = 1;
787 	else
788 		dontfrag = 0;
789 
790 	if (dontfrag && alwaysfrag) {	/* case 4 */
791 		/* conflicting request - can't transmit */
792 		error = EMSGSIZE;
793 		goto bad;
794 	}
795 	if (dontfrag && (!tso && tlen > IN6_LINKMTU(ifp))) {	/* case 2-b */
796 		/*
797 		 * Even if the DONTFRAG option is specified, we cannot send the
798 		 * packet when the data length is larger than the MTU of the
799 		 * outgoing interface.
800 		 * Notify the error by sending IPV6_PATHMTU ancillary data as
801 		 * well as returning an error code (the latter is not described
802 		 * in the API spec.)
803 		 */
804 		u_int32_t mtu32;
805 		struct ip6ctlparam ip6cp;
806 
807 		mtu32 = (u_int32_t)mtu;
808 		memset(&ip6cp, 0, sizeof(ip6cp));
809 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
810 		pfctlinput2(PRC_MSGSIZE,
811 		    rtcache_getdst(ro_pmtu), &ip6cp);
812 
813 		error = EMSGSIZE;
814 		goto bad;
815 	}
816 
817 	/*
818 	 * transmit packet without fragmentation
819 	 */
820 	if (dontfrag || (!alwaysfrag && (tlen <= mtu || tso))) {
821 		/* case 1-a and 2-a */
822 		struct in6_ifaddr *ia6;
823 		int sw_csum;
824 		int s;
825 
826 		ip6 = mtod(m, struct ip6_hdr *);
827 		s = pserialize_read_enter();
828 		ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
829 		if (ia6) {
830 			/* Record statistics for this interface address. */
831 			ia6->ia_ifa.ifa_data.ifad_outbytes += m->m_pkthdr.len;
832 		}
833 		pserialize_read_exit(s);
834 
835 		sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx;
836 		if ((sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0) {
837 			if (IN6_NEED_CHECKSUM(ifp,
838 			    sw_csum & (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
839 				in6_delayed_cksum(m);
840 			}
841 			m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
842 		}
843 
844 		KASSERT(dst != NULL);
845 		if (__predict_true(!tso ||
846 		    (ifp->if_capenable & IFCAP_TSOv6) != 0)) {
847 			error = ip6_if_output(ifp, origifp, m, dst, rt);
848 		} else {
849 			error = ip6_tso_output(ifp, origifp, m, dst, rt);
850 		}
851 		goto done;
852 	}
853 
854 	if (tso) {
855 		error = EINVAL; /* XXX */
856 		goto bad;
857 	}
858 
859 	/*
860 	 * try to fragment the packet.  case 1-b and 3
861 	 */
862 	if (mtu < IPV6_MMTU) {
863 		/* path MTU cannot be less than IPV6_MMTU */
864 		error = EMSGSIZE;
865 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
866 		goto bad;
867 	} else if (ip6->ip6_plen == 0) {
868 		/* jumbo payload cannot be fragmented */
869 		error = EMSGSIZE;
870 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
871 		goto bad;
872 	} else {
873 		const u_int32_t id = htonl(ip6_randomid());
874 		struct mbuf **mnext, *m_frgpart;
875 		const int hlen = unfragpartlen;
876 		struct ip6_frag *ip6f;
877 		u_char nextproto;
878 #if 0		/* see below */
879 		struct ip6ctlparam ip6cp;
880 		u_int32_t mtu32;
881 #endif
882 
883 		if (mtu > IPV6_MAXPACKET)
884 			mtu = IPV6_MAXPACKET;
885 
886 #if 0
887 		/*
888 		 * It is believed this code is a leftover from the
889 		 * development of the IPV6_RECVPATHMTU sockopt and
890 		 * associated work to implement RFC3542.
891 		 * It's not entirely clear what the intent of the API
892 		 * is at this point, so disable this code for now.
893 		 * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
894 		 * will send notifications if the application requests.
895 		 */
896 
897 		/* Notify a proper path MTU to applications. */
898 		mtu32 = (u_int32_t)mtu;
899 		memset(&ip6cp, 0, sizeof(ip6cp));
900 		ip6cp.ip6c_cmdarg = (void *)&mtu32;
901 		pfctlinput2(PRC_MSGSIZE,
902 		    rtcache_getdst(ro_pmtu), &ip6cp);
903 #endif
904 
905 		/*
906 		 * Must be able to put at least 8 bytes per fragment.
907 		 */
908 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
909 		if (len < 8) {
910 			error = EMSGSIZE;
911 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
912 			goto bad;
913 		}
914 
915 		mnext = &m->m_nextpkt;
916 
917 		/*
918 		 * Change the next header field of the last header in the
919 		 * unfragmentable part.
920 		 */
921 		if (exthdrs.ip6e_rthdr) {
922 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
923 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
924 		} else if (exthdrs.ip6e_dest1) {
925 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
926 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
927 		} else if (exthdrs.ip6e_hbh) {
928 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
929 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
930 		} else {
931 			nextproto = ip6->ip6_nxt;
932 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
933 		}
934 
935 		if ((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6))
936 		    != 0) {
937 			if (IN6_NEED_CHECKSUM(ifp,
938 			    m->m_pkthdr.csum_flags &
939 			    (M_CSUM_UDPv6|M_CSUM_TCPv6))) {
940 				in6_delayed_cksum(m);
941 			}
942 			m->m_pkthdr.csum_flags &= ~(M_CSUM_UDPv6|M_CSUM_TCPv6);
943 		}
944 
945 		/*
946 		 * Loop through length of segment after first fragment,
947 		 * make new header and copy data of each part and link onto
948 		 * chain.
949 		 */
950 		m0 = m;
951 		for (off = hlen; off < tlen; off += len) {
952 			struct mbuf *mlast;
953 
954 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
955 			if (!m) {
956 				error = ENOBUFS;
957 				IP6_STATINC(IP6_STAT_ODROPPED);
958 				goto sendorfree;
959 			}
960 			m_reset_rcvif(m);
961 			m->m_flags = m0->m_flags & M_COPYFLAGS;
962 			*mnext = m;
963 			mnext = &m->m_nextpkt;
964 			m->m_data += max_linkhdr;
965 			mhip6 = mtod(m, struct ip6_hdr *);
966 			*mhip6 = *ip6;
967 			m->m_len = sizeof(*mhip6);
968 
969 			ip6f = NULL;
970 			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
971 			if (error) {
972 				IP6_STATINC(IP6_STAT_ODROPPED);
973 				goto sendorfree;
974 			}
975 
976 			/* Fill in the Frag6 Header */
977 			ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7));
978 			if (off + len >= tlen)
979 				len = tlen - off;
980 			else
981 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
982 			ip6f->ip6f_reserved = 0;
983 			ip6f->ip6f_ident = id;
984 			ip6f->ip6f_nxt = nextproto;
985 
986 			mhip6->ip6_plen = htons((u_int16_t)(len + hlen +
987 			    sizeof(*ip6f) - sizeof(struct ip6_hdr)));
988 			if ((m_frgpart = m_copym(m0, off, len, M_DONTWAIT)) == NULL) {
989 				error = ENOBUFS;
990 				IP6_STATINC(IP6_STAT_ODROPPED);
991 				goto sendorfree;
992 			}
993 			for (mlast = m; mlast->m_next; mlast = mlast->m_next)
994 				;
995 			mlast->m_next = m_frgpart;
996 
997 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
998 			m_reset_rcvif(m);
999 			IP6_STATINC(IP6_STAT_OFRAGMENTS);
1000 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1001 		}
1002 
1003 		in6_ifstat_inc(ifp, ifs6_out_fragok);
1004 	}
1005 
1006 sendorfree:
1007 	m = m0->m_nextpkt;
1008 	m0->m_nextpkt = 0;
1009 	m_freem(m0);
1010 	for (m0 = m; m; m = m0) {
1011 		m0 = m->m_nextpkt;
1012 		m->m_nextpkt = 0;
1013 		if (error == 0) {
1014 			struct in6_ifaddr *ia6;
1015 			int s;
1016 			ip6 = mtod(m, struct ip6_hdr *);
1017 			s = pserialize_read_enter();
1018 			ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
1019 			if (ia6) {
1020 				/*
1021 				 * Record statistics for this interface
1022 				 * address.
1023 				 */
1024 				ia6->ia_ifa.ifa_data.ifad_outbytes +=
1025 				    m->m_pkthdr.len;
1026 			}
1027 			pserialize_read_exit(s);
1028 			KASSERT(dst != NULL);
1029 			error = ip6_if_output(ifp, origifp, m, dst, rt);
1030 		} else
1031 			m_freem(m);
1032 	}
1033 
1034 	if (error == 0)
1035 		IP6_STATINC(IP6_STAT_FRAGMENTED);
1036 
1037 done:
1038 	rtcache_unref(rt, ro);
1039 	if (ro == &ip6route)
1040 		rtcache_free(&ip6route);
1041 #ifdef IPSEC
1042 	if (sp != NULL)
1043 		KEY_SP_UNREF(&sp);
1044 #endif
1045 	if_put(ifp, &psref);
1046 	if (release_psref_ia)
1047 		if_put(origifp, &psref_ia);
1048 	curlwp_bindx(bound);
1049 
1050 	return error;
1051 
1052 freehdrs:
1053 	m_freem(exthdrs.ip6e_hbh);
1054 	m_freem(exthdrs.ip6e_dest1);
1055 	m_freem(exthdrs.ip6e_rthdr);
1056 	m_freem(exthdrs.ip6e_dest2);
1057 	/* FALLTHROUGH */
1058 bad:
1059 	m_freem(m);
1060 	goto done;
1061 
1062 badscope:
1063 	IP6_STATINC(IP6_STAT_BADSCOPE);
1064 	in6_ifstat_inc(origifp, ifs6_out_discard);
1065 	if (error == 0)
1066 		error = EHOSTUNREACH; /* XXX */
1067 	goto bad;
1068 }
1069 
1070 static int
1071 ip6_copyexthdr(struct mbuf **mp, void *hdr, int hlen)
1072 {
1073 	struct mbuf *m;
1074 
1075 	if (hlen > MCLBYTES)
1076 		return ENOBUFS; /* XXX */
1077 
1078 	MGET(m, M_DONTWAIT, MT_DATA);
1079 	if (!m)
1080 		return ENOBUFS;
1081 
1082 	if (hlen > MLEN) {
1083 		MCLGET(m, M_DONTWAIT);
1084 		if ((m->m_flags & M_EXT) == 0) {
1085 			m_free(m);
1086 			return ENOBUFS;
1087 		}
1088 	}
1089 	m->m_len = hlen;
1090 	if (hdr)
1091 		memcpy(mtod(m, void *), hdr, hlen);
1092 
1093 	*mp = m;
1094 	return 0;
1095 }
1096 
1097 /*
1098  * Process a delayed payload checksum calculation.
1099  */
1100 void
1101 in6_delayed_cksum(struct mbuf *m)
1102 {
1103 	uint16_t csum, offset;
1104 
1105 	KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1106 	KASSERT((~m->m_pkthdr.csum_flags & (M_CSUM_UDPv6|M_CSUM_TCPv6)) != 0);
1107 	KASSERT((m->m_pkthdr.csum_flags
1108 	    & (M_CSUM_UDPv4|M_CSUM_TCPv4|M_CSUM_TSOv4)) == 0);
1109 
1110 	offset = M_CSUM_DATA_IPv6_HL(m->m_pkthdr.csum_data);
1111 	csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1112 	if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv6) != 0) {
1113 		csum = 0xffff;
1114 	}
1115 
1116 	offset += M_CSUM_DATA_IPv6_OFFSET(m->m_pkthdr.csum_data);
1117 	if ((offset + sizeof(csum)) > m->m_len) {
1118 		m_copyback(m, offset, sizeof(csum), &csum);
1119 	} else {
1120 		*(uint16_t *)(mtod(m, char *) + offset) = csum;
1121 	}
1122 }
1123 
1124 /*
1125  * Insert jumbo payload option.
1126  */
1127 static int
1128 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1129 {
1130 	struct mbuf *mopt;
1131 	u_int8_t *optbuf;
1132 	u_int32_t v;
1133 
1134 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1135 
1136 	/*
1137 	 * If there is no hop-by-hop options header, allocate new one.
1138 	 * If there is one but it doesn't have enough space to store the
1139 	 * jumbo payload option, allocate a cluster to store the whole options.
1140 	 * Otherwise, use it to store the options.
1141 	 */
1142 	if (exthdrs->ip6e_hbh == NULL) {
1143 		MGET(mopt, M_DONTWAIT, MT_DATA);
1144 		if (mopt == 0)
1145 			return (ENOBUFS);
1146 		mopt->m_len = JUMBOOPTLEN;
1147 		optbuf = mtod(mopt, u_int8_t *);
1148 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1149 		exthdrs->ip6e_hbh = mopt;
1150 	} else {
1151 		struct ip6_hbh *hbh;
1152 
1153 		mopt = exthdrs->ip6e_hbh;
1154 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1155 			const int oldoptlen = mopt->m_len;
1156 			struct mbuf *n;
1157 
1158 			/*
1159 			 * Assumptions:
1160 			 * - exthdrs->ip6e_hbh is not referenced from places
1161 			 *   other than exthdrs.
1162 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1163 			 */
1164 			KASSERT(mopt->m_next == NULL);
1165 
1166 			/*
1167 			 * Give up if the whole (new) hbh header does not fit
1168 			 * even in an mbuf cluster.
1169 			 */
1170 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1171 				return ENOBUFS;
1172 
1173 			/*
1174 			 * At this point, we must always prepare a cluster.
1175 			 */
1176 			MGET(n, M_DONTWAIT, MT_DATA);
1177 			if (n) {
1178 				MCLGET(n, M_DONTWAIT);
1179 				if ((n->m_flags & M_EXT) == 0) {
1180 					m_freem(n);
1181 					n = NULL;
1182 				}
1183 			}
1184 			if (!n)
1185 				return ENOBUFS;
1186 
1187 			n->m_len = oldoptlen + JUMBOOPTLEN;
1188 			bcopy(mtod(mopt, void *), mtod(n, void *),
1189 			    oldoptlen);
1190 			optbuf = mtod(n, u_int8_t *) + oldoptlen;
1191 			m_freem(mopt);
1192 			mopt = exthdrs->ip6e_hbh = n;
1193 		} else {
1194 			optbuf = mtod(mopt, u_int8_t *) + mopt->m_len;
1195 			mopt->m_len += JUMBOOPTLEN;
1196 		}
1197 		optbuf[0] = IP6OPT_PADN;
1198 		optbuf[1] = 0;
1199 
1200 		/*
1201 		 * Adjust the header length according to the pad and
1202 		 * the jumbo payload option.
1203 		 */
1204 		hbh = mtod(mopt, struct ip6_hbh *);
1205 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1206 	}
1207 
1208 	/* fill in the option. */
1209 	optbuf[2] = IP6OPT_JUMBO;
1210 	optbuf[3] = 4;
1211 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1212 	memcpy(&optbuf[4], &v, sizeof(u_int32_t));
1213 
1214 	/* finally, adjust the packet header length */
1215 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1216 
1217 	return 0;
1218 #undef JUMBOOPTLEN
1219 }
1220 
1221 /*
1222  * Insert fragment header and copy unfragmentable header portions.
1223  *
1224  * *frghdrp will not be read, and it is guaranteed that either an
1225  * error is returned or that *frghdrp will point to space allocated
1226  * for the fragment header.
1227  *
1228  * On entry, m contains:
1229  *     IPv6 Header
1230  * On exit, it contains:
1231  *     IPv6 Header -> Unfragmentable Part -> Frag6 Header
1232  */
1233 static int
1234 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1235 	struct ip6_frag **frghdrp)
1236 {
1237 	struct mbuf *n, *mlast;
1238 
1239 	if (hlen > sizeof(struct ip6_hdr)) {
1240 		n = m_copym(m0, sizeof(struct ip6_hdr),
1241 		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1242 		if (n == NULL)
1243 			return ENOBUFS;
1244 		m->m_next = n;
1245 	} else
1246 		n = m;
1247 
1248 	/* Search for the last mbuf of unfragmentable part. */
1249 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1250 		;
1251 
1252 	if ((mlast->m_flags & M_EXT) == 0 &&
1253 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1254 		/* use the trailing space of the last mbuf for the fragment hdr */
1255 		*frghdrp = (struct ip6_frag *)(mtod(mlast, char *) +
1256 		    mlast->m_len);
1257 		mlast->m_len += sizeof(struct ip6_frag);
1258 	} else {
1259 		/* allocate a new mbuf for the fragment header */
1260 		struct mbuf *mfrg;
1261 
1262 		MGET(mfrg, M_DONTWAIT, MT_DATA);
1263 		if (mfrg == NULL)
1264 			return ENOBUFS;
1265 		mfrg->m_len = sizeof(struct ip6_frag);
1266 		*frghdrp = mtod(mfrg, struct ip6_frag *);
1267 		mlast->m_next = mfrg;
1268 	}
1269 
1270 	return 0;
1271 }
1272 
1273 static int
1274 ip6_getpmtu(struct rtentry *rt, struct ifnet *ifp, u_long *mtup,
1275     int *alwaysfragp)
1276 {
1277 	u_int32_t mtu = 0;
1278 	int alwaysfrag = 0;
1279 	int error = 0;
1280 
1281 	if (rt != NULL) {
1282 		u_int32_t ifmtu;
1283 
1284 		if (ifp == NULL)
1285 			ifp = rt->rt_ifp;
1286 		ifmtu = IN6_LINKMTU(ifp);
1287 		mtu = rt->rt_rmx.rmx_mtu;
1288 		if (mtu == 0)
1289 			mtu = ifmtu;
1290 		else if (mtu < IPV6_MMTU) {
1291 			/*
1292 			 * RFC2460 section 5, last paragraph:
1293 			 * if we record ICMPv6 too big message with
1294 			 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1295 			 * or smaller, with fragment header attached.
1296 			 * (fragment header is needed regardless from the
1297 			 * packet size, for translators to identify packets)
1298 			 */
1299 			alwaysfrag = 1;
1300 			mtu = IPV6_MMTU;
1301 		} else if (mtu > ifmtu) {
1302 			/*
1303 			 * The MTU on the route is larger than the MTU on
1304 			 * the interface!  This shouldn't happen, unless the
1305 			 * MTU of the interface has been changed after the
1306 			 * interface was brought up.  Change the MTU in the
1307 			 * route to match the interface MTU (as long as the
1308 			 * field isn't locked).
1309 			 */
1310 			mtu = ifmtu;
1311 			if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1312 				rt->rt_rmx.rmx_mtu = mtu;
1313 		}
1314 	} else if (ifp) {
1315 		mtu = IN6_LINKMTU(ifp);
1316 	} else
1317 		error = EHOSTUNREACH; /* XXX */
1318 
1319 	*mtup = mtu;
1320 	if (alwaysfragp)
1321 		*alwaysfragp = alwaysfrag;
1322 	return (error);
1323 }
1324 
1325 /*
1326  * IP6 socket option processing.
 *
 * op is PRCO_SETOPT or PRCO_GETOPT; the option level and name are read
 * from sopt.  At level IPPROTO_IP only the IPv4 multicast options listed
 * in the first switch are accepted, and they are handed straight to
 * ip_setmoptions() / ip_getmoptions() on in6p_v4moptions.  Any level
 * other than IPPROTO_IP or IPPROTO_IPV6 yields ENOPROTOOPT.
 *
 * The socket must be locked by the caller (asserted below).
 *
 * Returns 0 on success or an errno value.  Note that at level
 * IPPROTO_IPV6 an op other than PRCO_SETOPT/PRCO_GETOPT matches no case
 * of the op switch, so the initial error value (0) is returned.
1327  */
1328 int
1329 ip6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1330 {
1331 	int optdatalen, uproto;
1332 	void *optdata;
1333 	struct in6pcb *in6p = sotoin6pcb(so);
1334 	struct ip_moptions **mopts;
1335 	int error, optval;
1336 	int level, optname;
1337 
1338 	KASSERT(solocked(so));
1339 	KASSERT(sopt != NULL);
1340 
1341 	level = sopt->sopt_level;
1342 	optname = sopt->sopt_name;
1343 
1344 	error = optval = 0;
1345 	uproto = (int)so->so_proto->pr_protocol;
1346 
	/* First dispatch on the option level; only IPPROTO_IPV6 continues. */
1347 	switch (level) {
1348 	case IPPROTO_IP:
1349 		switch (optname) {
1350 		case IP_ADD_MEMBERSHIP:
1351 		case IP_DROP_MEMBERSHIP:
1352 		case IP_MULTICAST_IF:
1353 		case IP_MULTICAST_LOOP:
1354 		case IP_MULTICAST_TTL:
1355 			mopts = &in6p->in6p_v4moptions;
1356 			switch (op) {
1357 			case PRCO_GETOPT:
1358 				return ip_getmoptions(*mopts, sopt);
1359 			case PRCO_SETOPT:
1360 				return ip_setmoptions(mopts, sopt);
1361 			default:
1362 				return EINVAL;
1363 			}
1364 		default:
1365 			return ENOPROTOOPT;
1366 		}
1367 	case IPPROTO_IPV6:
1368 		break;
1369 	default:
1370 		return ENOPROTOOPT;
1371 	}
	/* From here on the level is IPPROTO_IPV6. */
1372 	switch (op) {
1373 	case PRCO_SETOPT:
1374 		switch (optname) {
1375 #ifdef RFC2292
1376 		case IPV6_2292PKTOPTIONS:
1377 			error = ip6_pcbopts(&in6p->in6p_outputopts, so, sopt);
1378 			break;
1379 #endif
1380 
1381 		/*
1382 		 * Use of some Hop-by-Hop options or some
1383 		 * Destination options, might require special
1384 		 * privilege.  That is, normal applications
1385 		 * (without special privilege) might be forbidden
1386 		 * from setting certain options in outgoing packets,
1387 		 * and might never see certain options in received
1388 		 * packets. [RFC 2292 Section 6]
1389 		 * KAME specific note:
1390 		 *  KAME prevents non-privileged users from sending or
1391 		 *  receiving ANY hbh/dst options in order to avoid
1392 		 *  overhead of parsing options in the kernel.
1393 		 */
1394 		case IPV6_RECVHOPOPTS:
1395 		case IPV6_RECVDSTOPTS:
1396 		case IPV6_RECVRTHDRDSTOPTS:
1397 			error = kauth_authorize_network(kauth_cred_get(),
1398 			    KAUTH_NETWORK_IPV6, KAUTH_REQ_NETWORK_IPV6_HOPBYHOP,
1399 			    NULL, NULL, NULL);
1400 			if (error)
1401 				break;
1402 			/* FALLTHROUGH */
1403 		case IPV6_UNICAST_HOPS:
1404 		case IPV6_HOPLIMIT:
1405 		case IPV6_FAITH:
1406 
1407 		case IPV6_RECVPKTINFO:
1408 		case IPV6_RECVHOPLIMIT:
1409 		case IPV6_RECVRTHDR:
1410 		case IPV6_RECVPATHMTU:
1411 		case IPV6_RECVTCLASS:
1412 		case IPV6_V6ONLY:
			/* Fetch the int argument shared by this group of options. */
1413 			error = sockopt_getint(sopt, &optval);
1414 			if (error)
1415 				break;
1416 			switch (optname) {
1417 			case IPV6_UNICAST_HOPS:
1418 				if (optval < -1 || optval >= 256)
1419 					error = EINVAL;
1420 				else {
1421 					/* -1 = kernel default */
1422 					in6p->in6p_hops = optval;
1423 				}
1424 				break;
1425 #define OPTSET(bit) \
1426 do { \
1427 if (optval) \
1428 	in6p->in6p_flags |= (bit); \
1429 else \
1430 	in6p->in6p_flags &= ~(bit); \
1431 } while (/*CONSTCOND*/ 0)
1432 
1433 #ifdef RFC2292
1434 #define OPTSET2292(bit) 			\
1435 do { 						\
1436 in6p->in6p_flags |= IN6P_RFC2292; 	\
1437 if (optval) 				\
1438 	in6p->in6p_flags |= (bit); 	\
1439 else 					\
1440 	in6p->in6p_flags &= ~(bit); 	\
1441 } while (/*CONSTCOND*/ 0)
1442 #endif
1443 
1444 #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
1445 
1446 			case IPV6_RECVPKTINFO:
1447 #ifdef RFC2292
1448 				/* cannot mix with RFC2292 */
1449 				if (OPTBIT(IN6P_RFC2292)) {
1450 					error = EINVAL;
1451 					break;
1452 				}
1453 #endif
1454 				OPTSET(IN6P_PKTINFO);
1455 				break;
1456 
1457 			case IPV6_HOPLIMIT:
1458 			{
1459 				struct ip6_pktopts **optp;
1460 
1461 #ifdef RFC2292
1462 				/* cannot mix with RFC2292 */
1463 				if (OPTBIT(IN6P_RFC2292)) {
1464 					error = EINVAL;
1465 					break;
1466 				}
1467 #endif
				/* Stored as a sticky option rather than a pcb flag. */
1468 				optp = &in6p->in6p_outputopts;
1469 				error = ip6_pcbopt(IPV6_HOPLIMIT,
1470 						   (u_char *)&optval,
1471 						   sizeof(optval),
1472 						   optp,
1473 						   kauth_cred_get(), uproto);
1474 				break;
1475 			}
1476 
1477 			case IPV6_RECVHOPLIMIT:
1478 #ifdef RFC2292
1479 				/* cannot mix with RFC2292 */
1480 				if (OPTBIT(IN6P_RFC2292)) {
1481 					error = EINVAL;
1482 					break;
1483 				}
1484 #endif
1485 				OPTSET(IN6P_HOPLIMIT);
1486 				break;
1487 
1488 			case IPV6_RECVHOPOPTS:
1489 #ifdef RFC2292
1490 				/* cannot mix with RFC2292 */
1491 				if (OPTBIT(IN6P_RFC2292)) {
1492 					error = EINVAL;
1493 					break;
1494 				}
1495 #endif
1496 				OPTSET(IN6P_HOPOPTS);
1497 				break;
1498 
1499 			case IPV6_RECVDSTOPTS:
1500 #ifdef RFC2292
1501 				/* cannot mix with RFC2292 */
1502 				if (OPTBIT(IN6P_RFC2292)) {
1503 					error = EINVAL;
1504 					break;
1505 				}
1506 #endif
1507 				OPTSET(IN6P_DSTOPTS);
1508 				break;
1509 
1510 			case IPV6_RECVRTHDRDSTOPTS:
1511 #ifdef RFC2292
1512 				/* cannot mix with RFC2292 */
1513 				if (OPTBIT(IN6P_RFC2292)) {
1514 					error = EINVAL;
1515 					break;
1516 				}
1517 #endif
1518 				OPTSET(IN6P_RTHDRDSTOPTS);
1519 				break;
1520 
1521 			case IPV6_RECVRTHDR:
1522 #ifdef RFC2292
1523 				/* cannot mix with RFC2292 */
1524 				if (OPTBIT(IN6P_RFC2292)) {
1525 					error = EINVAL;
1526 					break;
1527 				}
1528 #endif
1529 				OPTSET(IN6P_RTHDR);
1530 				break;
1531 
1532 			case IPV6_FAITH:
1533 				OPTSET(IN6P_FAITH);
1534 				break;
1535 
1536 			case IPV6_RECVPATHMTU:
1537 				/*
1538 				 * We ignore this option for TCP
1539 				 * sockets.
1540 				 * (RFC3542 leaves this case
1541 				 * unspecified.)
1542 				 */
1543 				if (uproto != IPPROTO_TCP)
1544 					OPTSET(IN6P_MTU);
1545 				break;
1546 
1547 			case IPV6_V6ONLY:
1548 				/*
1549 				 * make setsockopt(IPV6_V6ONLY)
1550 				 * available only prior to bind(2).
1551 				 * see ipng mailing list, Jun 22 2001.
1552 				 */
1553 				if (in6p->in6p_lport ||
1554 				    !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1555 					error = EINVAL;
1556 					break;
1557 				}
				/*
				 * With INET6_BINDV6ONLY the flag is never
				 * modified here; a request to clear it is
				 * simply refused.
				 */
1558 #ifdef INET6_BINDV6ONLY
1559 				if (!optval)
1560 					error = EINVAL;
1561 #else
1562 				OPTSET(IN6P_IPV6_V6ONLY);
1563 #endif
1564 				break;
1565 			case IPV6_RECVTCLASS:
1566 #ifdef RFC2292
1567 				/* cannot mix with RFC2292 XXX */
1568 				if (OPTBIT(IN6P_RFC2292)) {
1569 					error = EINVAL;
1570 					break;
1571 				}
1572 #endif
1573 				OPTSET(IN6P_TCLASS);
1574 				break;
1575 
1576 			}
1577 			break;
1578 
		/* Takes a one-byte (u_int8_t) value instead of an int. */
1579 		case IPV6_OTCLASS:
1580 		{
1581 			struct ip6_pktopts **optp;
1582 			u_int8_t tclass;
1583 
1584 			error = sockopt_get(sopt, &tclass, sizeof(tclass));
1585 			if (error)
1586 				break;
1587 			optp = &in6p->in6p_outputopts;
1588 			error = ip6_pcbopt(optname,
1589 					   (u_char *)&tclass,
1590 					   sizeof(tclass),
1591 					   optp,
1592 					   kauth_cred_get(), uproto);
1593 			break;
1594 		}
1595 
		/* int-valued sticky options, stored via ip6_pcbopt(). */
1596 		case IPV6_TCLASS:
1597 		case IPV6_DONTFRAG:
1598 		case IPV6_USE_MIN_MTU:
1599 		case IPV6_PREFER_TEMPADDR:
1600 			error = sockopt_getint(sopt, &optval);
1601 			if (error)
1602 				break;
1603 			{
1604 				struct ip6_pktopts **optp;
1605 				optp = &in6p->in6p_outputopts;
1606 				error = ip6_pcbopt(optname,
1607 						   (u_char *)&optval,
1608 						   sizeof(optval),
1609 						   optp,
1610 						   kauth_cred_get(), uproto);
1611 				break;
1612 			}
1613 
1614 #ifdef RFC2292
1615 		case IPV6_2292PKTINFO:
1616 		case IPV6_2292HOPLIMIT:
1617 		case IPV6_2292HOPOPTS:
1618 		case IPV6_2292DSTOPTS:
1619 		case IPV6_2292RTHDR:
1620 			/* RFC 2292 */
1621 			error = sockopt_getint(sopt, &optval);
1622 			if (error)
1623 				break;
1624 
			/* OPTSET2292() also marks the pcb with IN6P_RFC2292. */
1625 			switch (optname) {
1626 			case IPV6_2292PKTINFO:
1627 				OPTSET2292(IN6P_PKTINFO);
1628 				break;
1629 			case IPV6_2292HOPLIMIT:
1630 				OPTSET2292(IN6P_HOPLIMIT);
1631 				break;
1632 			case IPV6_2292HOPOPTS:
1633 				/*
1634 				 * Check super-user privilege.
1635 				 * See comments for IPV6_RECVHOPOPTS.
1636 				 */
1637 				error =
1638 				    kauth_authorize_network(kauth_cred_get(),
1639 				    KAUTH_NETWORK_IPV6,
1640 				    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1641 				    NULL, NULL);
1642 				if (error)
1643 					return (error);
1644 				OPTSET2292(IN6P_HOPOPTS);
1645 				break;
1646 			case IPV6_2292DSTOPTS:
1647 				error =
1648 				    kauth_authorize_network(kauth_cred_get(),
1649 				    KAUTH_NETWORK_IPV6,
1650 				    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL,
1651 				    NULL, NULL);
1652 				if (error)
1653 					return (error);
1654 				OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1655 				break;
1656 			case IPV6_2292RTHDR:
1657 				OPTSET2292(IN6P_RTHDR);
1658 				break;
1659 			}
1660 			break;
1661 #endif
1662 		case IPV6_PKTINFO:
1663 		case IPV6_HOPOPTS:
1664 		case IPV6_RTHDR:
1665 		case IPV6_DSTOPTS:
1666 		case IPV6_RTHDRDSTOPTS:
1667 		case IPV6_NEXTHOP: {
1668 			/* new advanced API (RFC3542) */
1669 			void *optbuf;
1670 			int optbuflen;
1671 			struct ip6_pktopts **optp;
1672 
1673 #ifdef RFC2292
1674 			/* cannot mix with RFC2292 */
1675 			if (OPTBIT(IN6P_RFC2292)) {
1676 				error = EINVAL;
1677 				break;
1678 			}
1679 #endif
1680 
			/*
			 * Copy the option data into a temporary buffer of
			 * sopt_size bytes; the buffer is freed on every
			 * path below once ip6_pcbopt() has consumed it.
			 */
1681 			optbuflen = sopt->sopt_size;
1682 			optbuf = malloc(optbuflen, M_IP6OPT, M_NOWAIT);
1683 			if (optbuf == NULL) {
1684 				error = ENOBUFS;
1685 				break;
1686 			}
1687 
1688 			error = sockopt_get(sopt, optbuf, optbuflen);
1689 			if (error) {
1690 				free(optbuf, M_IP6OPT);
1691 				break;
1692 			}
1693 			optp = &in6p->in6p_outputopts;
1694 			error = ip6_pcbopt(optname, optbuf, optbuflen,
1695 			    optp, kauth_cred_get(), uproto);
1696 
1697 			free(optbuf, M_IP6OPT);
1698 			break;
1699 			}
			/*
			 * Only OPTSET is undefined here; OPTBIT is still
			 * needed by the PRCO_GETOPT code below.
			 */
1700 #undef OPTSET
1701 
1702 		case IPV6_MULTICAST_IF:
1703 		case IPV6_MULTICAST_HOPS:
1704 		case IPV6_MULTICAST_LOOP:
1705 		case IPV6_JOIN_GROUP:
1706 		case IPV6_LEAVE_GROUP:
1707 			error = ip6_setmoptions(sopt, in6p);
1708 			break;
1709 
1710 		case IPV6_PORTRANGE:
1711 			error = sockopt_getint(sopt, &optval);
1712 			if (error)
1713 				break;
1714 
			/* IN6P_LOWPORT and IN6P_HIGHPORT are kept mutually exclusive. */
1715 			switch (optval) {
1716 			case IPV6_PORTRANGE_DEFAULT:
1717 				in6p->in6p_flags &= ~(IN6P_LOWPORT);
1718 				in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1719 				break;
1720 
1721 			case IPV6_PORTRANGE_HIGH:
1722 				in6p->in6p_flags &= ~(IN6P_LOWPORT);
1723 				in6p->in6p_flags |= IN6P_HIGHPORT;
1724 				break;
1725 
1726 			case IPV6_PORTRANGE_LOW:
1727 				in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1728 				in6p->in6p_flags |= IN6P_LOWPORT;
1729 				break;
1730 
1731 			default:
1732 				error = EINVAL;
1733 				break;
1734 			}
1735 			break;
1736 
1737 		case IPV6_PORTALGO:
1738 			error = sockopt_getint(sopt, &optval);
1739 			if (error)
1740 				break;
1741 
1742 			error = portalgo_algo_index_select(
1743 			    (struct inpcb_hdr *)in6p, optval);
1744 			break;
1745 
1746 #if defined(IPSEC)
1747 		case IPV6_IPSEC_POLICY:
1748 			if (ipsec_enabled) {
1749 				error = ipsec_set_policy(in6p,
1750 				    sopt->sopt_data, sopt->sopt_size,
1751 				    kauth_cred_get());
1752 				break;
1753 			}
1754 			/*FALLTHROUGH*/
1755 #endif /* IPSEC */
1756 
1757 		default:
1758 			error = ENOPROTOOPT;
1759 			break;
1760 		}
1761 		break;
1762 
1763 	case PRCO_GETOPT:
1764 		switch (optname) {
1765 #ifdef RFC2292
1766 		case IPV6_2292PKTOPTIONS:
1767 			/*
1768 			 * RFC3542 (effectively) deprecated the
1769 			 * semantics of the 2292-style pktoptions.
1770 			 * Since it was not reliable in nature (i.e.,
1771 			 * applications had to expect the lack of some
1772 			 * information after all), it would make sense
1773 			 * to simplify this part by always returning
1774 			 * empty data.
1775 			 */
1776 			break;
1777 #endif
1778 
		/* Options whose current value is reported as a single int. */
1779 		case IPV6_RECVHOPOPTS:
1780 		case IPV6_RECVDSTOPTS:
1781 		case IPV6_RECVRTHDRDSTOPTS:
1782 		case IPV6_UNICAST_HOPS:
1783 		case IPV6_RECVPKTINFO:
1784 		case IPV6_RECVHOPLIMIT:
1785 		case IPV6_RECVRTHDR:
1786 		case IPV6_RECVPATHMTU:
1787 
1788 		case IPV6_FAITH:
1789 		case IPV6_V6ONLY:
1790 		case IPV6_PORTRANGE:
1791 		case IPV6_RECVTCLASS:
1792 			switch (optname) {
1793 
1794 			case IPV6_RECVHOPOPTS:
1795 				optval = OPTBIT(IN6P_HOPOPTS);
1796 				break;
1797 
1798 			case IPV6_RECVDSTOPTS:
1799 				optval = OPTBIT(IN6P_DSTOPTS);
1800 				break;
1801 
1802 			case IPV6_RECVRTHDRDSTOPTS:
1803 				optval = OPTBIT(IN6P_RTHDRDSTOPTS);
1804 				break;
1805 
1806 			case IPV6_UNICAST_HOPS:
1807 				optval = in6p->in6p_hops;
1808 				break;
1809 
1810 			case IPV6_RECVPKTINFO:
1811 				optval = OPTBIT(IN6P_PKTINFO);
1812 				break;
1813 
1814 			case IPV6_RECVHOPLIMIT:
1815 				optval = OPTBIT(IN6P_HOPLIMIT);
1816 				break;
1817 
1818 			case IPV6_RECVRTHDR:
1819 				optval = OPTBIT(IN6P_RTHDR);
1820 				break;
1821 
1822 			case IPV6_RECVPATHMTU:
1823 				optval = OPTBIT(IN6P_MTU);
1824 				break;
1825 
1826 			case IPV6_FAITH:
1827 				optval = OPTBIT(IN6P_FAITH);
1828 				break;
1829 
1830 			case IPV6_V6ONLY:
1831 				optval = OPTBIT(IN6P_IPV6_V6ONLY);
1832 				break;
1833 
1834 			case IPV6_PORTRANGE:
1835 			    {
1836 				int flags;
1837 				flags = in6p->in6p_flags;
1838 				if (flags & IN6P_HIGHPORT)
1839 					optval = IPV6_PORTRANGE_HIGH;
1840 				else if (flags & IN6P_LOWPORT)
1841 					optval = IPV6_PORTRANGE_LOW;
1842 				else
1843 					optval = 0;
1844 				break;
1845 			    }
1846 			case IPV6_RECVTCLASS:
1847 				optval = OPTBIT(IN6P_TCLASS);
1848 				break;
1849 
1850 			}
			/*
			 * Nothing in this case assigns error before this
			 * point, so it still holds its initial value of 0.
			 */
1851 			if (error)
1852 				break;
1853 			error = sockopt_setint(sopt, optval);
1854 			break;
1855 
1856 		case IPV6_PATHMTU:
1857 		    {
1858 			u_long pmtu = 0;
1859 			struct ip6_mtuinfo mtuinfo;
1860 			struct route *ro = &in6p->in6p_route;
1861 			struct rtentry *rt;
1862 			union {
1863 				struct sockaddr		dst;
1864 				struct sockaddr_in6	dst6;
1865 			} u;
1866 
1867 			if (!(so->so_state & SS_ISCONNECTED))
1868 				return (ENOTCONN);
1869 			/*
1870 			 * XXX: we dot not consider the case of source
1871 			 * routing, or optional information to specify
1872 			 * the outgoing interface.
1873 			 */
			/*
			 * Look up the pcb's cached route for the foreign
			 * address and derive the path MTU from it.  No
			 * interface is passed, so ip6_getpmtu() must take
			 * it from the route.
			 */
1874 			sockaddr_in6_init(&u.dst6, &in6p->in6p_faddr, 0, 0, 0);
1875 			rt = rtcache_lookup(ro, &u.dst);
1876 			error = ip6_getpmtu(rt, NULL, &pmtu, NULL);
1877 			rtcache_unref(rt, ro);
1878 			if (error)
1879 				break;
1880 			if (pmtu > IPV6_MAXPACKET)
1881 				pmtu = IPV6_MAXPACKET;
1882 
			/* Only ip6m_mtu is filled in; the rest stays zeroed. */
1883 			memset(&mtuinfo, 0, sizeof(mtuinfo));
1884 			mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
1885 			optdata = (void *)&mtuinfo;
1886 			optdatalen = sizeof(mtuinfo);
1887 			if (optdatalen > MCLBYTES)
1888 				return (EMSGSIZE); /* XXX */
1889 			error = sockopt_set(sopt, optdata, optdatalen);
1890 			break;
1891 		    }
1892 
1893 #ifdef RFC2292
1894 		case IPV6_2292PKTINFO:
1895 		case IPV6_2292HOPLIMIT:
1896 		case IPV6_2292HOPOPTS:
1897 		case IPV6_2292RTHDR:
1898 		case IPV6_2292DSTOPTS:
1899 			switch (optname) {
1900 			case IPV6_2292PKTINFO:
1901 				optval = OPTBIT(IN6P_PKTINFO);
1902 				break;
1903 			case IPV6_2292HOPLIMIT:
1904 				optval = OPTBIT(IN6P_HOPLIMIT);
1905 				break;
1906 			case IPV6_2292HOPOPTS:
1907 				optval = OPTBIT(IN6P_HOPOPTS);
1908 				break;
1909 			case IPV6_2292RTHDR:
1910 				optval = OPTBIT(IN6P_RTHDR);
1911 				break;
1912 			case IPV6_2292DSTOPTS:
1913 				optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1914 				break;
1915 			}
1916 			error = sockopt_setint(sopt, optval);
1917 			break;
1918 #endif
		/* Sticky options are read back from in6p_outputopts. */
1919 		case IPV6_PKTINFO:
1920 		case IPV6_HOPOPTS:
1921 		case IPV6_RTHDR:
1922 		case IPV6_DSTOPTS:
1923 		case IPV6_RTHDRDSTOPTS:
1924 		case IPV6_NEXTHOP:
1925 		case IPV6_OTCLASS:
1926 		case IPV6_TCLASS:
1927 		case IPV6_DONTFRAG:
1928 		case IPV6_USE_MIN_MTU:
1929 		case IPV6_PREFER_TEMPADDR:
1930 			error = ip6_getpcbopt(in6p->in6p_outputopts,
1931 			    optname, sopt);
1932 			break;
1933 
1934 		case IPV6_MULTICAST_IF:
1935 		case IPV6_MULTICAST_HOPS:
1936 		case IPV6_MULTICAST_LOOP:
1937 		case IPV6_JOIN_GROUP:
1938 		case IPV6_LEAVE_GROUP:
1939 			error = ip6_getmoptions(sopt, in6p);
1940 			break;
1941 
1942 		case IPV6_PORTALGO:
1943 			optval = ((struct inpcb_hdr *)in6p)->inph_portalgo;
1944 			error = sockopt_setint(sopt, optval);
1945 			break;
1946 
1947 #if defined(IPSEC)
1948 		case IPV6_IPSEC_POLICY:
			/* NOTE(review): the set path tests ipsec_enabled, this one ipsec_used -- confirm intended. */
1949 			if (ipsec_used) {
1950 				struct mbuf *m = NULL;
1951 
1952 				/*
1953 				 * XXX: this will return EINVAL as sopt is
1954 				 * empty
1955 				 */
1956 				error = ipsec_get_policy(in6p, sopt->sopt_data,
1957 				    sopt->sopt_size, &m);
1958 				if (!error)
1959 					error = sockopt_setmbuf(sopt, m);
1960 				break;
1961 			}
1962 			/*FALLTHROUGH*/
1963 #endif /* IPSEC */
1964 
1965 		default:
1966 			error = ENOPROTOOPT;
1967 			break;
1968 		}
1969 		break;
1970 	}
1971 	return (error);
1972 }
1973 
1974 int
1975 ip6_raw_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1976 {
1977 	int error = 0, optval;
1978 	const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
1979 	struct in6pcb *in6p = sotoin6pcb(so);
1980 	int level, optname;
1981 
1982 	KASSERT(sopt != NULL);
1983 
1984 	level = sopt->sopt_level;
1985 	optname = sopt->sopt_name;
1986 
1987 	if (level != IPPROTO_IPV6) {
1988 		return ENOPROTOOPT;
1989 	}
1990 
1991 	switch (optname) {
1992 	case IPV6_CHECKSUM:
1993 		/*
1994 		 * For ICMPv6 sockets, no modification allowed for checksum
1995 		 * offset, permit "no change" values to help existing apps.
1996 		 *
1997 		 * XXX RFC3542 says: "An attempt to set IPV6_CHECKSUM
1998 		 * for an ICMPv6 socket will fail."  The current
1999 		 * behavior does not meet RFC3542.
2000 		 */
2001 		switch (op) {
2002 		case PRCO_SETOPT:
2003 			error = sockopt_getint(sopt, &optval);
2004 			if (error)
2005 				break;
2006 			if ((optval % 2) != 0) {
2007 				/* the API assumes even offset values */
2008 				error = EINVAL;
2009 			} else if (so->so_proto->pr_protocol ==
2010 			    IPPROTO_ICMPV6) {
2011 				if (optval != icmp6off)
2012 					error = EINVAL;
2013 			} else
2014 				in6p->in6p_cksum = optval;
2015 			break;
2016 
2017 		case PRCO_GETOPT:
2018 			if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2019 				optval = icmp6off;
2020 			else
2021 				optval = in6p->in6p_cksum;
2022 
2023 			error = sockopt_setint(sopt, optval);
2024 			break;
2025 
2026 		default:
2027 			error = EINVAL;
2028 			break;
2029 		}
2030 		break;
2031 
2032 	default:
2033 		error = ENOPROTOOPT;
2034 		break;
2035 	}
2036 
2037 	return (error);
2038 }
2039 
#ifdef RFC2292
/*
 * Set up IP6 options in pcb for insertion in output packets or
 * specifying behavior of outgoing packets.
 *
 * Any options already attached through *pktopt are cleared first.  When
 * sopt is NULL or carries no data the call only clears: the options
 * structure is freed and *pktopt is left NULL.  Otherwise the option
 * data is fetched as an mbuf and parsed by ip6_setpktopts(); on success
 * *pktopt points to the resulting options.
 *
 * On every failure path the options structure is freed and *pktopt is
 * NULL, except when the initial allocation fails (the function returns
 * before touching *pktopt, which was NULL in that case anyway).
 *
 * The socket must be locked.  Returns 0 or an errno value.
 */
static int
ip6_pcbopts(struct ip6_pktopts **pktopt, struct socket *so,
    struct sockopt *sopt)
{
	struct ip6_pktopts *newopt = *pktopt;
	struct mbuf *optm;
	int error;

	KASSERT(solocked(so));

	if (newopt == NULL) {
		newopt = malloc(sizeof(*newopt), M_IP6OPT, M_NOWAIT);
		if (newopt == NULL)
			return (ENOBUFS);
	} else {
		/* turn off any old options. */
#ifdef DIAGNOSTIC
		if (newopt->ip6po_pktinfo || newopt->ip6po_nexthop ||
		    newopt->ip6po_hbh || newopt->ip6po_dest1 ||
		    newopt->ip6po_dest2 ||
		    newopt->ip6po_rhinfo.ip6po_rhi_rthdr)
			printf("ip6_pcbopts: all specified options are cleared.\n");
#endif
		ip6_clearpktopts(newopt, -1);
	}

	/* Detach from the pcb until the new contents are known to be good. */
	*pktopt = NULL;

	if (sopt == NULL || sopt->sopt_size == 0) {
		/*
		 * Only turning off any previous options, regardless of
		 * whether the opt is just created or given.
		 */
		free(newopt, M_IP6OPT);
		return (0);
	}

	/*  set options specified by user. */
	optm = sockopt_getmbuf(sopt);
	if (optm == NULL) {
		free(newopt, M_IP6OPT);
		return (ENOBUFS);
	}

	error = ip6_setpktopts(optm, newopt, NULL, kauth_cred_get(),
	    so->so_proto->pr_protocol);
	m_freem(optm);

	if (error == 0) {
		*pktopt = newopt;
		return (0);
	}

	ip6_clearpktopts(newopt, -1); /* XXX: discard all options */
	free(newopt, M_IP6OPT);
	return (error);
}
#endif
2099 
2100 /*
2101  * initialize ip6_pktopts.  beware that there are non-zero default values in
2102  * the struct.
2103  */
2104 void
2105 ip6_initpktopts(struct ip6_pktopts *opt)
2106 {
2107 
2108 	memset(opt, 0, sizeof(*opt));
2109 	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
2110 	opt->ip6po_tclass = -1;	/* -1 means default traffic class */
2111 	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2112 	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2113 }
2114 
2115 #define sin6tosa(sin6)	((struct sockaddr *)(sin6)) /* XXX */
2116 static int
2117 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2118     kauth_cred_t cred, int uproto)
2119 {
2120 	struct ip6_pktopts *opt;
2121 
2122 	if (*pktopt == NULL) {
2123 		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
2124 		    M_NOWAIT);
2125 		if (*pktopt == NULL)
2126 			return (ENOBUFS);
2127 
2128 		ip6_initpktopts(*pktopt);
2129 	}
2130 	opt = *pktopt;
2131 
2132 	return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto));
2133 }
2134 
2135 static int
2136 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2137 {
2138 	void *optdata = NULL;
2139 	int optdatalen = 0;
2140 	struct ip6_ext *ip6e;
2141 	int error = 0;
2142 	struct in6_pktinfo null_pktinfo;
2143 	int deftclass = 0, on;
2144 	int defminmtu = IP6PO_MINMTU_MCASTONLY;
2145 	int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2146 
2147 	switch (optname) {
2148 	case IPV6_PKTINFO:
2149 		if (pktopt && pktopt->ip6po_pktinfo)
2150 			optdata = (void *)pktopt->ip6po_pktinfo;
2151 		else {
2152 			/* XXX: we don't have to do this every time... */
2153 			memset(&null_pktinfo, 0, sizeof(null_pktinfo));
2154 			optdata = (void *)&null_pktinfo;
2155 		}
2156 		optdatalen = sizeof(struct in6_pktinfo);
2157 		break;
2158 	case IPV6_OTCLASS:
2159 		/* XXX */
2160 		return (EINVAL);
2161 	case IPV6_TCLASS:
2162 		if (pktopt && pktopt->ip6po_tclass >= 0)
2163 			optdata = (void *)&pktopt->ip6po_tclass;
2164 		else
2165 			optdata = (void *)&deftclass;
2166 		optdatalen = sizeof(int);
2167 		break;
2168 	case IPV6_HOPOPTS:
2169 		if (pktopt && pktopt->ip6po_hbh) {
2170 			optdata = (void *)pktopt->ip6po_hbh;
2171 			ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2172 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2173 		}
2174 		break;
2175 	case IPV6_RTHDR:
2176 		if (pktopt && pktopt->ip6po_rthdr) {
2177 			optdata = (void *)pktopt->ip6po_rthdr;
2178 			ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2179 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2180 		}
2181 		break;
2182 	case IPV6_RTHDRDSTOPTS:
2183 		if (pktopt && pktopt->ip6po_dest1) {
2184 			optdata = (void *)pktopt->ip6po_dest1;
2185 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2186 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2187 		}
2188 		break;
2189 	case IPV6_DSTOPTS:
2190 		if (pktopt && pktopt->ip6po_dest2) {
2191 			optdata = (void *)pktopt->ip6po_dest2;
2192 			ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2193 			optdatalen = (ip6e->ip6e_len + 1) << 3;
2194 		}
2195 		break;
2196 	case IPV6_NEXTHOP:
2197 		if (pktopt && pktopt->ip6po_nexthop) {
2198 			optdata = (void *)pktopt->ip6po_nexthop;
2199 			optdatalen = pktopt->ip6po_nexthop->sa_len;
2200 		}
2201 		break;
2202 	case IPV6_USE_MIN_MTU:
2203 		if (pktopt)
2204 			optdata = (void *)&pktopt->ip6po_minmtu;
2205 		else
2206 			optdata = (void *)&defminmtu;
2207 		optdatalen = sizeof(int);
2208 		break;
2209 	case IPV6_DONTFRAG:
2210 		if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2211 			on = 1;
2212 		else
2213 			on = 0;
2214 		optdata = (void *)&on;
2215 		optdatalen = sizeof(on);
2216 		break;
2217 	case IPV6_PREFER_TEMPADDR:
2218 		if (pktopt)
2219 			optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2220 		else
2221 			optdata = (void *)&defpreftemp;
2222 		optdatalen = sizeof(int);
2223 		break;
2224 	default:		/* should not happen */
2225 #ifdef DIAGNOSTIC
2226 		panic("ip6_getpcbopt: unexpected option\n");
2227 #endif
2228 		return (ENOPROTOOPT);
2229 	}
2230 
2231 	error = sockopt_set(sopt, optdata, optdatalen);
2232 
2233 	return (error);
2234 }
2235 
2236 void
2237 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2238 {
2239 	if (optname == -1 || optname == IPV6_PKTINFO) {
2240 		if (pktopt->ip6po_pktinfo)
2241 			free(pktopt->ip6po_pktinfo, M_IP6OPT);
2242 		pktopt->ip6po_pktinfo = NULL;
2243 	}
2244 	if (optname == -1 || optname == IPV6_HOPLIMIT)
2245 		pktopt->ip6po_hlim = -1;
2246 	if (optname == -1 || optname == IPV6_TCLASS)
2247 		pktopt->ip6po_tclass = -1;
2248 	if (optname == -1 || optname == IPV6_NEXTHOP) {
2249 		rtcache_free(&pktopt->ip6po_nextroute);
2250 		if (pktopt->ip6po_nexthop)
2251 			free(pktopt->ip6po_nexthop, M_IP6OPT);
2252 		pktopt->ip6po_nexthop = NULL;
2253 	}
2254 	if (optname == -1 || optname == IPV6_HOPOPTS) {
2255 		if (pktopt->ip6po_hbh)
2256 			free(pktopt->ip6po_hbh, M_IP6OPT);
2257 		pktopt->ip6po_hbh = NULL;
2258 	}
2259 	if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2260 		if (pktopt->ip6po_dest1)
2261 			free(pktopt->ip6po_dest1, M_IP6OPT);
2262 		pktopt->ip6po_dest1 = NULL;
2263 	}
2264 	if (optname == -1 || optname == IPV6_RTHDR) {
2265 		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2266 			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2267 		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2268 		rtcache_free(&pktopt->ip6po_route);
2269 	}
2270 	if (optname == -1 || optname == IPV6_DSTOPTS) {
2271 		if (pktopt->ip6po_dest2)
2272 			free(pktopt->ip6po_dest2, M_IP6OPT);
2273 		pktopt->ip6po_dest2 = NULL;
2274 	}
2275 }
2276 
2277 #define PKTOPT_EXTHDRCPY(type) 					\
2278 do {								\
2279 	if (src->type) {					\
2280 		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2281 		dst->type = malloc(hlen, M_IP6OPT, canwait);	\
2282 		if (dst->type == NULL)				\
2283 			goto bad;				\
2284 		memcpy(dst->type, src->type, hlen);		\
2285 	}							\
2286 } while (/*CONSTCOND*/ 0)
2287 
2288 static int
2289 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2290 {
2291 	dst->ip6po_hlim = src->ip6po_hlim;
2292 	dst->ip6po_tclass = src->ip6po_tclass;
2293 	dst->ip6po_flags = src->ip6po_flags;
2294 	dst->ip6po_minmtu = src->ip6po_minmtu;
2295 	dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
2296 	if (src->ip6po_pktinfo) {
2297 		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
2298 		    M_IP6OPT, canwait);
2299 		if (dst->ip6po_pktinfo == NULL)
2300 			goto bad;
2301 		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2302 	}
2303 	if (src->ip6po_nexthop) {
2304 		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
2305 		    M_IP6OPT, canwait);
2306 		if (dst->ip6po_nexthop == NULL)
2307 			goto bad;
2308 		memcpy(dst->ip6po_nexthop, src->ip6po_nexthop,
2309 		    src->ip6po_nexthop->sa_len);
2310 	}
2311 	PKTOPT_EXTHDRCPY(ip6po_hbh);
2312 	PKTOPT_EXTHDRCPY(ip6po_dest1);
2313 	PKTOPT_EXTHDRCPY(ip6po_dest2);
2314 	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2315 	return (0);
2316 
2317   bad:
2318 	if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT);
2319 	if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT);
2320 	if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT);
2321 	if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT);
2322 	if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT);
2323 	if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT);
2324 
2325 	return (ENOBUFS);
2326 }
2327 #undef PKTOPT_EXTHDRCPY
2328 
2329 struct ip6_pktopts *
2330 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2331 {
2332 	int error;
2333 	struct ip6_pktopts *dst;
2334 
2335 	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
2336 	if (dst == NULL)
2337 		return (NULL);
2338 	ip6_initpktopts(dst);
2339 
2340 	if ((error = copypktopts(dst, src, canwait)) != 0) {
2341 		free(dst, M_IP6OPT);
2342 		return (NULL);
2343 	}
2344 
2345 	return (dst);
2346 }
2347 
2348 void
2349 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2350 {
2351 	if (pktopt == NULL)
2352 		return;
2353 
2354 	ip6_clearpktopts(pktopt, -1);
2355 
2356 	free(pktopt, M_IP6OPT);
2357 }
2358 
2359 int
2360 ip6_get_membership(const struct sockopt *sopt, struct ifnet **ifp,
2361     struct psref *psref, void *v, size_t l)
2362 {
2363 	struct ipv6_mreq mreq;
2364 	int error;
2365 	struct in6_addr *ia = &mreq.ipv6mr_multiaddr;
2366 	struct in_addr *ia4 = (void *)&ia->s6_addr32[3];
2367 
2368 	error = sockopt_get(sopt, &mreq, sizeof(mreq));
2369 	if (error != 0)
2370 		return error;
2371 
2372 	if (IN6_IS_ADDR_UNSPECIFIED(ia)) {
2373 		/*
2374 		 * We use the unspecified address to specify to accept
2375 		 * all multicast addresses. Only super user is allowed
2376 		 * to do this.
2377 		 */
2378 		if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_IPV6,
2379 		    KAUTH_REQ_NETWORK_IPV6_JOIN_MULTICAST, NULL, NULL, NULL))
2380 			return EACCES;
2381 	} else if (IN6_IS_ADDR_V4MAPPED(ia)) {
2382 		// Don't bother if we are not going to use ifp.
2383 		if (l == sizeof(*ia)) {
2384 			memcpy(v, ia, l);
2385 			return 0;
2386 		}
2387 	} else if (!IN6_IS_ADDR_MULTICAST(ia)) {
2388 		return EINVAL;
2389 	}
2390 
2391 	/*
2392 	 * If no interface was explicitly specified, choose an
2393 	 * appropriate one according to the given multicast address.
2394 	 */
2395 	if (mreq.ipv6mr_interface == 0) {
2396 		struct rtentry *rt;
2397 		union {
2398 			struct sockaddr		dst;
2399 			struct sockaddr_in	dst4;
2400 			struct sockaddr_in6	dst6;
2401 		} u;
2402 		struct route ro;
2403 
2404 		/*
2405 		 * Look up the routing table for the
2406 		 * address, and choose the outgoing interface.
2407 		 *   XXX: is it a good approach?
2408 		 */
2409 		memset(&ro, 0, sizeof(ro));
2410 		if (IN6_IS_ADDR_V4MAPPED(ia))
2411 			sockaddr_in_init(&u.dst4, ia4, 0);
2412 		else
2413 			sockaddr_in6_init(&u.dst6, ia, 0, 0, 0);
2414 		error = rtcache_setdst(&ro, &u.dst);
2415 		if (error != 0)
2416 			return error;
2417 		rt = rtcache_init(&ro);
2418 		*ifp = rt != NULL ?
2419 		    if_get_byindex(rt->rt_ifp->if_index, psref) : NULL;
2420 		rtcache_unref(rt, &ro);
2421 		rtcache_free(&ro);
2422 	} else {
2423 		/*
2424 		 * If the interface is specified, validate it.
2425 		 */
2426 		*ifp = if_get_byindex(mreq.ipv6mr_interface, psref);
2427 		if (*ifp == NULL)
2428 			return ENXIO;	/* XXX EINVAL? */
2429 	}
2430 	if (sizeof(*ia) == l)
2431 		memcpy(v, ia, l);
2432 	else
2433 		memcpy(v, ia4, l);
2434 	return 0;
2435 }
2436 
2437 /*
2438  * Set the IP6 multicast options in response to user setsockopt().
2439  */
2440 static int
2441 ip6_setmoptions(const struct sockopt *sopt, struct in6pcb *in6p)
2442 {
2443 	int error = 0;
2444 	u_int loop, ifindex;
2445 	struct ipv6_mreq mreq;
2446 	struct in6_addr ia;
2447 	struct ifnet *ifp;
2448 	struct ip6_moptions *im6o = in6p->in6p_moptions;
2449 	struct in6_multi_mship *imm;
2450 
2451 	KASSERT(in6p_locked(in6p));
2452 
2453 	if (im6o == NULL) {
2454 		/*
2455 		 * No multicast option buffer attached to the pcb;
2456 		 * allocate one and initialize to default values.
2457 		 */
2458 		im6o = malloc(sizeof(*im6o), M_IPMOPTS, M_NOWAIT);
2459 		if (im6o == NULL)
2460 			return (ENOBUFS);
2461 		in6p->in6p_moptions = im6o;
2462 		im6o->im6o_multicast_if_index = 0;
2463 		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2464 		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
2465 		LIST_INIT(&im6o->im6o_memberships);
2466 	}
2467 
2468 	switch (sopt->sopt_name) {
2469 
2470 	case IPV6_MULTICAST_IF: {
2471 		int s;
2472 		/*
2473 		 * Select the interface for outgoing multicast packets.
2474 		 */
2475 		error = sockopt_get(sopt, &ifindex, sizeof(ifindex));
2476 		if (error != 0)
2477 			break;
2478 
2479 		s = pserialize_read_enter();
2480 		if (ifindex != 0) {
2481 			if ((ifp = if_byindex(ifindex)) == NULL) {
2482 				pserialize_read_exit(s);
2483 				error = ENXIO;	/* XXX EINVAL? */
2484 				break;
2485 			}
2486 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
2487 				pserialize_read_exit(s);
2488 				error = EADDRNOTAVAIL;
2489 				break;
2490 			}
2491 		} else
2492 			ifp = NULL;
2493 		im6o->im6o_multicast_if_index = if_get_index(ifp);
2494 		pserialize_read_exit(s);
2495 		break;
2496 	    }
2497 
2498 	case IPV6_MULTICAST_HOPS:
2499 	    {
2500 		/*
2501 		 * Set the IP6 hoplimit for outgoing multicast packets.
2502 		 */
2503 		int optval;
2504 
2505 		error = sockopt_getint(sopt, &optval);
2506 		if (error != 0)
2507 			break;
2508 
2509 		if (optval < -1 || optval >= 256)
2510 			error = EINVAL;
2511 		else if (optval == -1)
2512 			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
2513 		else
2514 			im6o->im6o_multicast_hlim = optval;
2515 		break;
2516 	    }
2517 
2518 	case IPV6_MULTICAST_LOOP:
2519 		/*
2520 		 * Set the loopback flag for outgoing multicast packets.
2521 		 * Must be zero or one.
2522 		 */
2523 		error = sockopt_get(sopt, &loop, sizeof(loop));
2524 		if (error != 0)
2525 			break;
2526 		if (loop > 1) {
2527 			error = EINVAL;
2528 			break;
2529 		}
2530 		im6o->im6o_multicast_loop = loop;
2531 		break;
2532 
2533 	case IPV6_JOIN_GROUP: {
2534 		int bound;
2535 		struct psref psref;
2536 		/*
2537 		 * Add a multicast group membership.
2538 		 * Group must be a valid IP6 multicast address.
2539 		 */
2540 		bound = curlwp_bind();
2541 		ifp = NULL;
2542 		error = ip6_get_membership(sopt, &ifp, &psref, &ia, sizeof(ia));
2543 		if (error != 0) {
2544 			KASSERT(ifp == NULL);
2545 			curlwp_bindx(bound);
2546 			return error;
2547 		}
2548 
2549 		if (IN6_IS_ADDR_V4MAPPED(&ia)) {
2550 			error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2551 			goto put_break;
2552 		}
2553 		/*
2554 		 * See if we found an interface, and confirm that it
2555 		 * supports multicast
2556 		 */
2557 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
2558 			error = EADDRNOTAVAIL;
2559 			goto put_break;
2560 		}
2561 
2562 		if (in6_setscope(&ia, ifp, NULL)) {
2563 			error = EADDRNOTAVAIL; /* XXX: should not happen */
2564 			goto put_break;
2565 		}
2566 
2567 		/*
2568 		 * See if the membership already exists.
2569 		 */
2570 		LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2571 			if (imm->i6mm_maddr->in6m_ifp == ifp &&
2572 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2573 			    &ia))
2574 				goto put_break;
2575 		}
2576 		if (imm != NULL) {
2577 			error = EADDRINUSE;
2578 			goto put_break;
2579 		}
2580 		/*
2581 		 * Everything looks good; add a new record to the multicast
2582 		 * address list for the given interface.
2583 		 */
2584 		IFNET_LOCK(ifp);
2585 		imm = in6_joingroup(ifp, &ia, &error, 0);
2586 		IFNET_UNLOCK(ifp);
2587 		if (imm == NULL)
2588 			goto put_break;
2589 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2590 	    put_break:
2591 		if_put(ifp, &psref);
2592 		curlwp_bindx(bound);
2593 		break;
2594 	    }
2595 
2596 	case IPV6_LEAVE_GROUP: {
2597 		struct ifnet *in6m_ifp;
2598 		/*
2599 		 * Drop a multicast group membership.
2600 		 * Group must be a valid IP6 multicast address.
2601 		 */
2602 		error = sockopt_get(sopt, &mreq, sizeof(mreq));
2603 		if (error != 0)
2604 			break;
2605 
2606 		if (IN6_IS_ADDR_V4MAPPED(&mreq.ipv6mr_multiaddr)) {
2607 			error = ip_setmoptions(&in6p->in6p_v4moptions, sopt);
2608 			break;
2609 		}
2610 		/*
2611 		 * If an interface address was specified, get a pointer
2612 		 * to its ifnet structure.
2613 		 */
2614 		if (mreq.ipv6mr_interface != 0) {
2615 			if ((ifp = if_byindex(mreq.ipv6mr_interface)) == NULL) {
2616 				error = ENXIO;	/* XXX EINVAL? */
2617 				break;
2618 			}
2619 		} else
2620 			ifp = NULL;
2621 
2622 		/* Fill in the scope zone ID */
2623 		if (ifp) {
2624 			if (in6_setscope(&mreq.ipv6mr_multiaddr, ifp, NULL)) {
2625 				/* XXX: should not happen */
2626 				error = EADDRNOTAVAIL;
2627 				break;
2628 			}
2629 		} else if (mreq.ipv6mr_interface != 0) {
2630 			/*
2631 			 * XXX: This case would happens when the (positive)
2632 			 * index is in the valid range, but the corresponding
2633 			 * interface has been detached dynamically.  The above
2634 			 * check probably avoids such case to happen here, but
2635 			 * we check it explicitly for safety.
2636 			 */
2637 			error = EADDRNOTAVAIL;
2638 			break;
2639 		} else {	/* ipv6mr_interface == 0 */
2640 			struct sockaddr_in6 sa6_mc;
2641 
2642 			/*
2643 			 * The API spec says as follows:
2644 			 *  If the interface index is specified as 0, the
2645 			 *  system may choose a multicast group membership to
2646 			 *  drop by matching the multicast address only.
2647 			 * On the other hand, we cannot disambiguate the scope
2648 			 * zone unless an interface is provided.  Thus, we
2649 			 * check if there's ambiguity with the default scope
2650 			 * zone as the last resort.
2651 			 */
2652 			sockaddr_in6_init(&sa6_mc, &mreq.ipv6mr_multiaddr,
2653 			    0, 0, 0);
2654 			error = sa6_embedscope(&sa6_mc, ip6_use_defzone);
2655 			if (error != 0)
2656 				break;
2657 			mreq.ipv6mr_multiaddr = sa6_mc.sin6_addr;
2658 		}
2659 
2660 		/*
2661 		 * Find the membership in the membership list.
2662 		 */
2663 		LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) {
2664 			if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
2665 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
2666 			    &mreq.ipv6mr_multiaddr))
2667 				break;
2668 		}
2669 		if (imm == NULL) {
2670 			/* Unable to resolve interface */
2671 			error = EADDRNOTAVAIL;
2672 			break;
2673 		}
2674 		/*
2675 		 * Give up the multicast address record to which the
2676 		 * membership points.
2677 		 */
2678 		LIST_REMOVE(imm, i6mm_chain);
2679 		in6m_ifp = imm->i6mm_maddr->in6m_ifp;
2680 		IFNET_LOCK(in6m_ifp);
2681 		in6_leavegroup(imm);
2682 		/* in6m_ifp should not leave thanks to in6p_lock */
2683 		IFNET_UNLOCK(in6m_ifp);
2684 		break;
2685 	    }
2686 
2687 	default:
2688 		error = EOPNOTSUPP;
2689 		break;
2690 	}
2691 
2692 	/*
2693 	 * If all options have default values, no need to keep the mbuf.
2694 	 */
2695 	if (im6o->im6o_multicast_if_index == 0 &&
2696 	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2697 	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2698 	    LIST_EMPTY(&im6o->im6o_memberships)) {
2699 		free(in6p->in6p_moptions, M_IPMOPTS);
2700 		in6p->in6p_moptions = NULL;
2701 	}
2702 
2703 	return (error);
2704 }
2705 
2706 /*
2707  * Return the IP6 multicast options in response to user getsockopt().
2708  */
2709 static int
2710 ip6_getmoptions(struct sockopt *sopt, struct in6pcb *in6p)
2711 {
2712 	u_int optval;
2713 	int error;
2714 	struct ip6_moptions *im6o = in6p->in6p_moptions;
2715 
2716 	switch (sopt->sopt_name) {
2717 	case IPV6_MULTICAST_IF:
2718 		if (im6o == NULL || im6o->im6o_multicast_if_index == 0)
2719 			optval = 0;
2720 		else
2721 			optval = im6o->im6o_multicast_if_index;
2722 
2723 		error = sockopt_set(sopt, &optval, sizeof(optval));
2724 		break;
2725 
2726 	case IPV6_MULTICAST_HOPS:
2727 		if (im6o == NULL)
2728 			optval = ip6_defmcasthlim;
2729 		else
2730 			optval = im6o->im6o_multicast_hlim;
2731 
2732 		error = sockopt_set(sopt, &optval, sizeof(optval));
2733 		break;
2734 
2735 	case IPV6_MULTICAST_LOOP:
2736 		if (im6o == NULL)
2737 			optval = IPV6_DEFAULT_MULTICAST_LOOP;
2738 		else
2739 			optval = im6o->im6o_multicast_loop;
2740 
2741 		error = sockopt_set(sopt, &optval, sizeof(optval));
2742 		break;
2743 
2744 	default:
2745 		error = EOPNOTSUPP;
2746 	}
2747 
2748 	return (error);
2749 }
2750 
2751 /*
2752  * Discard the IP6 multicast options.
2753  */
2754 void
2755 ip6_freemoptions(struct ip6_moptions *im6o)
2756 {
2757 	struct in6_multi_mship *imm, *nimm;
2758 
2759 	if (im6o == NULL)
2760 		return;
2761 
2762 	/* The owner of im6o (in6p) should be protected by solock */
2763 	LIST_FOREACH_SAFE(imm, &im6o->im6o_memberships, i6mm_chain, nimm) {
2764 		struct ifnet *ifp;
2765 
2766 		LIST_REMOVE(imm, i6mm_chain);
2767 
2768 		ifp = imm->i6mm_maddr->in6m_ifp;
2769 		IFNET_LOCK(ifp);
2770 		in6_leavegroup(imm);
2771 		/* ifp should not leave thanks to solock */
2772 		IFNET_UNLOCK(ifp);
2773 	}
2774 	free(im6o, M_IPMOPTS);
2775 }
2776 
2777 /*
2778  * Set IPv6 outgoing packet options based on advanced API.
2779  */
2780 int
2781 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2782 	struct ip6_pktopts *stickyopt, kauth_cred_t cred, int uproto)
2783 {
2784 	struct cmsghdr *cm = 0;
2785 
2786 	if (control == NULL || opt == NULL)
2787 		return (EINVAL);
2788 
2789 	ip6_initpktopts(opt);
2790 	if (stickyopt) {
2791 		int error;
2792 
2793 		/*
2794 		 * If stickyopt is provided, make a local copy of the options
2795 		 * for this particular packet, then override them by ancillary
2796 		 * objects.
2797 		 * XXX: copypktopts() does not copy the cached route to a next
2798 		 * hop (if any).  This is not very good in terms of efficiency,
2799 		 * but we can allow this since this option should be rarely
2800 		 * used.
2801 		 */
2802 		if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2803 			return (error);
2804 	}
2805 
2806 	/*
2807 	 * XXX: Currently, we assume all the optional information is stored
2808 	 * in a single mbuf.
2809 	 */
2810 	if (control->m_next)
2811 		return (EINVAL);
2812 
2813 	/* XXX if cm->cmsg_len is not aligned, control->m_len can become <0 */
2814 	for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2815 	    control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2816 		int error;
2817 
2818 		if (control->m_len < CMSG_LEN(0))
2819 			return (EINVAL);
2820 
2821 		cm = mtod(control, struct cmsghdr *);
2822 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2823 			return (EINVAL);
2824 		if (cm->cmsg_level != IPPROTO_IPV6)
2825 			continue;
2826 
2827 		error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2828 		    cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
2829 		if (error)
2830 			return (error);
2831 	}
2832 
2833 	return (0);
2834 }
2835 
2836 /*
2837  * Set a particular packet option, as a sticky option or an ancillary data
2838  * item.  "len" can be 0 only when it's a sticky option.
2839  * We have 4 cases of combination of "sticky" and "cmsg":
2840  * "sticky=0, cmsg=0": impossible
2841  * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2842  * "sticky=1, cmsg=0": RFC3542 socket option
2843  * "sticky=1, cmsg=1": RFC2292 socket option
2844  */
2845 static int
2846 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2847     kauth_cred_t cred, int sticky, int cmsg, int uproto)
2848 {
2849 	int minmtupolicy;
2850 	int error;
2851 
2852 	if (!sticky && !cmsg) {
2853 #ifdef DIAGNOSTIC
2854 		printf("ip6_setpktopt: impossible case\n");
2855 #endif
2856 		return (EINVAL);
2857 	}
2858 
2859 	/*
2860 	 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2861 	 * not be specified in the context of RFC3542.  Conversely,
2862 	 * RFC3542 types should not be specified in the context of RFC2292.
2863 	 */
2864 	if (!cmsg) {
2865 		switch (optname) {
2866 		case IPV6_2292PKTINFO:
2867 		case IPV6_2292HOPLIMIT:
2868 		case IPV6_2292NEXTHOP:
2869 		case IPV6_2292HOPOPTS:
2870 		case IPV6_2292DSTOPTS:
2871 		case IPV6_2292RTHDR:
2872 		case IPV6_2292PKTOPTIONS:
2873 			return (ENOPROTOOPT);
2874 		}
2875 	}
2876 	if (sticky && cmsg) {
2877 		switch (optname) {
2878 		case IPV6_PKTINFO:
2879 		case IPV6_HOPLIMIT:
2880 		case IPV6_NEXTHOP:
2881 		case IPV6_HOPOPTS:
2882 		case IPV6_DSTOPTS:
2883 		case IPV6_RTHDRDSTOPTS:
2884 		case IPV6_RTHDR:
2885 		case IPV6_USE_MIN_MTU:
2886 		case IPV6_DONTFRAG:
2887 		case IPV6_OTCLASS:
2888 		case IPV6_TCLASS:
2889 		case IPV6_PREFER_TEMPADDR: /* XXX not an RFC3542 option */
2890 			return (ENOPROTOOPT);
2891 		}
2892 	}
2893 
2894 	switch (optname) {
2895 #ifdef RFC2292
2896 	case IPV6_2292PKTINFO:
2897 #endif
2898 	case IPV6_PKTINFO:
2899 	{
2900 		struct in6_pktinfo *pktinfo;
2901 
2902 		if (len != sizeof(struct in6_pktinfo))
2903 			return (EINVAL);
2904 
2905 		pktinfo = (struct in6_pktinfo *)buf;
2906 
2907 		/*
2908 		 * An application can clear any sticky IPV6_PKTINFO option by
2909 		 * doing a "regular" setsockopt with ipi6_addr being
2910 		 * in6addr_any and ipi6_ifindex being zero.
2911 		 * [RFC 3542, Section 6]
2912 		 */
2913 		if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
2914 		    pktinfo->ipi6_ifindex == 0 &&
2915 		    IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2916 			ip6_clearpktopts(opt, optname);
2917 			break;
2918 		}
2919 
2920 		if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
2921 		    sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
2922 			return (EINVAL);
2923 		}
2924 
2925 		/* Validate the interface index if specified. */
2926 		if (pktinfo->ipi6_ifindex) {
2927 			struct ifnet *ifp;
2928 			int s = pserialize_read_enter();
2929 			ifp = if_byindex(pktinfo->ipi6_ifindex);
2930 			if (ifp == NULL) {
2931 				pserialize_read_exit(s);
2932 				return ENXIO;
2933 			}
2934 			pserialize_read_exit(s);
2935 		}
2936 
2937 		/*
2938 		 * We store the address anyway, and let in6_selectsrc()
2939 		 * validate the specified address.  This is because ipi6_addr
2940 		 * may not have enough information about its scope zone, and
2941 		 * we may need additional information (such as outgoing
2942 		 * interface or the scope zone of a destination address) to
2943 		 * disambiguate the scope.
2944 		 * XXX: the delay of the validation may confuse the
2945 		 * application when it is used as a sticky option.
2946 		 */
2947 		if (opt->ip6po_pktinfo == NULL) {
2948 			opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
2949 			    M_IP6OPT, M_NOWAIT);
2950 			if (opt->ip6po_pktinfo == NULL)
2951 				return (ENOBUFS);
2952 		}
2953 		memcpy(opt->ip6po_pktinfo, pktinfo, sizeof(*pktinfo));
2954 		break;
2955 	}
2956 
2957 #ifdef RFC2292
2958 	case IPV6_2292HOPLIMIT:
2959 #endif
2960 	case IPV6_HOPLIMIT:
2961 	{
2962 		int *hlimp;
2963 
2964 		/*
2965 		 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
2966 		 * to simplify the ordering among hoplimit options.
2967 		 */
2968 		if (optname == IPV6_HOPLIMIT && sticky)
2969 			return (ENOPROTOOPT);
2970 
2971 		if (len != sizeof(int))
2972 			return (EINVAL);
2973 		hlimp = (int *)buf;
2974 		if (*hlimp < -1 || *hlimp > 255)
2975 			return (EINVAL);
2976 
2977 		opt->ip6po_hlim = *hlimp;
2978 		break;
2979 	}
2980 
2981 	case IPV6_OTCLASS:
2982 		if (len != sizeof(u_int8_t))
2983 			return (EINVAL);
2984 
2985 		opt->ip6po_tclass = *(u_int8_t *)buf;
2986 		break;
2987 
2988 	case IPV6_TCLASS:
2989 	{
2990 		int tclass;
2991 
2992 		if (len != sizeof(int))
2993 			return (EINVAL);
2994 		tclass = *(int *)buf;
2995 		if (tclass < -1 || tclass > 255)
2996 			return (EINVAL);
2997 
2998 		opt->ip6po_tclass = tclass;
2999 		break;
3000 	}
3001 
3002 #ifdef RFC2292
3003 	case IPV6_2292NEXTHOP:
3004 #endif
3005 	case IPV6_NEXTHOP:
3006 		error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3007 		    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3008 		if (error)
3009 			return (error);
3010 
3011 		if (len == 0) {	/* just remove the option */
3012 			ip6_clearpktopts(opt, IPV6_NEXTHOP);
3013 			break;
3014 		}
3015 
3016 		/* check if cmsg_len is large enough for sa_len */
3017 		if (len < sizeof(struct sockaddr) || len < *buf)
3018 			return (EINVAL);
3019 
3020 		switch (((struct sockaddr *)buf)->sa_family) {
3021 		case AF_INET6:
3022 		{
3023 			struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3024 
3025 			if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3026 				return (EINVAL);
3027 
3028 			if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3029 			    IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3030 				return (EINVAL);
3031 			}
3032 			if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3033 			    != 0) {
3034 				return (error);
3035 			}
3036 			break;
3037 		}
3038 		case AF_LINK:	/* eventually be supported? */
3039 		default:
3040 			return (EAFNOSUPPORT);
3041 		}
3042 
3043 		/* turn off the previous option, then set the new option. */
3044 		ip6_clearpktopts(opt, IPV6_NEXTHOP);
3045 		opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
3046 		if (opt->ip6po_nexthop == NULL)
3047 			return (ENOBUFS);
3048 		memcpy(opt->ip6po_nexthop, buf, *buf);
3049 		break;
3050 
3051 #ifdef RFC2292
3052 	case IPV6_2292HOPOPTS:
3053 #endif
3054 	case IPV6_HOPOPTS:
3055 	{
3056 		struct ip6_hbh *hbh;
3057 		int hbhlen;
3058 
3059 		/*
3060 		 * XXX: We don't allow a non-privileged user to set ANY HbH
3061 		 * options, since per-option restriction has too much
3062 		 * overhead.
3063 		 */
3064 		error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3065 		    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3066 		if (error)
3067 			return (error);
3068 
3069 		if (len == 0) {
3070 			ip6_clearpktopts(opt, IPV6_HOPOPTS);
3071 			break;	/* just remove the option */
3072 		}
3073 
3074 		/* message length validation */
3075 		if (len < sizeof(struct ip6_hbh))
3076 			return (EINVAL);
3077 		hbh = (struct ip6_hbh *)buf;
3078 		hbhlen = (hbh->ip6h_len + 1) << 3;
3079 		if (len != hbhlen)
3080 			return (EINVAL);
3081 
3082 		/* turn off the previous option, then set the new option. */
3083 		ip6_clearpktopts(opt, IPV6_HOPOPTS);
3084 		opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
3085 		if (opt->ip6po_hbh == NULL)
3086 			return (ENOBUFS);
3087 		memcpy(opt->ip6po_hbh, hbh, hbhlen);
3088 
3089 		break;
3090 	}
3091 
3092 #ifdef RFC2292
3093 	case IPV6_2292DSTOPTS:
3094 #endif
3095 	case IPV6_DSTOPTS:
3096 	case IPV6_RTHDRDSTOPTS:
3097 	{
3098 		struct ip6_dest *dest, **newdest = NULL;
3099 		int destlen;
3100 
3101 		/* XXX: see the comment for IPV6_HOPOPTS */
3102 		error = kauth_authorize_network(cred, KAUTH_NETWORK_IPV6,
3103 		    KAUTH_REQ_NETWORK_IPV6_HOPBYHOP, NULL, NULL, NULL);
3104 		if (error)
3105 			return (error);
3106 
3107 		if (len == 0) {
3108 			ip6_clearpktopts(opt, optname);
3109 			break;	/* just remove the option */
3110 		}
3111 
3112 		/* message length validation */
3113 		if (len < sizeof(struct ip6_dest))
3114 			return (EINVAL);
3115 		dest = (struct ip6_dest *)buf;
3116 		destlen = (dest->ip6d_len + 1) << 3;
3117 		if (len != destlen)
3118 			return (EINVAL);
3119 		/*
3120 		 * Determine the position that the destination options header
3121 		 * should be inserted; before or after the routing header.
3122 		 */
3123 		switch (optname) {
3124 		case IPV6_2292DSTOPTS:
3125 			/*
3126 			 * The old advanced API is ambiguous on this point.
3127 			 * Our approach is to determine the position based
3128 			 * according to the existence of a routing header.
3129 			 * Note, however, that this depends on the order of the
3130 			 * extension headers in the ancillary data; the 1st
3131 			 * part of the destination options header must appear
3132 			 * before the routing header in the ancillary data,
3133 			 * too.
3134 			 * RFC3542 solved the ambiguity by introducing
3135 			 * separate ancillary data or option types.
3136 			 */
3137 			if (opt->ip6po_rthdr == NULL)
3138 				newdest = &opt->ip6po_dest1;
3139 			else
3140 				newdest = &opt->ip6po_dest2;
3141 			break;
3142 		case IPV6_RTHDRDSTOPTS:
3143 			newdest = &opt->ip6po_dest1;
3144 			break;
3145 		case IPV6_DSTOPTS:
3146 			newdest = &opt->ip6po_dest2;
3147 			break;
3148 		}
3149 
3150 		/* turn off the previous option, then set the new option. */
3151 		ip6_clearpktopts(opt, optname);
3152 		*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
3153 		if (*newdest == NULL)
3154 			return (ENOBUFS);
3155 		memcpy(*newdest, dest, destlen);
3156 
3157 		break;
3158 	}
3159 
3160 #ifdef RFC2292
3161 	case IPV6_2292RTHDR:
3162 #endif
3163 	case IPV6_RTHDR:
3164 	{
3165 		struct ip6_rthdr *rth;
3166 		int rthlen;
3167 
3168 		if (len == 0) {
3169 			ip6_clearpktopts(opt, IPV6_RTHDR);
3170 			break;	/* just remove the option */
3171 		}
3172 
3173 		/* message length validation */
3174 		if (len < sizeof(struct ip6_rthdr))
3175 			return (EINVAL);
3176 		rth = (struct ip6_rthdr *)buf;
3177 		rthlen = (rth->ip6r_len + 1) << 3;
3178 		if (len != rthlen)
3179 			return (EINVAL);
3180 		switch (rth->ip6r_type) {
3181 		case IPV6_RTHDR_TYPE_0:
3182 			/* Dropped, RFC5095. */
3183 		default:
3184 			return (EINVAL);	/* not supported */
3185 		}
3186 		/* turn off the previous option */
3187 		ip6_clearpktopts(opt, IPV6_RTHDR);
3188 		opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
3189 		if (opt->ip6po_rthdr == NULL)
3190 			return (ENOBUFS);
3191 		memcpy(opt->ip6po_rthdr, rth, rthlen);
3192 		break;
3193 	}
3194 
3195 	case IPV6_USE_MIN_MTU:
3196 		if (len != sizeof(int))
3197 			return (EINVAL);
3198 		minmtupolicy = *(int *)buf;
3199 		if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3200 		    minmtupolicy != IP6PO_MINMTU_DISABLE &&
3201 		    minmtupolicy != IP6PO_MINMTU_ALL) {
3202 			return (EINVAL);
3203 		}
3204 		opt->ip6po_minmtu = minmtupolicy;
3205 		break;
3206 
3207 	case IPV6_DONTFRAG:
3208 		if (len != sizeof(int))
3209 			return (EINVAL);
3210 
3211 		if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3212 			/*
3213 			 * we ignore this option for TCP sockets.
3214 			 * (RFC3542 leaves this case unspecified.)
3215 			 */
3216 			opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3217 		} else
3218 			opt->ip6po_flags |= IP6PO_DONTFRAG;
3219 		break;
3220 
3221 	case IPV6_PREFER_TEMPADDR:
3222 	{
3223 		int preftemp;
3224 
3225 		if (len != sizeof(int))
3226 			return (EINVAL);
3227 		preftemp = *(int *)buf;
3228 		switch (preftemp) {
3229 		case IP6PO_TEMPADDR_SYSTEM:
3230 		case IP6PO_TEMPADDR_NOTPREFER:
3231 		case IP6PO_TEMPADDR_PREFER:
3232 			break;
3233 		default:
3234 			return (EINVAL);
3235 		}
3236 		opt->ip6po_prefer_tempaddr = preftemp;
3237 		break;
3238 	}
3239 
3240 	default:
3241 		return (ENOPROTOOPT);
3242 	} /* end of switch */
3243 
3244 	return (0);
3245 }
3246 
3247 /*
3248  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3249  * packet to the input queue of a specified interface.  Note that this
3250  * calls the output routine of the loopback "driver", but with an interface
3251  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
3252  */
3253 void
3254 ip6_mloopback(struct ifnet *ifp, struct mbuf *m,
3255 	const struct sockaddr_in6 *dst)
3256 {
3257 	struct mbuf *copym;
3258 	struct ip6_hdr *ip6;
3259 
3260 	copym = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
3261 	if (copym == NULL)
3262 		return;
3263 
3264 	/*
3265 	 * Make sure to deep-copy IPv6 header portion in case the data
3266 	 * is in an mbuf cluster, so that we can safely override the IPv6
3267 	 * header portion later.
3268 	 */
3269 	if ((copym->m_flags & M_EXT) != 0 ||
3270 	    copym->m_len < sizeof(struct ip6_hdr)) {
3271 		copym = m_pullup(copym, sizeof(struct ip6_hdr));
3272 		if (copym == NULL)
3273 			return;
3274 	}
3275 
3276 #ifdef DIAGNOSTIC
3277 	if (copym->m_len < sizeof(*ip6)) {
3278 		m_freem(copym);
3279 		return;
3280 	}
3281 #endif
3282 
3283 	ip6 = mtod(copym, struct ip6_hdr *);
3284 	/*
3285 	 * clear embedded scope identifiers if necessary.
3286 	 * in6_clearscope will touch the addresses only when necessary.
3287 	 */
3288 	in6_clearscope(&ip6->ip6_src);
3289 	in6_clearscope(&ip6->ip6_dst);
3290 
3291 	(void)looutput(ifp, copym, (const struct sockaddr *)dst, NULL);
3292 }
3293 
3294 /*
3295  * Chop IPv6 header off from the payload.
3296  */
3297 static int
3298 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3299 {
3300 	struct mbuf *mh;
3301 	struct ip6_hdr *ip6;
3302 
3303 	ip6 = mtod(m, struct ip6_hdr *);
3304 	if (m->m_len > sizeof(*ip6)) {
3305 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3306 		if (mh == NULL) {
3307 			m_freem(m);
3308 			return ENOBUFS;
3309 		}
3310 		M_MOVE_PKTHDR(mh, m);
3311 		MH_ALIGN(mh, sizeof(*ip6));
3312 		m->m_len -= sizeof(*ip6);
3313 		m->m_data += sizeof(*ip6);
3314 		mh->m_next = m;
3315 		mh->m_len = sizeof(*ip6);
3316 		memcpy(mtod(mh, void *), (void *)ip6, sizeof(*ip6));
3317 		m = mh;
3318 	}
3319 	exthdrs->ip6e_ip6 = m;
3320 	return 0;
3321 }
3322 
3323 /*
3324  * Compute IPv6 extension header length.
3325  */
3326 int
3327 ip6_optlen(struct in6pcb *in6p)
3328 {
3329 	int len;
3330 
3331 	if (!in6p->in6p_outputopts)
3332 		return 0;
3333 
3334 	len = 0;
3335 #define elen(x) \
3336     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3337 
3338 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
3339 	len += elen(in6p->in6p_outputopts->ip6po_dest1);
3340 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3341 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
3342 	return len;
3343 #undef elen
3344 }
3345 
3346 /*
3347  * Ensure sending address is valid.
3348  * Returns 0 on success, -1 if an error should be sent back or 1
3349  * if the packet could be dropped without error (protocol dependent).
3350  */
3351 static int
3352 ip6_ifaddrvalid(const struct in6_addr *src, const struct in6_addr *dst)
3353 {
3354 	struct sockaddr_in6 sin6;
3355 	int s, error;
3356 	struct ifaddr *ifa;
3357 	struct in6_ifaddr *ia6;
3358 
3359 	if (IN6_IS_ADDR_UNSPECIFIED(src))
3360 		return 0;
3361 
3362 	memset(&sin6, 0, sizeof(sin6));
3363 	sin6.sin6_family = AF_INET6;
3364 	sin6.sin6_len = sizeof(sin6);
3365 	sin6.sin6_addr = *src;
3366 
3367 	s = pserialize_read_enter();
3368 	ifa = ifa_ifwithaddr(sin6tosa(&sin6));
3369 	if ((ia6 = ifatoia6(ifa)) == NULL ||
3370 	    ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_DUPLICATED))
3371 		error = -1;
3372 	else if (ia6->ia6_flags & IN6_IFF_TENTATIVE)
3373 		error = 1;
3374 	else if (ia6->ia6_flags & IN6_IFF_DETACHED &&
3375 	    (sin6.sin6_addr = *dst, ifa_ifwithaddr(sin6tosa(&sin6)) == NULL))
3376 		/* Allow internal traffic to DETACHED addresses */
3377 		error = 1;
3378 	else
3379 		error = 0;
3380 	pserialize_read_exit(s);
3381 
3382 	return error;
3383 }
3384