xref: /openbsd-src/sys/netinet6/ip6_output.c (revision 8445c53715e7030056b779e8ab40efb7820981f2)
1 /*	$OpenBSD: ip6_output.c,v 1.50 2001/09/15 03:54:40 frantzen Exp $	*/
2 /*	$KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
66  */
67 
68 #include "pf.h"
69 
70 #include <sys/param.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/errno.h>
74 #include <sys/protosw.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/systm.h>
78 #include <sys/proc.h>
79 
80 #include <net/if.h>
81 #include <net/route.h>
82 
83 #include <netinet/in.h>
84 #include <netinet/in_var.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87 #include <netinet/in_pcb.h>
88 
89 #include <netinet/ip6.h>
90 #include <netinet/icmp6.h>
91 #include <netinet6/ip6_var.h>
92 #include <netinet6/nd6.h>
93 
94 #if NPF > 0
95 #include <net/pfvar.h>
96 #endif
97 
98 #ifdef IPSEC
99 #include <netinet/ip_ah.h>
100 #include <netinet/ip_esp.h>
101 #include <netinet/udp.h>
102 #include <netinet/tcp.h>
103 #include <net/pfkeyv2.h>
104 
105 extern u_int8_t get_sa_require  __P((struct inpcb *));
106 
107 extern int ipsec_auth_default_level;
108 extern int ipsec_esp_trans_default_level;
109 extern int ipsec_esp_network_default_level;
110 extern int ipsec_ipcomp_default_level;
111 #endif /* IPSEC */
112 
/*
 * Set of mbufs from which ip6_output() builds the header chain of an
 * outgoing packet.  ip6_output() zeroes the whole structure first, so a
 * NULL member means that the corresponding header is absent.  The four
 * extension header members are filled by ip6_copyexthdr() from the
 * caller's struct ip6_pktopts.
 */
113 struct ip6_exthdrs {
114 	struct mbuf *ip6e_ip6;		/* packet head that ip6_output() uses after ip6_splithdr() */
115 	struct mbuf *ip6e_hbh;		/* Hop-by-Hop options header */
116 	struct mbuf *ip6e_dest1;	/* Destination options header, 1st part (chained ahead of the routing header) */
117 	struct mbuf *ip6e_rthdr;	/* Routing header */
118 	struct mbuf *ip6e_dest2;	/* Destination options header, 2nd part (linked directly ahead of the payload) */
119 };
120 
121 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
122 			    struct socket *));
123 static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
124 static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
125 static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
126 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
127 				  struct ip6_frag **));
128 static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
129 static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
130 
131 /*
132  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
133  * header (with pri, len, nxt, hlim, src, dst).
134  * This function may modify ver and hlim only.
135  * The mbuf chain containing the packet will be freed.
136  * The mbuf opt, if present, will not be freed.
137  *
138  * Type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
139  * nd_ifinfo.linkmtu is u_int32_t, so we use u_long, the widest of the
140  * three (the type of rt_rmx.rmx_mtu), to hold the value.
141  */
142 int
143 ip6_output(m0, opt, ro, flags, im6o, ifpp)
144 	struct mbuf *m0;
145 	struct ip6_pktopts *opt;
146 	struct route_in6 *ro;
147 	int flags;
148 	struct ip6_moptions *im6o;
149 	struct ifnet **ifpp;		/* XXX: just for statistics */
150 {
151 	struct ip6_hdr *ip6, *mhip6;
152 	struct ifnet *ifp, *origifp;
153 	struct mbuf *m = m0;
154 	int hlen, tlen, len, off;
155 	struct route_in6 ip6route;
156 	struct sockaddr_in6 *dst;
157 	int error = 0;
158 	struct in6_ifaddr *ia;
159 	u_long mtu;
160 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
161 	struct ip6_exthdrs exthdrs;
162 	struct in6_addr finaldst;
163 	struct route_in6 *ro_pmtu = NULL;
164 	int hdrsplit = 0;
165 	u_int8_t sproto = 0;
166 #ifdef IPSEC
167 	struct m_tag *mtag;
168 	union sockaddr_union sdst;
169 	struct tdb_ident *tdbi;
170 	u_int32_t sspi;
171 	struct inpcb *inp;
172 	struct tdb *tdb;
173 	int s;
174 #endif /* IPSEC */
175 
176 #ifdef IPSEC
177 	inp = NULL;	/*XXX*/
178 	if (inp && (inp->inp_flags & INP_IPV6) == 0)
179 		panic("ip6_output: IPv4 pcb is passed");
180 #endif /* IPSEC */
181 
182 #define MAKE_EXTHDR(hp, mp)						\
183     do {								\
184 	if (hp) {							\
185 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
186 		error = ip6_copyexthdr((mp), (caddr_t)(hp), 		\
187 				       ((eh)->ip6e_len + 1) << 3);	\
188 		if (error)						\
189 			goto freehdrs;					\
190 	}								\
191     } while (0)
192 
193 	bzero(&exthdrs, sizeof(exthdrs));
194 	if (opt) {
195 		/* Hop-by-Hop options header */
196 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
197 		/* Destination options header(1st part) */
198 		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
199 		/* Routing header */
200 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
201 		/* Destination options header(2nd part) */
202 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
203 	}
204 
205 #ifdef IPSEC
206 	/*
207 	 * splnet is chosen over spltdb because we are not allowed to
208 	 * lower the level, and udp6_output calls us in splnet(). XXX check
209 	 */
210 	s = splnet();
211 
212 	/*
213 	 * Check if there was an outgoing SA bound to the flow
214 	 * from a transport protocol.
215 	 */
216 	ip6 = mtod(m, struct ip6_hdr *);
217 
218 	/* Do we have any pending SAs to apply ? */
219 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
220 	if (mtag != NULL) {
221 #ifdef DIAGNOSTIC
222 		if (mtag->m_tag_len != sizeof (struct tdb_ident))
223 			panic("ip6_output: tag of length %d (should be %d",
224 			    mtag->m_tag_len, sizeof (struct tdb_ident));
225 #endif
226 		tdbi = (struct tdb_ident *)(mtag + 1);
227 		tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
228 		if (tdb == NULL)
229 			error = -EINVAL;
230 		m_tag_delete(m, mtag);
231 	}
232 	else
233 		tdb = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr),
234 		    &error, IPSP_DIRECTION_OUT, NULL, inp);
235 
236 	if (tdb == NULL) {
237 	        splx(s);
238 
239 		if (error == 0) {
240 		        /*
241 			 * No IPsec processing required, we'll just send the
242 			 * packet out.
243 			 */
244 		        sproto = 0;
245 
246 			/* Fall through to routing/multicast handling */
247 		} else {
248 		        /*
249 			 * -EINVAL is used to indicate that the packet should
250 			 * be silently dropped, typically because we've asked
251 			 * key management for an SA.
252 			 */
253 		        if (error == -EINVAL) /* Should silently drop packet */
254 				error = 0;
255 
256 			goto freehdrs;
257 		}
258 	} else {
259 		/* Loop detection */
260 		for (mtag = m_tag_first(m); mtag != NULL;
261 		    mtag = m_tag_next(m, mtag)) {
262 			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
263 			    mtag->m_tag_id !=
264 			    PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
265 				continue;
266 			tdbi = (struct tdb_ident *)(mtag + 1);
267 			if (tdbi->spi == tdb->tdb_spi &&
268 			    tdbi->proto == tdb->tdb_sproto &&
269 			    !bcmp(&tdbi->dst, &tdb->tdb_dst,
270 			    sizeof(union sockaddr_union))) {
271 				splx(s);
272 				sproto = 0; /* mark as no-IPsec-needed */
273 				goto done_spd;
274 			}
275 		}
276 
277 	        /* We need to do IPsec */
278 	        bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
279 		sspi = tdb->tdb_spi;
280 		sproto = tdb->tdb_sproto;
281 	        splx(s);
282 
283 #if 1 /* XXX */
284 		/* if we have any extension header, we cannot perform IPsec */
285 		if (exthdrs.ip6e_hbh || exthdrs.ip6e_dest1 ||
286 		    exthdrs.ip6e_rthdr || exthdrs.ip6e_dest2) {
287 			error = EHOSTUNREACH;
288 			goto freehdrs;
289 		}
290 #endif
291 	}
292 
293 	/* Fall through to the routing/multicast handling code */
294  done_spd:
295 #endif /* IPSEC */
296 
297 	/*
298 	 * Calculate the total length of the extension header chain.
299 	 * Keep the length of the unfragmentable part for fragmentation.
300 	 */
301 	optlen = 0;
302 	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
303 	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
304 	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
305 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
306 	/* NOTE: we don't add AH/ESP length here. do that later. */
307 	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
308 
309 	/*
310 	 * If we need IPsec, or there is at least one extension header,
311 	 * separate IP6 header from the payload.
312 	 */
313 	if ((sproto || optlen) && !hdrsplit) {
314 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
315 			m = NULL;
316 			goto freehdrs;
317 		}
318 		m = exthdrs.ip6e_ip6;
319 		hdrsplit++;
320 	}
321 
322 	/* adjust pointer */
323 	ip6 = mtod(m, struct ip6_hdr *);
324 
325 	/* adjust mbuf packet header length */
326 	m->m_pkthdr.len += optlen;
327 	plen = m->m_pkthdr.len - sizeof(*ip6);
328 
329 	/* If this is a jumbo payload, insert a jumbo payload option. */
330 	if (plen > IPV6_MAXPACKET) {
331 		if (!hdrsplit) {
332 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
333 				m = NULL;
334 				goto freehdrs;
335 			}
336 			m = exthdrs.ip6e_ip6;
337 			hdrsplit++;
338 		}
339 		/* adjust pointer */
340 		ip6 = mtod(m, struct ip6_hdr *);
341 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
342 			goto freehdrs;
343 		ip6->ip6_plen = 0;
344 	} else
345 		ip6->ip6_plen = htons(plen);
346 
347 	/*
348 	 * Concatenate headers and fill in next header fields.
349 	 * Here we have, on "m"
350 	 *	IPv6 payload
351 	 * and we insert headers accordingly.  Finally, we should be getting:
352 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
353 	 *
354 	 * during the header composing process, "m" points to IPv6 header.
355 	 * "mprev" points to an extension header prior to esp.
356 	 */
357 	{
358 		u_char *nexthdrp = &ip6->ip6_nxt;
359 		struct mbuf *mprev = m;
360 
361 		/*
362 		 * we treat dest2 specially.  this makes IPsec processing
363 		 * much easier.
364 		 *
365 		 * result: IPv6 dest2 payload
366 		 * m and mprev will point to IPv6 header.
367 		 */
368 		if (exthdrs.ip6e_dest2) {
369 			if (!hdrsplit)
370 				panic("assumption failed: hdr not split");
371 			exthdrs.ip6e_dest2->m_next = m->m_next;
372 			m->m_next = exthdrs.ip6e_dest2;
373 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
374 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
375 		}
376 
377 #define MAKE_CHAIN(m, mp, p, i)\
378     do {\
379 	if (m) {\
380 		if (!hdrsplit) \
381 			panic("assumption failed: hdr not split"); \
382 		*mtod((m), u_char *) = *(p);\
383 		*(p) = (i);\
384 		p = mtod((m), u_char *);\
385 		(m)->m_next = (mp)->m_next;\
386 		(mp)->m_next = (m);\
387 		(mp) = (m);\
388 	}\
389     } while (0)
390 		/*
391 		 * result: IPv6 hbh dest1 rthdr dest2 payload
392 		 * m will point to IPv6 header.  mprev will point to the
393 		 * extension header prior to dest2 (rthdr in the above case).
394 		 */
395 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
396 			   nexthdrp, IPPROTO_HOPOPTS);
397 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
398 			   nexthdrp, IPPROTO_DSTOPTS);
399 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
400 			   nexthdrp, IPPROTO_ROUTING);
401 
402 #if 0 /*KAME IPSEC*/
403 		if (!needipsec)
404 			goto skip_ipsec2;
405 
406 		/*
407 		 * pointers after IPsec headers are not valid any more.
408 		 * other pointers need a great care too.
409 		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
410 		 */
411 		exthdrs.ip6e_dest2 = NULL;
412 
413 	    {
414 		struct ip6_rthdr *rh = NULL;
415 		int segleft_org = 0;
416 		struct ipsec_output_state state;
417 
418 		if (exthdrs.ip6e_rthdr) {
419 			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
420 			segleft_org = rh->ip6r_segleft;
421 			rh->ip6r_segleft = 0;
422 		}
423 
424 		bzero(&state, sizeof(state));
425 		state.m = m;
426 		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
427 			&needipsectun);
428 		m = state.m;
429 		if (error) {
430 			/* mbuf is already reclaimed in ipsec6_output_trans. */
431 			m = NULL;
432 			switch (error) {
433 			case EHOSTUNREACH:
434 			case ENETUNREACH:
435 			case EMSGSIZE:
436 			case ENOBUFS:
437 			case ENOMEM:
438 				break;
439 			default:
440 				printf("ip6_output (ipsec): error code %d\n", error);
441 				/*fall through*/
442 			case ENOENT:
443 				/* don't show these error codes to the user */
444 				error = 0;
445 				break;
446 			}
447 			goto bad;
448 		}
449 		if (exthdrs.ip6e_rthdr) {
450 			/* ah6_output doesn't modify mbuf chain */
451 			rh->ip6r_segleft = segleft_org;
452 		}
453 	    }
454 skip_ipsec2:;
455 #endif
456 	}
457 
458 	/*
459 	 * If there is a routing header, replace destination address field
460 	 * with the first hop of the routing header.
461 	 */
462 	if (exthdrs.ip6e_rthdr) {
463 		struct ip6_rthdr *rh =
464 			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
465 						  struct ip6_rthdr *));
466 		struct ip6_rthdr0 *rh0;
467 
468 		finaldst = ip6->ip6_dst;
469 		switch(rh->ip6r_type) {
470 		case IPV6_RTHDR_TYPE_0:
471 			 rh0 = (struct ip6_rthdr0 *)rh;
472 			 ip6->ip6_dst = rh0->ip6r0_addr[0];
473 			 bcopy((caddr_t)&rh0->ip6r0_addr[1],
474 				 (caddr_t)&rh0->ip6r0_addr[0],
475 				 sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
476 				 );
477 			 rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst;
478 			 break;
479 		default:	/* is it possible? */
480 			 error = EINVAL;
481 			 goto bad;
482 		}
483 	}
484 
485 	/* Source address validation */
486 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
487 	    (flags & IPV6_DADOUTPUT) == 0) {
488 		error = EOPNOTSUPP;
489 		ip6stat.ip6s_badscope++;
490 		goto bad;
491 	}
492 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
493 		error = EOPNOTSUPP;
494 		ip6stat.ip6s_badscope++;
495 		goto bad;
496 	}
497 
498 	ip6stat.ip6s_localout++;
499 
500 	/*
501 	 * Route packet.
502 	 */
503 	if (ro == 0) {
504 		ro = &ip6route;
505 		bzero((caddr_t)ro, sizeof(*ro));
506 	}
507 	ro_pmtu = ro;
508 	if (opt && opt->ip6po_rthdr)
509 		ro = &opt->ip6po_route;
510 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
511 	/*
512 	 * If there is a cached route,
513 	 * check that it is to the same destination
514 	 * and is still up. If not, free it and try again.
515 	 */
516 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
517 			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
518 		RTFREE(ro->ro_rt);
519 		ro->ro_rt = (struct rtentry *)0;
520 	}
521 	if (ro->ro_rt == 0) {
522 		bzero(dst, sizeof(*dst));
523 		dst->sin6_family = AF_INET6;
524 		dst->sin6_len = sizeof(struct sockaddr_in6);
525 		dst->sin6_addr = ip6->ip6_dst;
526 	}
527 #ifdef IPSEC
528 	/*
529 	 * Check if the packet needs encapsulation.
530 	 * ipsp_process_packet will never come back to here.
531 	 */
532 	if (sproto != 0) {
533 	        s = splnet();
534 
535 		/* fill in IPv6 header which would be filled later */
536 		if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
537 			if (opt && opt->ip6po_hlim != -1)
538 				ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
539 		} else {
540 			if (im6o != NULL)
541 				ip6->ip6_hlim = im6o->im6o_multicast_hlim;
542 			else
543 				ip6->ip6_hlim = ip6_defmcasthlim;
544 			if (opt && opt->ip6po_hlim != -1)
545 				ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
546 
547 			/*
548 			 * XXX what should we do if ip6_hlim == 0 and the
549 			 * packet gets tunnelled?
550 			 */
551 		}
552 
553 		tdb = gettdb(sspi, &sdst, sproto);
554 		if (tdb == NULL) {
555 			splx(s);
556 			error = EHOSTUNREACH;
557 			m_freem(m);
558 			goto done;
559 		}
560 
561 		/* Latch to PCB */
562 		if (inp)
563 		        tdb_add_inp(tdb, inp, 0);
564 
565 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
566 
567 		/* Callee frees mbuf */
568 		error = ipsp_process_packet(m, tdb, AF_INET6, 0);
569 		splx(s);
570 		return error;  /* Nothing more to be done */
571 	}
572 #endif /* IPSEC */
573 
574 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
575 		/* Unicast */
576 
577 #define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
578 #define sin6tosa(sin6)	((struct sockaddr *)(sin6))
579 		/* xxx
580 		 * interface selection comes here
581 		 * if an interface is specified from an upper layer,
582 		 * ifp must point it.
583 		 */
584 		if (ro->ro_rt == 0) {
585 			/*
586 			 * non-bsdi always clone routes, if parent is
587 			 * PRF_CLONING.
588 			 */
589 			rtalloc((struct route *)ro);
590 		}
591 		if (ro->ro_rt == 0) {
592 			ip6stat.ip6s_noroute++;
593 			error = EHOSTUNREACH;
594 			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
595 			goto bad;
596 		}
597 		ia = ifatoia6(ro->ro_rt->rt_ifa);
598 		ifp = ro->ro_rt->rt_ifp;
599 		ro->ro_rt->rt_use++;
600 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
601 			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
602 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
603 
604 		in6_ifstat_inc(ifp, ifs6_out_request);
605 
606 		/*
607 		 * Check if the outgoing interface conflicts with
608 		 * the interface specified by ifi6_ifindex (if specified).
609 		 * Note that loopback interface is always okay.
610 		 * (this may happen when we are sending a packet to one of
611 		 *  our own addresses.)
612 		 */
613 		if (opt && opt->ip6po_pktinfo
614 		 && opt->ip6po_pktinfo->ipi6_ifindex) {
615 			if (!(ifp->if_flags & IFF_LOOPBACK)
616 			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
617 				ip6stat.ip6s_noroute++;
618 				in6_ifstat_inc(ifp, ifs6_out_discard);
619 				error = EHOSTUNREACH;
620 				goto bad;
621 			}
622 		}
623 
624 		if (opt && opt->ip6po_hlim != -1)
625 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
626 	} else {
627 		/* Multicast */
628 		struct	in6_multi *in6m;
629 
630 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
631 
632 		/*
633 		 * See if the caller provided any multicast options
634 		 */
635 		ifp = NULL;
636 		if (im6o != NULL) {
637 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
638 			if (im6o->im6o_multicast_ifp != NULL)
639 				ifp = im6o->im6o_multicast_ifp;
640 		} else
641 			ip6->ip6_hlim = ip6_defmcasthlim;
642 
643 		/*
644 		 * See if the caller provided the outgoing interface
645 		 * as an ancillary data.
646 		 * Boundary check for ifindex is assumed to be already done.
647 		 */
648 		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
649 			ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex];
650 
651 		/*
652 		 * If the destination is a node-local scope multicast,
653 		 * the packet should be loop-backed only.
654 		 */
655 		if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
656 			/*
657 			 * If the outgoing interface is already specified,
658 			 * it should be a loopback interface.
659 			 */
660 			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
661 				ip6stat.ip6s_badscope++;
662 				error = ENETUNREACH; /* XXX: better error? */
663 				/* XXX correct ifp? */
664 				in6_ifstat_inc(ifp, ifs6_out_discard);
665 				goto bad;
666 			}
667 			else {
668 				ifp = lo0ifp;
669 			}
670 		}
671 
672 		if (opt && opt->ip6po_hlim != -1)
673 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
674 
675 		/*
676 		 * If caller did not provide an interface lookup a
677 		 * default in the routing table.  This is either a
678 		 * default for the specified group (i.e. a host
679 		 * route), or a multicast default (a route for the
680 		 * ``net'' ff00::/8).
681 		 */
682 		if (ifp == NULL) {
683 			if (ro->ro_rt == 0) {
684 				ro->ro_rt = rtalloc1((struct sockaddr *)
685 						&ro->ro_dst, 0);
686 			}
687 			if (ro->ro_rt == 0) {
688 				ip6stat.ip6s_noroute++;
689 				error = EHOSTUNREACH;
690 				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
691 				goto bad;
692 			}
693 			ia = ifatoia6(ro->ro_rt->rt_ifa);
694 			ifp = ro->ro_rt->rt_ifp;
695 			ro->ro_rt->rt_use++;
696 		}
697 
698 		if ((flags & IPV6_FORWARDING) == 0)
699 			in6_ifstat_inc(ifp, ifs6_out_request);
700 		in6_ifstat_inc(ifp, ifs6_out_mcast);
701 
702 		/*
703 		 * Confirm that the outgoing interface supports multicast.
704 		 */
705 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
706 			ip6stat.ip6s_noroute++;
707 			in6_ifstat_inc(ifp, ifs6_out_discard);
708 			error = ENETUNREACH;
709 			goto bad;
710 		}
711 		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
712 		if (in6m != NULL &&
713 		   (im6o == NULL || im6o->im6o_multicast_loop)) {
714 			/*
715 			 * If we belong to the destination multicast group
716 			 * on the outgoing interface, and the caller did not
717 			 * forbid loopback, loop back a copy.
718 			 */
719 			ip6_mloopback(ifp, m, dst);
720 		} else {
721 			/*
722 			 * If we are acting as a multicast router, perform
723 			 * multicast forwarding as if the packet had just
724 			 * arrived on the interface to which we are about
725 			 * to send.  The multicast forwarding function
726 			 * recursively calls this function, using the
727 			 * IPV6_FORWARDING flag to prevent infinite recursion.
728 			 *
729 			 * Multicasts that are looped back by ip6_mloopback(),
730 			 * above, will be forwarded by the ip6_input() routine,
731 			 * if necessary.
732 			 */
733 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
734 				if (ip6_mforward(ip6, ifp, m) != 0) {
735 					m_freem(m);
736 					goto done;
737 				}
738 			}
739 		}
740 		/*
741 		 * Multicasts with a hoplimit of zero may be looped back,
742 		 * above, but must not be transmitted on a network.
743 		 * Also, multicasts addressed to the loopback interface
744 		 * are not sent -- the above call to ip6_mloopback() will
745 		 * loop back a copy if this host actually belongs to the
746 		 * destination group on the loopback interface.
747 		 */
748 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
749 			m_freem(m);
750 			goto done;
751 		}
752 	}
753 
754 	/*
755 	 * Fill the outgoing interface to tell the upper layer
756 	 * to increment per-interface statistics.
757 	 */
758 	if (ifpp)
759 		*ifpp = ifp;
760 
761 	/*
762 	 * Determine path MTU.
763 	 */
764 	if (ro_pmtu != ro) {
765 		/* The first hop and the final destination may differ. */
766 		struct sockaddr_in6 *sin6_fin =
767 			(struct sockaddr_in6 *)&ro_pmtu->ro_dst;
768 		if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
769 				       !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr,
770 							   &finaldst))) {
771 			RTFREE(ro_pmtu->ro_rt);
772 			ro_pmtu->ro_rt = (struct rtentry *)0;
773 		}
774 		if (ro_pmtu->ro_rt == 0) {
775 			bzero(sin6_fin, sizeof(*sin6_fin));
776 			sin6_fin->sin6_family = AF_INET6;
777 			sin6_fin->sin6_len = sizeof(struct sockaddr_in6);
778 			sin6_fin->sin6_addr = finaldst;
779 
780 			rtalloc((struct route *)ro_pmtu);
781 		}
782 	}
783 	if (ro_pmtu->ro_rt != NULL) {
784 		u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu;
785 
786 		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
787 		if (mtu > ifmtu || mtu == 0) {
788 			/*
789 			 * The MTU on the route is larger than the MTU on
790 			 * the interface!  This shouldn't happen, unless the
791 			 * MTU of the interface has been changed after the
792 			 * interface was brought up.  Change the MTU in the
793 			 * route to match the interface MTU (as long as the
794 			 * field isn't locked).
795 			 *
796 			 * if MTU on the route is 0, we need to fix the MTU.
797 			 * this case happens with path MTU discovery timeouts.
798 			 */
799 			 mtu = ifmtu;
800 			 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
801 				 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
802 		}
803 	} else {
804 		mtu = nd_ifinfo[ifp->if_index].linkmtu;
805 	}
806 
807 	/* Fake scoped addresses */
808 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
809 		/*
810 		 * If source or destination address is a scoped address, and
811 		 * the packet is going to be sent to a loopback interface,
812 		 * we should keep the original interface.
813 		 */
814 
815 		/*
816 		 * XXX: this is a very experimental and temporary solution.
817 		 * We eventually have sockaddr_in6 and use the sin6_scope_id
818 		 * field of the structure here.
819 		 * We rely on the consistency between two scope zone ids
820 		 * of source add destination, which should already be assured
821 		 * Larger scopes than link will be supported in the near
822 		 * future.
823 		 */
824 		origifp = NULL;
825 		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
826 			origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])];
827 		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
828 			origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])];
829 		/*
830 		 * XXX: origifp can be NULL even in those two cases above.
831 		 * For example, if we remove the (only) link-local address
832 		 * from the loopback interface, and try to send a link-local
833 		 * address without link-id information.  Then the source
834 		 * address is ::1, and the destination address is the
835 		 * link-local address with its s6_addr16[1] being zero.
836 		 * What is worse, if the packet goes to the loopback interface
837 		 * by a default rejected route, the null pointer would be
838 		 * passed to looutput, and the kernel would hang.
839 		 * The following last resort would prevent such disaster.
840 		 */
841 		if (origifp == NULL)
842 			origifp = ifp;
843 	}
844 	else
845 		origifp = ifp;
846 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
847 		ip6->ip6_src.s6_addr16[1] = 0;
848 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
849 		ip6->ip6_dst.s6_addr16[1] = 0;
850 
851 	/*
852 	 * If the outgoing packet contains a hop-by-hop options header,
853 	 * it must be examined and processed even by the source node.
854 	 * (RFC 2460, section 4.)
855 	 */
856 	if (exthdrs.ip6e_hbh) {
857 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh,
858 					   struct ip6_hbh *);
859 		u_int32_t dummy1; /* XXX unused */
860 		u_int32_t dummy2; /* XXX unused */
861 
862 		/*
863 		 *  XXX: if we have to send an ICMPv6 error to the sender,
864 		 *       we need the M_LOOP flag since icmp6_error() expects
865 		 *       the IPv6 and the hop-by-hop options header are
866 		 *       continuous unless the flag is set.
867 		 */
868 		m->m_flags |= M_LOOP;
869 		m->m_pkthdr.rcvif = ifp;
870 		if (ip6_process_hopopts(m,
871 					(u_int8_t *)(hbh + 1),
872 					((hbh->ip6h_len + 1) << 3) -
873 					sizeof(struct ip6_hbh),
874 					&dummy1, &dummy2) < 0) {
875 			/* m was already freed at this point */
876 			error = EINVAL;/* better error? */
877 			goto done;
878 		}
879 		m->m_flags &= ~M_LOOP; /* XXX */
880 		m->m_pkthdr.rcvif = NULL;
881 	}
882 
883 #if NPF > 0
884         if (pf_test6(PF_OUT, ifp, &m) != PF_PASS) {
885                 error = EHOSTUNREACH;
886                 goto done;
887         }
888 #endif
889 
890 	/*
891 	 * Send the packet to the outgoing interface.
892 	 * If necessary, do IPv6 fragmentation before sending.
893 	 */
894 	tlen = m->m_pkthdr.len;
895 	if (tlen <= mtu
896 #ifdef notyet
897 	    /*
898 	     * On any link that cannot convey a 1280-octet packet in one piece,
899 	     * link-specific fragmentation and reassembly must be provided at
900 	     * a layer below IPv6. [RFC 2460, sec.5]
901 	     * Thus if the interface has ability of link-level fragmentation,
902 	     * we can just send the packet even if the packet size is
903 	     * larger than the link's MTU.
904 	     * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet...
905 	     */
906 
907 	    || ifp->if_flags & IFF_FRAGMENTABLE
908 #endif
909 	    )
910 	{
911 #ifdef OLDIP6OUTPUT
912 		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst,
913 					  ro->ro_rt);
914 #else
915 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
916 #endif
917 		goto done;
918 	} else if (mtu < IPV6_MMTU) {
919 		/*
920 		 * note that path MTU is never less than IPV6_MMTU
921 		 * (see icmp6_input).
922 		 */
923 		error = EMSGSIZE;
924 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
925 		goto bad;
926 	} else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
927 		error = EMSGSIZE;
928 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
929 		goto bad;
930 	} else {
931 		struct mbuf **mnext, *m_frgpart;
932 		struct ip6_frag *ip6f;
933 		u_int32_t id = htonl(ip6_id++);
934 		u_char nextproto;
935 
936 		/*
937 		 * Too large for the destination or interface;
938 		 * fragment if possible.
939 		 * Must be able to put at least 8 bytes per fragment.
940 		 */
941 		hlen = unfragpartlen;
942 		if (mtu > IPV6_MAXPACKET)
943 			mtu = IPV6_MAXPACKET;
944 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
945 		if (len < 8) {
946 			error = EMSGSIZE;
947 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
948 			goto bad;
949 		}
950 
951 		mnext = &m->m_nextpkt;
952 
953 		/*
954 		 * Change the next header field of the last header in the
955 		 * unfragmentable part.
956 		 */
957 		if (exthdrs.ip6e_rthdr) {
958 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
959 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
960 		} else if (exthdrs.ip6e_dest1) {
961 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
962 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
963 		} else if (exthdrs.ip6e_hbh) {
964 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
965 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
966 		} else {
967 			nextproto = ip6->ip6_nxt;
968 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
969 		}
970 
971 		/*
972 		 * Loop through length of segment after first fragment,
973 		 * make new header and copy data of each part and link onto chain.
974 		 */
975 		m0 = m;
976 		for (off = hlen; off < tlen; off += len) {
977 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
978 			if (!m) {
979 				error = ENOBUFS;
980 				ip6stat.ip6s_odropped++;
981 				goto sendorfree;
982 			}
983 			m->m_flags = m0->m_flags & M_COPYFLAGS;
984 			*mnext = m;
985 			mnext = &m->m_nextpkt;
986 			m->m_data += max_linkhdr;
987 			mhip6 = mtod(m, struct ip6_hdr *);
988 			*mhip6 = *ip6;
989 			m->m_len = sizeof(*mhip6);
990  			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
991  			if (error) {
992 				ip6stat.ip6s_odropped++;
993 				goto sendorfree;
994 			}
995 			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
996 			if (off + len >= tlen)
997 				len = tlen - off;
998 			else
999 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1000 			mhip6->ip6_plen = htons((u_short)(len + hlen +
1001 							  sizeof(*ip6f) -
1002 							  sizeof(struct ip6_hdr)));
1003 			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1004 				error = ENOBUFS;
1005 				ip6stat.ip6s_odropped++;
1006 				goto sendorfree;
1007 			}
1008 			m_cat(m, m_frgpart);
1009 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1010 			m->m_pkthdr.rcvif = (struct ifnet *)0;
1011 			ip6f->ip6f_reserved = 0;
1012 			ip6f->ip6f_ident = id;
1013 			ip6f->ip6f_nxt = nextproto;
1014 			ip6stat.ip6s_ofragments++;
1015 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1016 		}
1017 
1018 		in6_ifstat_inc(ifp, ifs6_out_fragok);
1019 	}
1020 
1021 	/*
1022 	 * Remove leading garbages.
1023 	 */
1024 sendorfree:
1025 	m = m0->m_nextpkt;
1026 	m0->m_nextpkt = 0;
1027 	m_freem(m0);
1028 	for (m0 = m; m; m = m0) {
1029 		m0 = m->m_nextpkt;
1030 		m->m_nextpkt = 0;
1031 		if (error == 0) {
1032 #ifdef OLDIP6OUTPUT
1033 			error = (*ifp->if_output)(ifp, m,
1034 						  (struct sockaddr *)dst,
1035 						  ro->ro_rt);
1036 #else
1037 			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1038 #endif
1039 		} else
1040 			m_freem(m);
1041 	}
1042 
1043 	if (error == 0)
1044 		ip6stat.ip6s_fragmented++;
1045 
1046 done:
1047 	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1048 		RTFREE(ro->ro_rt);
1049 	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1050 		RTFREE(ro_pmtu->ro_rt);
1051 	}
1052 
1053 	return(error);
1054 
1055 freehdrs:
1056 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1057 	m_freem(exthdrs.ip6e_dest1);
1058 	m_freem(exthdrs.ip6e_rthdr);
1059 	m_freem(exthdrs.ip6e_dest2);
1060 	/* fall through */
1061 bad:
1062 	m_freem(m);
1063 	goto done;
1064 }
1065 
1066 static int
1067 ip6_copyexthdr(mp, hdr, hlen)
1068 	struct mbuf **mp;
1069 	caddr_t hdr;
1070 	int hlen;
1071 {
1072 	struct mbuf *m;
1073 
1074 	if (hlen > MCLBYTES)
1075 		return(ENOBUFS); /* XXX */
1076 
1077 	MGET(m, M_DONTWAIT, MT_DATA);
1078 	if (!m)
1079 		return(ENOBUFS);
1080 
1081 	if (hlen > MLEN) {
1082 		MCLGET(m, M_DONTWAIT);
1083 		if ((m->m_flags & M_EXT) == 0) {
1084 			m_free(m);
1085 			return(ENOBUFS);
1086 		}
1087 	}
1088 	m->m_len = hlen;
1089 	if (hdr)
1090 		bcopy(hdr, mtod(m, caddr_t), hlen);
1091 
1092 	*mp = m;
1093 	return(0);
1094 }
1095 
1096 /*
1097  * Insert jumbo payload option.
1098  */
1099 static int
1100 ip6_insert_jumboopt(exthdrs, plen)
1101 	struct ip6_exthdrs *exthdrs;
1102 	u_int32_t plen;
1103 {
1104 	struct mbuf *mopt;
1105 	u_char *optbuf;
1106 	u_int32_t v;
1107 
1108 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1109 
1110 	/*
1111 	 * If there is no hop-by-hop options header, allocate new one.
1112 	 * If there is one but it doesn't have enough space to store the
1113 	 * jumbo payload option, allocate a cluster to store the whole options.
1114 	 * Otherwise, use it to store the options.
1115 	 */
1116 	if (exthdrs->ip6e_hbh == 0) {
1117 		MGET(mopt, M_DONTWAIT, MT_DATA);
1118 		if (mopt == 0)
1119 			return(ENOBUFS);
1120 		mopt->m_len = JUMBOOPTLEN;
1121 		optbuf = mtod(mopt, u_char *);
1122 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1123 		exthdrs->ip6e_hbh = mopt;
1124 	} else {
1125 		struct ip6_hbh *hbh;
1126 
1127 		mopt = exthdrs->ip6e_hbh;
1128 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1129 			/*
1130 			 * XXX assumption:
1131 			 * - exthdrs->ip6e_hbh is not referenced from places
1132 			 *   other than exthdrs.
1133 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1134 			 */
1135 			int oldoptlen = mopt->m_len;
1136 			struct mbuf *n;
1137 
1138 			/*
1139 			 * XXX: give up if the whole (new) hbh header does
1140 			 * not fit even in an mbuf cluster.
1141 			 */
1142 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1143 				return(ENOBUFS);
1144 
1145 			/*
1146 			 * As a consequence, we must always prepare a cluster
1147 			 * at this point.
1148 			 */
1149 			MGET(n, M_DONTWAIT, MT_DATA);
1150 			if (n) {
1151 				MCLGET(n, M_DONTWAIT);
1152 				if ((n->m_flags & M_EXT) == 0) {
1153 					m_freem(n);
1154 					n = NULL;
1155 				}
1156 			}
1157 			if (!n)
1158 				return(ENOBUFS);
1159 			n->m_len = oldoptlen + JUMBOOPTLEN;
1160 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1161 			      oldoptlen);
1162 			optbuf = mtod(n, caddr_t) + oldoptlen;
1163 			m_freem(mopt);
1164 			mopt = exthdrs->ip6e_hbh = n;
1165 		} else {
1166 			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1167 			mopt->m_len += JUMBOOPTLEN;
1168 		}
1169 		optbuf[0] = IP6OPT_PADN;
1170 		optbuf[1] = 1;
1171 
1172 		/*
1173 		 * Adjust the header length according to the pad and
1174 		 * the jumbo payload option.
1175 		 */
1176 		hbh = mtod(mopt, struct ip6_hbh *);
1177 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1178 	}
1179 
1180 	/* fill in the option. */
1181 	optbuf[2] = IP6OPT_JUMBO;
1182 	optbuf[3] = 4;
1183 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1184 	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1185 
1186 	/* finally, adjust the packet header length */
1187 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1188 
1189 	return(0);
1190 #undef JUMBOOPTLEN
1191 }
1192 
1193 /*
1194  * Insert fragment header and copy unfragmentable header portions.
1195  */
1196 static int
1197 ip6_insertfraghdr(m0, m, hlen, frghdrp)
1198 	struct mbuf *m0, *m;
1199 	int hlen;
1200 	struct ip6_frag **frghdrp;
1201 {
1202 	struct mbuf *n, *mlast;
1203 
1204 	if (hlen > sizeof(struct ip6_hdr)) {
1205 		n = m_copym(m0, sizeof(struct ip6_hdr),
1206 			    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1207 		if (n == 0)
1208 			return(ENOBUFS);
1209 		m->m_next = n;
1210 	} else
1211 		n = m;
1212 
1213 	/* Search for the last mbuf of unfragmentable part. */
1214 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1215 		;
1216 
1217 	if ((mlast->m_flags & M_EXT) == 0 &&
1218 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1219 		/* use the trailing space of the last mbuf for the fragment hdr */
1220 		*frghdrp =
1221 			(struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
1222 		mlast->m_len += sizeof(struct ip6_frag);
1223 		m->m_pkthdr.len += sizeof(struct ip6_frag);
1224 	} else {
1225 		/* allocate a new mbuf for the fragment header */
1226 		struct mbuf *mfrg;
1227 
1228 		MGET(mfrg, M_DONTWAIT, MT_DATA);
1229 		if (mfrg == 0)
1230 			return(ENOBUFS);
1231 		mfrg->m_len = sizeof(struct ip6_frag);
1232 		*frghdrp = mtod(mfrg, struct ip6_frag *);
1233 		mlast->m_next = mfrg;
1234 	}
1235 
1236 	return(0);
1237 }
1238 
1239 /*
1240  * IP6 socket option processing.
1241  */
1242 int
1243 ip6_ctloutput(op, so, level, optname, mp)
1244 	int op;
1245 	struct socket *so;
1246 	int level, optname;
1247 	struct mbuf **mp;
1248 {
1249 	int privileged;
1250 	struct inpcb *inp = sotoinpcb(so);
1251 	struct mbuf *m = *mp;
1252 	int error, optval;
1253 	int optlen;
1254 #ifdef IPSEC
1255 	struct proc *p = curproc; /* XXX */
1256 	struct tdb *tdb;
1257 	struct tdb_ident *tdbip, tdbi;
1258 	int s;
1259 #endif
1260 
1261 	optlen = m ? m->m_len : 0;
1262 	error = optval = 0;
1263 
1264 	privileged = (inp->inp_socket->so_state & SS_PRIV);
1265 
1266 	if (level == IPPROTO_IPV6) {
1267 		switch (op) {
1268 		case PRCO_SETOPT:
1269 			switch (optname) {
1270 			case IPV6_PKTOPTIONS:
1271 				/* m is freed in ip6_pcbopts */
1272 				return(ip6_pcbopts(&inp->inp_outputopts6,
1273 						   m, so));
1274 			case IPV6_HOPOPTS:
1275 			case IPV6_DSTOPTS:
1276 				if (!privileged) {
1277 					error = EPERM;
1278 					break;
1279 				}
1280 				/* fall through */
1281 			case IPV6_UNICAST_HOPS:
1282 			case IPV6_RECVOPTS:
1283 			case IPV6_RECVRETOPTS:
1284 			case IPV6_RECVDSTADDR:
1285 			case IPV6_PKTINFO:
1286 			case IPV6_HOPLIMIT:
1287 			case IPV6_RTHDR:
1288 			case IPV6_CHECKSUM:
1289 			case IPV6_FAITH:
1290 				if (optlen != sizeof(int))
1291 					error = EINVAL;
1292 				else {
1293 					optval = *mtod(m, int *);
1294 					switch (optname) {
1295 
1296 					case IPV6_UNICAST_HOPS:
1297 						if (optval < -1 || optval >= 256)
1298 							error = EINVAL;
1299 						else {
1300 							/* -1 = kernel default */
1301 							inp->inp_hops = optval;
1302 						}
1303 						break;
1304 #define OPTSET(bit) \
1305 	if (optval) \
1306 		inp->inp_flags |= bit; \
1307 	else \
1308 		inp->inp_flags &= ~bit;
1309 					case IPV6_RECVOPTS:
1310 						OPTSET(IN6P_RECVOPTS);
1311 						break;
1312 
1313 					case IPV6_RECVRETOPTS:
1314 						OPTSET(IN6P_RECVRETOPTS);
1315 						break;
1316 
1317 					case IPV6_RECVDSTADDR:
1318 						OPTSET(IN6P_RECVDSTADDR);
1319 						break;
1320 
1321 					case IPV6_PKTINFO:
1322 						OPTSET(IN6P_PKTINFO);
1323 						break;
1324 
1325 					case IPV6_HOPLIMIT:
1326 						OPTSET(IN6P_HOPLIMIT);
1327 						break;
1328 
1329 					case IPV6_HOPOPTS:
1330 						OPTSET(IN6P_HOPOPTS);
1331 						break;
1332 
1333 					case IPV6_DSTOPTS:
1334 						OPTSET(IN6P_DSTOPTS);
1335 						break;
1336 
1337 					case IPV6_RTHDR:
1338 						OPTSET(IN6P_RTHDR);
1339 						break;
1340 
1341 					case IPV6_CHECKSUM:
1342 						inp->in6p_cksum = optval;
1343 						break;
1344 
1345 					case IPV6_FAITH:
1346 						OPTSET(IN6P_FAITH);
1347 						break;
1348 					}
1349 				}
1350 				break;
1351 #undef OPTSET
1352 
1353 			case IPV6_MULTICAST_IF:
1354 			case IPV6_MULTICAST_HOPS:
1355 			case IPV6_MULTICAST_LOOP:
1356 			case IPV6_JOIN_GROUP:
1357 			case IPV6_LEAVE_GROUP:
1358 				error =	ip6_setmoptions(optname,
1359 					&inp->inp_moptions6, m);
1360 				break;
1361 
1362 			case IPV6_PORTRANGE:
1363 				optval = *mtod(m, int *);
1364 
1365 # define in6p		inp
1366 # define in6p_flags	inp_flags
1367 				switch (optval) {
1368 				case IPV6_PORTRANGE_DEFAULT:
1369 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1370 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1371 					break;
1372 
1373 				case IPV6_PORTRANGE_HIGH:
1374 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1375 					in6p->in6p_flags |= IN6P_HIGHPORT;
1376 					break;
1377 
1378 				case IPV6_PORTRANGE_LOW:
1379 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1380 					in6p->in6p_flags |= IN6P_LOWPORT;
1381 					break;
1382 
1383 				default:
1384 					error = EINVAL;
1385 					break;
1386 				}
1387 # undef in6p
1388 # undef in6p_flags
1389 				break;
1390 
1391 #if 0 /*KAME IPSEC*/
1392 			case IPV6_IPSEC_POLICY:
1393 			    {
1394 				caddr_t req = NULL;
1395 				if (m != 0)
1396 					req = mtod(m, caddr_t);
1397 				error = ipsec6_set_policy(in6p, optname, req,
1398 				                          privileged);
1399 			    }
1400 				break;
1401 #endif /* IPSEC */
1402 
1403 			case IPSEC6_OUTSA:
1404 #ifndef IPSEC
1405 				error = EINVAL;
1406 #else
1407 				s = spltdb();
1408 				if (m == 0 || m->m_len != sizeof(struct tdb_ident)) {
1409 					error = EINVAL;
1410 				} else {
1411 					tdbip = mtod(m, struct tdb_ident *);
1412 					tdb = gettdb(tdbip->spi, &tdbip->dst,
1413 						     tdbip->proto);
1414 					if (tdb == NULL)
1415 						error = ESRCH;
1416 					else
1417 						tdb_add_inp(tdb, inp, 0);
1418 				}
1419 				splx(s);
1420 #endif /* IPSEC */
1421 				break;
1422 
1423 			case IPV6_AUTH_LEVEL:
1424 			case IPV6_ESP_TRANS_LEVEL:
1425 			case IPV6_ESP_NETWORK_LEVEL:
1426 			case IPV6_IPCOMP_LEVEL:
1427 #ifndef IPSEC
1428 				error = EINVAL;
1429 #else
1430 				if (m == 0 || m->m_len != sizeof(int)) {
1431 					error = EINVAL;
1432 					break;
1433 				}
1434 				optval = *mtod(m, int *);
1435 
1436 				if (optval < IPSEC_LEVEL_BYPASS ||
1437 				    optval > IPSEC_LEVEL_UNIQUE) {
1438 					error = EINVAL;
1439 					break;
1440 				}
1441 
1442 				switch (optname) {
1443 				case IPV6_AUTH_LEVEL:
1444 				        if (optval < ipsec_auth_default_level &&
1445 					    suser(p->p_ucred, &p->p_acflag)) {
1446 						error = EACCES;
1447 						break;
1448 					}
1449 					inp->inp_seclevel[SL_AUTH] = optval;
1450 					break;
1451 
1452 				case IPV6_ESP_TRANS_LEVEL:
1453 				        if (optval < ipsec_esp_trans_default_level &&
1454 					    suser(p->p_ucred, &p->p_acflag)) {
1455 						error = EACCES;
1456 						break;
1457 					}
1458 					inp->inp_seclevel[SL_ESP_TRANS] = optval;
1459 					break;
1460 
1461 				case IPV6_ESP_NETWORK_LEVEL:
1462 				        if (optval < ipsec_esp_network_default_level &&
1463 					    suser(p->p_ucred, &p->p_acflag)) {
1464 						error = EACCES;
1465 						break;
1466 					}
1467 					inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1468 					break;
1469 
1470 				case IPV6_IPCOMP_LEVEL:
1471 				        if (optval < ipsec_ipcomp_default_level &&
1472 					    suser(p->p_ucred, &p->p_acflag)) {
1473 						error = EACCES;
1474 						break;
1475 					}
1476 					inp->inp_seclevel[SL_IPCOMP] = optval;
1477 					break;
1478 				}
1479 				if (!error)
1480 					inp->inp_secrequire = get_sa_require(inp);
1481 #endif
1482 				break;
1483 
1484 
1485 			default:
1486 				error = ENOPROTOOPT;
1487 				break;
1488 			}
1489 			if (m)
1490 				(void)m_free(m);
1491 			break;
1492 
1493 		case PRCO_GETOPT:
1494 			switch (optname) {
1495 
1496 			case IPV6_OPTIONS:
1497 			case IPV6_RETOPTS:
1498 #if 0
1499 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1500 				if (in6p->in6p_options) {
1501 					m->m_len = in6p->in6p_options->m_len;
1502 					bcopy(mtod(in6p->in6p_options, caddr_t),
1503 					      mtod(m, caddr_t),
1504 					      (unsigned)m->m_len);
1505 				} else
1506 					m->m_len = 0;
1507 				break;
1508 #else
1509 				error = ENOPROTOOPT;
1510 				break;
1511 #endif
1512 
1513 			case IPV6_PKTOPTIONS:
1514 				if (inp->inp_options) {
1515 					*mp = m_copym(inp->inp_options, 0,
1516 						      M_COPYALL, M_WAIT);
1517 				} else {
1518 					*mp = m_get(M_WAIT, MT_SOOPTS);
1519 					(*mp)->m_len = 0;
1520 				}
1521 				break;
1522 
1523 			case IPV6_HOPOPTS:
1524 			case IPV6_DSTOPTS:
1525 				if (!privileged) {
1526 					error = EPERM;
1527 					break;
1528 				}
1529 				/* fall through */
1530 			case IPV6_UNICAST_HOPS:
1531 			case IPV6_RECVOPTS:
1532 			case IPV6_RECVRETOPTS:
1533 			case IPV6_RECVDSTADDR:
1534 			case IPV6_PKTINFO:
1535 			case IPV6_HOPLIMIT:
1536 			case IPV6_RTHDR:
1537 			case IPV6_CHECKSUM:
1538 			case IPV6_FAITH:
1539 			case IPV6_PORTRANGE:
1540 				switch (optname) {
1541 
1542 				case IPV6_UNICAST_HOPS:
1543 					optval = inp->inp_hops;
1544 					break;
1545 
1546 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1547 
1548 				case IPV6_RECVOPTS:
1549 					optval = OPTBIT(IN6P_RECVOPTS);
1550 					break;
1551 
1552 				case IPV6_RECVRETOPTS:
1553 					optval = OPTBIT(IN6P_RECVRETOPTS);
1554 					break;
1555 
1556 				case IPV6_RECVDSTADDR:
1557 					optval = OPTBIT(IN6P_RECVDSTADDR);
1558 					break;
1559 
1560 				case IPV6_PKTINFO:
1561 					optval = OPTBIT(IN6P_PKTINFO);
1562 					break;
1563 
1564 				case IPV6_HOPLIMIT:
1565 					optval = OPTBIT(IN6P_HOPLIMIT);
1566 					break;
1567 
1568 				case IPV6_HOPOPTS:
1569 					optval = OPTBIT(IN6P_HOPOPTS);
1570 					break;
1571 
1572 				case IPV6_DSTOPTS:
1573 					optval = OPTBIT(IN6P_DSTOPTS);
1574 					break;
1575 
1576 				case IPV6_RTHDR:
1577 					optval = OPTBIT(IN6P_RTHDR);
1578 					break;
1579 
1580 				case IPV6_CHECKSUM:
1581 					optval = inp->in6p_cksum;
1582 					break;
1583 
1584 				case IPV6_FAITH:
1585 					optval = OPTBIT(IN6P_FAITH);
1586 					break;
1587 
1588 				case IPV6_PORTRANGE:
1589 				    {
1590 					int flags;
1591 
1592 					flags = inp->inp_flags;
1593 					if (flags & IN6P_HIGHPORT)
1594 						optval = IPV6_PORTRANGE_HIGH;
1595 					else if (flags & IN6P_LOWPORT)
1596 						optval = IPV6_PORTRANGE_LOW;
1597 					else
1598 						optval = 0;
1599 					break;
1600 				    }
1601 				}
1602 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1603 				m->m_len = sizeof(int);
1604 				*mtod(m, int *) = optval;
1605 				break;
1606 
1607 			case IPV6_MULTICAST_IF:
1608 			case IPV6_MULTICAST_HOPS:
1609 			case IPV6_MULTICAST_LOOP:
1610 			case IPV6_JOIN_GROUP:
1611 			case IPV6_LEAVE_GROUP:
1612 				error = ip6_getmoptions(optname, inp->inp_moptions6, mp);
1613 				break;
1614 
1615 #if 0 /*KAME IPSEC*/
1616 			case IPV6_IPSEC_POLICY:
1617 			  {
1618 				caddr_t req = NULL;
1619 				int len = 0;
1620 
1621 				if (m != 0) {
1622 					req = mtod(m, caddr_t);
1623 					len = m->m_len;
1624 				}
1625 				error = ipsec6_get_policy(in6p, req, mp);
1626 				break;
1627 			  }
1628 #endif /* IPSEC */
1629 
1630 			case IPSEC6_OUTSA:
1631 #ifndef IPSEC
1632 				error = EINVAL;
1633 #else
1634 				s = spltdb();
1635 				if (inp->inp_tdb_out == NULL) {
1636 					error = ENOENT;
1637 				} else {
1638 					tdbi.spi = inp->inp_tdb_out->tdb_spi;
1639 					tdbi.dst = inp->inp_tdb_out->tdb_dst;
1640 					tdbi.proto = inp->inp_tdb_out->tdb_sproto;
1641 					*mp = m = m_get(M_WAIT, MT_SOOPTS);
1642 					m->m_len = sizeof(tdbi);
1643 					bcopy((caddr_t)&tdbi, mtod(m, caddr_t),
1644 					    (unsigned)m->m_len);
1645 				}
1646 				splx(s);
1647 #endif /* IPSEC */
1648 				break;
1649 
1650 			case IPV6_AUTH_LEVEL:
1651 			case IPV6_ESP_TRANS_LEVEL:
1652 			case IPV6_ESP_NETWORK_LEVEL:
1653 			case IPV6_IPCOMP_LEVEL:
1654 #ifndef IPSEC
1655 				m->m_len = sizeof(int);
1656 				*mtod(m, int *) = IPSEC_LEVEL_NONE;
1657 #else
1658 				m->m_len = sizeof(int);
1659 				switch (optname) {
1660 				case IP_AUTH_LEVEL:
1661 					optval = inp->inp_seclevel[SL_AUTH];
1662 					break;
1663 
1664 				case IP_ESP_TRANS_LEVEL:
1665 					optval =
1666 					    inp->inp_seclevel[SL_ESP_TRANS];
1667 					break;
1668 
1669 				case IP_ESP_NETWORK_LEVEL:
1670 					optval =
1671 					    inp->inp_seclevel[SL_ESP_NETWORK];
1672 					break;
1673 
1674 				case IP_IPCOMP_LEVEL:
1675 					optval = inp->inp_seclevel[SL_IPCOMP];
1676 					break;
1677 				}
1678 				*mtod(m, int *) = optval;
1679 #endif
1680 				break;
1681 
1682 			default:
1683 				error = ENOPROTOOPT;
1684 				break;
1685 			}
1686 			break;
1687 		}
1688 	} else {
1689 		error = EINVAL;
1690 		if (op == PRCO_SETOPT && *mp)
1691 			(void)m_free(*mp);
1692 	}
1693 	return(error);
1694 }
1695 
1696 /*
1697  * Set up IP6 options in pcb for insertion in output packets.
1698  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1699  * with destination address if source routed.
1700  */
1701 static int
1702 ip6_pcbopts(pktopt, m, so)
1703 	struct ip6_pktopts **pktopt;
1704 	struct mbuf *m;
1705 	struct socket *so;
1706 {
1707 	struct ip6_pktopts *opt = *pktopt;
1708 	int error = 0;
1709 	struct proc *p = curproc;	/* XXX */
1710 	int priv = 0;
1711 
1712 	/* turn off any old options. */
1713 	if (opt) {
1714 		if (opt->ip6po_m)
1715 			(void)m_free(opt->ip6po_m);
1716 	}
1717 	else
1718 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
1719 	*pktopt = 0;
1720 
1721 	if (!m || m->m_len == 0) {
1722 		/*
1723 		 * Only turning off any previous options.
1724 		 */
1725 		if (opt)
1726 			free(opt, M_IP6OPT);
1727 		if (m)
1728 			(void)m_free(m);
1729 		return(0);
1730 	}
1731 
1732 	/*  set options specified by user. */
1733 	if (p && !suser(p->p_ucred, &p->p_acflag))
1734 		priv = 1;
1735 	if ((error = ip6_setpktoptions(m, opt, priv)) != 0) {
1736 		(void)m_free(m);
1737 		return(error);
1738 	}
1739 	*pktopt = opt;
1740 	return(0);
1741 }
1742 
1743 /*
1744  * Set the IP6 multicast options in response to user setsockopt().
1745  */
1746 static int
1747 ip6_setmoptions(optname, im6op, m)
1748 	int optname;
1749 	struct ip6_moptions **im6op;
1750 	struct mbuf *m;
1751 {
1752 	int error = 0;
1753 	u_int loop, ifindex;
1754 	struct ipv6_mreq *mreq;
1755 	struct ifnet *ifp;
1756 	struct ip6_moptions *im6o = *im6op;
1757 	struct route_in6 ro;
1758 	struct sockaddr_in6 *dst;
1759 	struct in6_multi_mship *imm;
1760 	struct proc *p = curproc;	/* XXX */
1761 
1762 	if (im6o == NULL) {
1763 		/*
1764 		 * No multicast option buffer attached to the pcb;
1765 		 * allocate one and initialize to default values.
1766 		 */
1767 		im6o = (struct ip6_moptions *)
1768 			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);
1769 
1770 		*im6op = im6o;
1771 		im6o->im6o_multicast_ifp = NULL;
1772 		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
1773 		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
1774 		LIST_INIT(&im6o->im6o_memberships);
1775 	}
1776 
1777 	switch (optname) {
1778 
1779 	case IPV6_MULTICAST_IF:
1780 		/*
1781 		 * Select the interface for outgoing multicast packets.
1782 		 */
1783 		if (m == NULL || m->m_len != sizeof(u_int)) {
1784 			error = EINVAL;
1785 			break;
1786 		}
1787 		ifindex = *(mtod(m, u_int *));
1788 		if (ifindex < 0 || if_index < ifindex) {
1789 			error = ENXIO;	/* XXX EINVAL? */
1790 			break;
1791 		}
1792 		ifp = ifindex2ifnet[ifindex];
1793 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1794 			error = EADDRNOTAVAIL;
1795 			break;
1796 		}
1797 		im6o->im6o_multicast_ifp = ifp;
1798 		break;
1799 
1800 	case IPV6_MULTICAST_HOPS:
1801 	    {
1802 		/*
1803 		 * Set the IP6 hoplimit for outgoing multicast packets.
1804 		 */
1805 		int optval;
1806 		if (m == NULL || m->m_len != sizeof(int)) {
1807 			error = EINVAL;
1808 			break;
1809 		}
1810 		optval = *(mtod(m, u_int *));
1811 		if (optval < -1 || optval >= 256)
1812 			error = EINVAL;
1813 		else if (optval == -1)
1814 			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
1815 		else
1816 			im6o->im6o_multicast_hlim = optval;
1817 		break;
1818 	    }
1819 
1820 	case IPV6_MULTICAST_LOOP:
1821 		/*
1822 		 * Set the loopback flag for outgoing multicast packets.
1823 		 * Must be zero or one.
1824 		 */
1825 		if (m == NULL || m->m_len != sizeof(u_int) ||
1826 		   (loop = *(mtod(m, u_int *))) > 1) {
1827 			error = EINVAL;
1828 			break;
1829 		}
1830 		im6o->im6o_multicast_loop = loop;
1831 		break;
1832 
1833 	case IPV6_JOIN_GROUP:
1834 		/*
1835 		 * Add a multicast group membership.
1836 		 * Group must be a valid IP6 multicast address.
1837 		 */
1838 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
1839 			error = EINVAL;
1840 			break;
1841 		}
1842 		mreq = mtod(m, struct ipv6_mreq *);
1843 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
1844 			/*
1845 			 * We use the unspecified address to specify to accept
1846 			 * all multicast addresses. Only super user is allowed
1847 			 * to do this.
1848 			 */
1849 			if (suser(p->p_ucred, &p->p_acflag)) {
1850 				error = EACCES;
1851 				break;
1852 			}
1853 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
1854 			error = EINVAL;
1855 			break;
1856 		}
1857 
1858 		/*
1859 		 * If the interface is specified, validate it.
1860 		 */
1861 		if (mreq->ipv6mr_interface < 0
1862 		 || if_index < mreq->ipv6mr_interface) {
1863 			error = ENXIO;	/* XXX EINVAL? */
1864 			break;
1865 		}
1866 		/*
1867 		 * If no interface was explicitly specified, choose an
1868 		 * appropriate one according to the given multicast address.
1869 		 */
1870 		if (mreq->ipv6mr_interface == 0) {
1871 			/*
1872 			 * If the multicast address is in node-local scope,
1873 			 * the interface should be a loopback interface.
1874 			 * Otherwise, look up the routing table for the
1875 			 * address, and choose the outgoing interface.
1876 			 *   XXX: is it a good approach?
1877 			 */
1878 			if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
1879 				ifp = lo0ifp;
1880 			}
1881 			else {
1882 				ro.ro_rt = NULL;
1883 				dst = (struct sockaddr_in6 *)&ro.ro_dst;
1884 				bzero(dst, sizeof(*dst));
1885 				dst->sin6_len = sizeof(struct sockaddr_in6);
1886 				dst->sin6_family = AF_INET6;
1887 				dst->sin6_addr = mreq->ipv6mr_multiaddr;
1888 				rtalloc((struct route *)&ro);
1889 				if (ro.ro_rt == NULL) {
1890 					error = EADDRNOTAVAIL;
1891 					break;
1892 				}
1893 				ifp = ro.ro_rt->rt_ifp;
1894 				rtfree(ro.ro_rt);
1895 			}
1896 		} else
1897 			ifp = ifindex2ifnet[mreq->ipv6mr_interface];
1898 
1899 		/*
1900 		 * See if we found an interface, and confirm that it
1901 		 * supports multicast
1902 		 */
1903 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1904 			error = EADDRNOTAVAIL;
1905 			break;
1906 		}
1907 		/*
1908 		 * Put interface index into the multicast address,
1909 		 * if the address has link-local scope.
1910 		 */
1911 		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
1912 			mreq->ipv6mr_multiaddr.s6_addr16[1]
1913 				= htons(mreq->ipv6mr_interface);
1914 		}
1915 		/*
1916 		 * See if the membership already exists.
1917 		 */
1918 		for (imm = im6o->im6o_memberships.lh_first;
1919 		     imm != NULL; imm = imm->i6mm_chain.le_next)
1920 			if (imm->i6mm_maddr->in6m_ifp == ifp &&
1921 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
1922 					       &mreq->ipv6mr_multiaddr))
1923 				break;
1924 		if (imm != NULL) {
1925 			error = EADDRINUSE;
1926 			break;
1927 		}
1928 		/*
1929 		 * Everything looks good; add a new record to the multicast
1930 		 * address list for the given interface.
1931 		 */
1932 		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);
1933 
1934 		if ((imm->i6mm_maddr =
1935 		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
1936 			free(imm, M_IPMADDR);
1937 			break;
1938 		}
1939 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
1940 		break;
1941 
1942 	case IPV6_LEAVE_GROUP:
1943 		/*
1944 		 * Drop a multicast group membership.
1945 		 * Group must be a valid IP6 multicast address.
1946 		 */
1947 		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
1948 			error = EINVAL;
1949 			break;
1950 		}
1951 		mreq = mtod(m, struct ipv6_mreq *);
1952 		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
1953 			if (suser(p->p_ucred, &p->p_acflag)) {
1954 				error = EACCES;
1955 				break;
1956 			}
1957 		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
1958 			error = EINVAL;
1959 			break;
1960 		}
1961 		/*
1962 		 * If an interface address was specified, get a pointer
1963 		 * to its ifnet structure.
1964 		 */
1965 		if (mreq->ipv6mr_interface < 0
1966 		 || if_index < mreq->ipv6mr_interface) {
1967 			error = ENXIO;	/* XXX EINVAL? */
1968 			break;
1969 		}
1970 		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
1971 		/*
1972 		 * Put interface index into the multicast address,
1973 		 * if the address has link-local scope.
1974 		 */
1975 		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
1976 			mreq->ipv6mr_multiaddr.s6_addr16[1]
1977 				= htons(mreq->ipv6mr_interface);
1978 		}
1979 		/*
1980 		 * Find the membership in the membership list.
1981 		 */
1982 		for (imm = im6o->im6o_memberships.lh_first;
1983 		     imm != NULL; imm = imm->i6mm_chain.le_next) {
1984 			if ((ifp == NULL ||
1985 			     imm->i6mm_maddr->in6m_ifp == ifp) &&
1986 			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
1987 					       &mreq->ipv6mr_multiaddr))
1988 				break;
1989 		}
1990 		if (imm == NULL) {
1991 			/* Unable to resolve interface */
1992 			error = EADDRNOTAVAIL;
1993 			break;
1994 		}
1995 		/*
1996 		 * Give up the multicast address record to which the
1997 		 * membership points.
1998 		 */
1999 		LIST_REMOVE(imm, i6mm_chain);
2000 		in6_delmulti(imm->i6mm_maddr);
2001 		free(imm, M_IPMADDR);
2002 		break;
2003 
2004 	default:
2005 		error = EOPNOTSUPP;
2006 		break;
2007 	}
2008 
2009 	/*
2010 	 * If all options have default values, no need to keep the mbuf.
2011 	 */
2012 	if (im6o->im6o_multicast_ifp == NULL &&
2013 	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
2014 	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
2015 	    im6o->im6o_memberships.lh_first == NULL) {
2016 		free(*im6op, M_IPMOPTS);
2017 		*im6op = NULL;
2018 	}
2019 
2020 	return(error);
2021 }
2022 
2023 /*
2024  * Return the IP6 multicast options in response to user getsockopt().
2025  */
2026 static int
2027 ip6_getmoptions(optname, im6o, mp)
2028 	int optname;
2029 	struct ip6_moptions *im6o;
2030 	struct mbuf **mp;
2031 {
2032 	u_int *hlim, *loop, *ifindex;
2033 
2034 	*mp = m_get(M_WAIT, MT_SOOPTS);
2035 
2036 	switch (optname) {
2037 
2038 	case IPV6_MULTICAST_IF:
2039 		ifindex = mtod(*mp, u_int *);
2040 		(*mp)->m_len = sizeof(u_int);
2041 		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2042 			*ifindex = 0;
2043 		else
2044 			*ifindex = im6o->im6o_multicast_ifp->if_index;
2045 		return(0);
2046 
2047 	case IPV6_MULTICAST_HOPS:
2048 		hlim = mtod(*mp, u_int *);
2049 		(*mp)->m_len = sizeof(u_int);
2050 		if (im6o == NULL)
2051 			*hlim = ip6_defmcasthlim;
2052 		else
2053 			*hlim = im6o->im6o_multicast_hlim;
2054 		return(0);
2055 
2056 	case IPV6_MULTICAST_LOOP:
2057 		loop = mtod(*mp, u_int *);
2058 		(*mp)->m_len = sizeof(u_int);
2059 		if (im6o == NULL)
2060 			*loop = ip6_defmcasthlim;
2061 		else
2062 			*loop = im6o->im6o_multicast_loop;
2063 		return(0);
2064 
2065 	default:
2066 		return(EOPNOTSUPP);
2067 	}
2068 }
2069 
2070 /*
2071  * Discard the IP6 multicast options.
2072  */
2073 void
2074 ip6_freemoptions(im6o)
2075 	struct ip6_moptions *im6o;
2076 {
2077 	struct in6_multi_mship *imm;
2078 
2079 	if (im6o == NULL)
2080 		return;
2081 
2082 	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2083 		LIST_REMOVE(imm, i6mm_chain);
2084 		if (imm->i6mm_maddr)
2085 			in6_delmulti(imm->i6mm_maddr);
2086 		free(imm, M_IPMADDR);
2087 	}
2088 	free(im6o, M_IPMOPTS);
2089 }
2090 
2091 /*
2092  * Set IPv6 outgoing packet options based on advanced API.
2093  */
2094 int
2095 ip6_setpktoptions(control, opt, priv)
2096 	struct mbuf *control;
2097 	struct ip6_pktopts *opt;
2098 	int priv;
2099 {
2100 	struct cmsghdr *cm = 0;
2101 
2102 	if (control == 0 || opt == 0)
2103 		return(EINVAL);
2104 
2105 	bzero(opt, sizeof(*opt));
2106 	opt->ip6po_hlim = -1; /* -1 means to use default hop limit */
2107 
2108 	/*
2109 	 * XXX: Currently, we assume all the optional information is stored
2110 	 * in a single mbuf.
2111 	 */
2112 	if (control->m_next)
2113 		return(EINVAL);
2114 
2115 	opt->ip6po_m = control;
2116 
2117 	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2118 		     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2119 		cm = mtod(control, struct cmsghdr *);
2120 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2121 			return(EINVAL);
2122 		if (cm->cmsg_level != IPPROTO_IPV6)
2123 			continue;
2124 
2125 		switch(cm->cmsg_type) {
2126 		case IPV6_PKTINFO:
2127 			if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo)))
2128 				return(EINVAL);
2129 			opt->ip6po_pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm);
2130 			if (opt->ip6po_pktinfo->ipi6_ifindex &&
2131 			    IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr))
2132 				opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] =
2133 					htons(opt->ip6po_pktinfo->ipi6_ifindex);
2134 
2135 			if (opt->ip6po_pktinfo->ipi6_ifindex > if_index
2136 			 || opt->ip6po_pktinfo->ipi6_ifindex < 0) {
2137 				return(ENXIO);
2138 			}
2139 
2140 			/*
2141 			 * Check if the requested source address is indeed a
2142 			 * unicast address assigned to the node, and can be
2143 			 * used as the packet's source address.
2144 			 */
2145 			if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
2146 				struct ifaddr *ia;
2147 				struct in6_ifaddr *ia6;
2148 				struct sockaddr_in6 sin6;
2149 
2150 				bzero(&sin6, sizeof(sin6));
2151 				sin6.sin6_len = sizeof(sin6);
2152 				sin6.sin6_family = AF_INET6;
2153 				sin6.sin6_addr =
2154 					opt->ip6po_pktinfo->ipi6_addr;
2155 				ia = ifa_ifwithaddr(sin6tosa(&sin6));
2156 				if (ia == NULL ||
2157 				    (opt->ip6po_pktinfo->ipi6_ifindex &&
2158 				     (ia->ifa_ifp->if_index !=
2159 				      opt->ip6po_pktinfo->ipi6_ifindex))) {
2160 					return(EADDRNOTAVAIL);
2161 				}
2162 				ia6 = (struct in6_ifaddr *)ia;
2163 				if ((ia6->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) != 0) {
2164 					return(EADDRNOTAVAIL);
2165 				}
2166 
2167 				/*
2168 				 * Check if the requested source address is
2169 				 * indeed a unicast address assigned to the
2170 				 * node.
2171 				 */
2172 				if (IN6_IS_ADDR_MULTICAST(&opt->ip6po_pktinfo->ipi6_addr))
2173 					return(EADDRNOTAVAIL);
2174 			}
2175 			break;
2176 
2177 		case IPV6_HOPLIMIT:
2178 			if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
2179 				return(EINVAL);
2180 
2181 			opt->ip6po_hlim = *(int *)CMSG_DATA(cm);
2182 			if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255)
2183 				return(EINVAL);
2184 			break;
2185 
2186 		case IPV6_NEXTHOP:
2187 			if (!priv)
2188 				return(EPERM);
2189 
2190 			if (cm->cmsg_len < sizeof(u_char) ||
2191 			    /* check if cmsg_len is large enough for sa_len */
2192 			    cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm)))
2193 				return(EINVAL);
2194 
2195 			opt->ip6po_nexthop = (struct sockaddr *)CMSG_DATA(cm);
2196 
2197 			break;
2198 
2199 		case IPV6_HOPOPTS:
2200 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh)))
2201 				return(EINVAL);
2202 			opt->ip6po_hbh = (struct ip6_hbh *)CMSG_DATA(cm);
2203 			if (cm->cmsg_len !=
2204 			    CMSG_LEN((opt->ip6po_hbh->ip6h_len + 1) << 3))
2205 				return(EINVAL);
2206 			break;
2207 
2208 		case IPV6_DSTOPTS:
2209 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest)))
2210 				return(EINVAL);
2211 
2212 			/*
2213 			 * If there is no routing header yet, the destination
2214 			 * options header should be put on the 1st part.
2215 			 * Otherwise, the header should be on the 2nd part.
2216 			 * (See RFC 2460, section 4.1)
2217 			 */
2218 			if (opt->ip6po_rthdr == NULL) {
2219 				opt->ip6po_dest1 =
2220 					(struct ip6_dest *)CMSG_DATA(cm);
2221 				if (cm->cmsg_len !=
2222 				    CMSG_LEN((opt->ip6po_dest1->ip6d_len + 1)
2223 					     << 3))
2224 					return(EINVAL);
2225 			}
2226 			else {
2227 				opt->ip6po_dest2 =
2228 					(struct ip6_dest *)CMSG_DATA(cm);
2229 				if (cm->cmsg_len !=
2230 				    CMSG_LEN((opt->ip6po_dest2->ip6d_len + 1)
2231 					     << 3))
2232 					return(EINVAL);
2233 			}
2234 			break;
2235 
2236 		case IPV6_RTHDR:
2237 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr)))
2238 				return(EINVAL);
2239 			opt->ip6po_rthdr = (struct ip6_rthdr *)CMSG_DATA(cm);
2240 			if (cm->cmsg_len !=
2241 			    CMSG_LEN((opt->ip6po_rthdr->ip6r_len + 1) << 3))
2242 				return(EINVAL);
2243 			switch(opt->ip6po_rthdr->ip6r_type) {
2244 			case IPV6_RTHDR_TYPE_0:
2245 				if (opt->ip6po_rthdr->ip6r_segleft == 0)
2246 					return(EINVAL);
2247 				break;
2248 			default:
2249 				return(EINVAL);
2250 			}
2251 			break;
2252 
2253 		default:
2254 			return(ENOPROTOOPT);
2255 		}
2256 	}
2257 
2258 	return(0);
2259 }
2260 
2261 /*
2262  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
2263  * packet to the input queue of a specified interface.  Note that this
2264  * calls the output routine of the loopback "driver", but with an interface
2265  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
2266  */
2267 void
2268 ip6_mloopback(ifp, m, dst)
2269 	struct ifnet *ifp;
2270 	struct mbuf *m;
2271 	struct sockaddr_in6 *dst;
2272 {
2273 	struct mbuf *copym;
2274 	struct ip6_hdr *ip6;
2275 
2276 	copym = m_copy(m, 0, M_COPYALL);
2277 	if (copym == NULL)
2278 		return;
2279 
2280 	/*
2281 	 * Make sure to deep-copy IPv6 header portion in case the data
2282 	 * is in an mbuf cluster, so that we can safely override the IPv6
2283 	 * header portion later.
2284 	 */
2285 	if ((copym->m_flags & M_EXT) != 0 ||
2286 	    copym->m_len < sizeof(struct ip6_hdr)) {
2287 		copym = m_pullup(copym, sizeof(struct ip6_hdr));
2288 		if (copym == NULL)
2289 			return;
2290 	}
2291 
2292 #ifdef DIAGNOSTIC
2293 	if (copym->m_len < sizeof(*ip6)) {
2294 		m_freem(copym);
2295 		return;
2296 	}
2297 #endif
2298 
2299 	ip6 = mtod(copym, struct ip6_hdr *);
2300 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
2301 		ip6->ip6_src.s6_addr16[1] = 0;
2302 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
2303 		ip6->ip6_dst.s6_addr16[1] = 0;
2304 
2305 	(void)looutput(ifp, copym, (struct sockaddr *)dst, NULL);
2306 }
2307 
2308 /*
2309  * Chop IPv6 header off from the payload.
2310  */
2311 static int
2312 ip6_splithdr(m, exthdrs)
2313 	struct mbuf *m;
2314 	struct ip6_exthdrs *exthdrs;
2315 {
2316 	struct mbuf *mh;
2317 	struct ip6_hdr *ip6;
2318 
2319 	ip6 = mtod(m, struct ip6_hdr *);
2320 	if (m->m_len > sizeof(*ip6)) {
2321 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2322 		if (mh == 0) {
2323 			m_freem(m);
2324 			return ENOBUFS;
2325 		}
2326 		M_MOVE_PKTHDR(mh, m);
2327 		MH_ALIGN(mh, sizeof(*ip6));
2328 		m->m_len -= sizeof(*ip6);
2329 		m->m_data += sizeof(*ip6);
2330 		mh->m_next = m;
2331 		m = mh;
2332 		m->m_len = sizeof(*ip6);
2333 		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
2334 	}
2335 	exthdrs->ip6e_ip6 = m;
2336 	return 0;
2337 }
2338 
2339 /*
2340  * Compute IPv6 extension header length.
2341  */
2342 # define in6pcb	inpcb
2343 # define in6p_outputopts	inp_outputopts6
2344 int
2345 ip6_optlen(in6p)
2346 	struct in6pcb *in6p;
2347 {
2348 	int len;
2349 
2350 	if (!in6p->in6p_outputopts)
2351 		return 0;
2352 
2353 	len = 0;
2354 #define elen(x) \
2355     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
2356 
2357 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
2358 	len += elen(in6p->in6p_outputopts->ip6po_dest1);
2359 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
2360 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
2361 	return len;
2362 #undef elen
2363 }
2364 # undef in6pcb
2365 # undef in6p_outputopts
2366