xref: /openbsd-src/sys/netinet6/ip6_output.c (revision 3a3fbb3f2e2521ab7c4a56b7ff7462ebd9095ec5)
1 /*	$OpenBSD: ip6_output.c,v 1.56 2001/12/07 09:16:07 itojun Exp $	*/
2 /*	$KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1990, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
66  */
67 
68 #include "pf.h"
69 
70 #include <sys/param.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/errno.h>
74 #include <sys/protosw.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/systm.h>
78 #include <sys/proc.h>
79 
80 #include <net/if.h>
81 #include <net/route.h>
82 
83 #include <netinet/in.h>
84 #include <netinet/in_var.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/ip.h>
87 #include <netinet/in_pcb.h>
88 
89 #include <netinet/ip6.h>
90 #include <netinet/icmp6.h>
91 #include <netinet6/ip6_var.h>
92 #include <netinet6/nd6.h>
93 
94 #if NPF > 0
95 #include <net/pfvar.h>
96 #endif
97 
98 #ifdef IPSEC
99 #include <netinet/ip_ah.h>
100 #include <netinet/ip_esp.h>
101 #include <netinet/udp.h>
102 #include <netinet/tcp.h>
103 #include <net/pfkeyv2.h>
104 
/*
 * IPsec symbols defined outside this file (this group sits inside the
 * surrounding #ifdef IPSEC).  None of them is referenced in the part of
 * the file reviewed here.
 * NOTE(review): get_sa_require() presumably derives the required SA
 * levels from a PCB, and the four ints look like system-wide default
 * levels for AH, ESP (transport/network) and IPCOMP -- confirm against
 * the IPsec code before relying on this.
 */
105 extern u_int8_t get_sa_require  __P((struct inpcb *));
106 
107 extern int ipsec_auth_default_level;
108 extern int ipsec_esp_trans_default_level;
109 extern int ipsec_esp_network_default_level;
110 extern int ipsec_ipcomp_default_level;
111 #endif /* IPSEC */
112 
/*
 * Scratch set of mbuf pointers that ip6_output() uses while it composes
 * the outgoing header chain.  ip6_output() bzero()s the structure first,
 * so a NULL member means "this header is not present".
 *
 *   ip6e_ip6    mbuf that ip6_output() adopts as the head of the packet
 *               after ip6_splithdr() returns success.
 *   ip6e_hbh    built by ip6_copyexthdr() from opt->ip6po_hbh
 *               (Hop-by-Hop options header).
 *   ip6e_dest1  built from opt->ip6po_dest1 (Destination options header,
 *               1st part).
 *   ip6e_rthdr  built from opt->ip6po_rthdr (Routing header).
 *   ip6e_dest2  built from opt->ip6po_dest2 (Destination options header,
 *               2nd part).
 *
 * ip6_output() counts hbh, dest1 and rthdr in the unfragmentable part of
 * the packet; dest2 is added to the option length only afterwards and is
 * linked in directly after the IPv6 header, ahead of the payload, before
 * the other headers are chained in front of it.
 */
113 struct ip6_exthdrs {
114 	struct mbuf *ip6e_ip6;
115 	struct mbuf *ip6e_hbh;
116 	struct mbuf *ip6e_dest1;
117 	struct mbuf *ip6e_rthdr;
118 	struct mbuf *ip6e_dest2;
119 };
120 
/*
 * Forward declarations of the file-local helpers.  All of them return an
 * int; at every call site in ip6_output() a nonzero return is treated as
 * an error code.
 *
 *   ip6_pcbopts, ip6_setmoptions, ip6_getmoptions
 *       Not called from ip6_output().
 *       NOTE(review): presumably the socket-option / multicast-option
 *       handlers -- confirm at their definitions.
 *   ip6_copyexthdr(mp, hdr, hlen)
 *       Called through MAKE_EXTHDR with the address of an ip6_exthdrs
 *       member, the caller-supplied header, and a length computed as
 *       (ip6e_len + 1) << 3.
 *   ip6_insertfraghdr(m0, m, hlen, &ip6f)
 *       Called once per fragment; on success ip6_output() fills in the
 *       fragment header through the returned ip6f pointer.
 *   ip6_insert_jumboopt(&exthdrs, plen)
 *       Called when the payload length exceeds IPV6_MAXPACKET.
 *   ip6_splithdr(m, &exthdrs)
 *       Called before any header is inserted; on success the caller
 *       continues with exthdrs.ip6e_ip6 as the packet head, on failure
 *       the caller drops its reference to m (sets it to NULL).
 */
121 static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
122 			    struct socket *));
123 static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *));
124 static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **));
125 static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int));
126 static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
127 				  struct ip6_frag **));
128 static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t));
129 static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *));
130 
131 /*
132  * IP6 output. The packet in mbuf chain m contains a skeletal IP6
133  * header (with pri, len, nxt, hlim, src, dst).
134  * This function may modify ver and hlim only.
135  * The mbuf chain containing the packet will be freed.
136  * The mbuf opt, if present, will not be freed.
137  *
138  * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
139  * nd_ifinfo.linkmtu is u_int32_t.  so we use u_long to hold largest one,
140  * which is rt_rmx.rmx_mtu.
141  */
142 int
143 ip6_output(m0, opt, ro, flags, im6o, ifpp)
144 	struct mbuf *m0;
145 	struct ip6_pktopts *opt;
146 	struct route_in6 *ro;
147 	int flags;
148 	struct ip6_moptions *im6o;
149 	struct ifnet **ifpp;		/* XXX: just for statistics */
150 {
151 	struct ip6_hdr *ip6, *mhip6;
152 	struct ifnet *ifp, *origifp;
153 	struct mbuf *m = m0;
154 	int hlen, tlen, len, off;
155 	struct route_in6 ip6route;
156 	struct sockaddr_in6 *dst;
157 	int error = 0;
158 	struct in6_ifaddr *ia;
159 	u_long mtu;
160 	u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
161 	struct ip6_exthdrs exthdrs;
162 	struct in6_addr finaldst;
163 	struct route_in6 *ro_pmtu = NULL;
164 	int hdrsplit = 0;
165 	u_int8_t sproto = 0;
166 #ifdef IPSEC
167 	struct m_tag *mtag;
168 	union sockaddr_union sdst;
169 	struct tdb_ident *tdbi;
170 	u_int32_t sspi;
171 	struct inpcb *inp;
172 	struct tdb *tdb;
173 	int s;
174 #endif /* IPSEC */
175 
176 #ifdef IPSEC
177 	inp = NULL;	/*XXX*/
178 	if (inp && (inp->inp_flags & INP_IPV6) == 0)
179 		panic("ip6_output: IPv4 pcb is passed");
180 #endif /* IPSEC */
181 
182 #define MAKE_EXTHDR(hp, mp)						\
183     do {								\
184 	if (hp) {							\
185 		struct ip6_ext *eh = (struct ip6_ext *)(hp);		\
186 		error = ip6_copyexthdr((mp), (caddr_t)(hp), 		\
187 				       ((eh)->ip6e_len + 1) << 3);	\
188 		if (error)						\
189 			goto freehdrs;					\
190 	}								\
191     } while (0)
192 
193 	bzero(&exthdrs, sizeof(exthdrs));
194 	if (opt) {
195 		/* Hop-by-Hop options header */
196 		MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
197 		/* Destination options header(1st part) */
198 		MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
199 		/* Routing header */
200 		MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
201 		/* Destination options header(2nd part) */
202 		MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
203 	}
204 
205 #ifdef IPSEC
206 	/*
207 	 * splnet is chosen over spltdb because we are not allowed to
208 	 * lower the level, and udp6_output calls us in splnet(). XXX check
209 	 */
210 	s = splnet();
211 
212 	/*
213 	 * Check if there was an outgoing SA bound to the flow
214 	 * from a transport protocol.
215 	 */
216 	ip6 = mtod(m, struct ip6_hdr *);
217 
218 	/* Do we have any pending SAs to apply ? */
219 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
220 	if (mtag != NULL) {
221 #ifdef DIAGNOSTIC
222 		if (mtag->m_tag_len != sizeof (struct tdb_ident))
223 			panic("ip6_output: tag of length %d (should be %d",
224 			    mtag->m_tag_len, sizeof (struct tdb_ident));
225 #endif
226 		tdbi = (struct tdb_ident *)(mtag + 1);
227 		tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
228 		if (tdb == NULL)
229 			error = -EINVAL;
230 		m_tag_delete(m, mtag);
231 	}
232 	else
233 		tdb = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr),
234 		    &error, IPSP_DIRECTION_OUT, NULL, inp);
235 
236 	if (tdb == NULL) {
237 	        splx(s);
238 
239 		if (error == 0) {
240 		        /*
241 			 * No IPsec processing required, we'll just send the
242 			 * packet out.
243 			 */
244 		        sproto = 0;
245 
246 			/* Fall through to routing/multicast handling */
247 		} else {
248 		        /*
249 			 * -EINVAL is used to indicate that the packet should
250 			 * be silently dropped, typically because we've asked
251 			 * key management for an SA.
252 			 */
253 		        if (error == -EINVAL) /* Should silently drop packet */
254 				error = 0;
255 
256 			goto freehdrs;
257 		}
258 	} else {
259 		/* Loop detection */
260 		for (mtag = m_tag_first(m); mtag != NULL;
261 		    mtag = m_tag_next(m, mtag)) {
262 			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
263 			    mtag->m_tag_id !=
264 			    PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
265 				continue;
266 			tdbi = (struct tdb_ident *)(mtag + 1);
267 			if (tdbi->spi == tdb->tdb_spi &&
268 			    tdbi->proto == tdb->tdb_sproto &&
269 			    !bcmp(&tdbi->dst, &tdb->tdb_dst,
270 			    sizeof(union sockaddr_union))) {
271 				splx(s);
272 				sproto = 0; /* mark as no-IPsec-needed */
273 				goto done_spd;
274 			}
275 		}
276 
277 	        /* We need to do IPsec */
278 	        bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
279 		sspi = tdb->tdb_spi;
280 		sproto = tdb->tdb_sproto;
281 	        splx(s);
282 
283 #if 1 /* XXX */
284 		/* if we have any extension header, we cannot perform IPsec */
285 		if (exthdrs.ip6e_hbh || exthdrs.ip6e_dest1 ||
286 		    exthdrs.ip6e_rthdr || exthdrs.ip6e_dest2) {
287 			error = EHOSTUNREACH;
288 			goto freehdrs;
289 		}
290 #endif
291 	}
292 
293 	/* Fall through to the routing/multicast handling code */
294  done_spd:
295 #endif /* IPSEC */
296 
297 	/*
298 	 * Calculate the total length of the extension header chain.
299 	 * Keep the length of the unfragmentable part for fragmentation.
300 	 */
301 	optlen = 0;
302 	if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
303 	if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
304 	if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
305 	unfragpartlen = optlen + sizeof(struct ip6_hdr);
306 	/* NOTE: we don't add AH/ESP length here. do that later. */
307 	if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
308 
309 	/*
310 	 * If we need IPsec, or there is at least one extension header,
311 	 * separate IP6 header from the payload.
312 	 */
313 	if ((sproto || optlen) && !hdrsplit) {
314 		if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
315 			m = NULL;
316 			goto freehdrs;
317 		}
318 		m = exthdrs.ip6e_ip6;
319 		hdrsplit++;
320 	}
321 
322 	/* adjust pointer */
323 	ip6 = mtod(m, struct ip6_hdr *);
324 
325 	/* adjust mbuf packet header length */
326 	m->m_pkthdr.len += optlen;
327 	plen = m->m_pkthdr.len - sizeof(*ip6);
328 
329 	/* If this is a jumbo payload, insert a jumbo payload option. */
330 	if (plen > IPV6_MAXPACKET) {
331 		if (!hdrsplit) {
332 			if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
333 				m = NULL;
334 				goto freehdrs;
335 			}
336 			m = exthdrs.ip6e_ip6;
337 			hdrsplit++;
338 		}
339 		/* adjust pointer */
340 		ip6 = mtod(m, struct ip6_hdr *);
341 		if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
342 			goto freehdrs;
343 		ip6->ip6_plen = 0;
344 	} else
345 		ip6->ip6_plen = htons(plen);
346 
347 	/*
348 	 * Concatenate headers and fill in next header fields.
349 	 * Here we have, on "m"
350 	 *	IPv6 payload
351 	 * and we insert headers accordingly.  Finally, we should be getting:
352 	 *	IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
353 	 *
354 	 * during the header composing process, "m" points to IPv6 header.
355 	 * "mprev" points to an extension header prior to esp.
356 	 */
357 	{
358 		u_char *nexthdrp = &ip6->ip6_nxt;
359 		struct mbuf *mprev = m;
360 
361 		/*
362 		 * we treat dest2 specially.  this makes IPsec processing
363 		 * much easier.
364 		 *
365 		 * result: IPv6 dest2 payload
366 		 * m and mprev will point to IPv6 header.
367 		 */
368 		if (exthdrs.ip6e_dest2) {
369 			if (!hdrsplit)
370 				panic("assumption failed: hdr not split");
371 			exthdrs.ip6e_dest2->m_next = m->m_next;
372 			m->m_next = exthdrs.ip6e_dest2;
373 			*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
374 			ip6->ip6_nxt = IPPROTO_DSTOPTS;
375 		}
376 
377 #define MAKE_CHAIN(m, mp, p, i)\
378     do {\
379 	if (m) {\
380 		if (!hdrsplit) \
381 			panic("assumption failed: hdr not split"); \
382 		*mtod((m), u_char *) = *(p);\
383 		*(p) = (i);\
384 		p = mtod((m), u_char *);\
385 		(m)->m_next = (mp)->m_next;\
386 		(mp)->m_next = (m);\
387 		(mp) = (m);\
388 	}\
389     } while (0)
390 		/*
391 		 * result: IPv6 hbh dest1 rthdr dest2 payload
392 		 * m will point to IPv6 header.  mprev will point to the
393 		 * extension header prior to dest2 (rthdr in the above case).
394 		 */
395 		MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
396 			   nexthdrp, IPPROTO_HOPOPTS);
397 		MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
398 			   nexthdrp, IPPROTO_DSTOPTS);
399 		MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
400 			   nexthdrp, IPPROTO_ROUTING);
401 
402 #if 0 /*KAME IPSEC*/
403 		if (!needipsec)
404 			goto skip_ipsec2;
405 
406 		/*
407 		 * pointers after IPsec headers are not valid any more.
408 		 * other pointers need a great care too.
409 		 * (IPsec routines should not mangle mbufs prior to AH/ESP)
410 		 */
411 		exthdrs.ip6e_dest2 = NULL;
412 
413 	    {
414 		struct ip6_rthdr *rh = NULL;
415 		int segleft_org = 0;
416 		struct ipsec_output_state state;
417 
418 		if (exthdrs.ip6e_rthdr) {
419 			rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
420 			segleft_org = rh->ip6r_segleft;
421 			rh->ip6r_segleft = 0;
422 		}
423 
424 		bzero(&state, sizeof(state));
425 		state.m = m;
426 		error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
427 			&needipsectun);
428 		m = state.m;
429 		if (error) {
430 			/* mbuf is already reclaimed in ipsec6_output_trans. */
431 			m = NULL;
432 			switch (error) {
433 			case EHOSTUNREACH:
434 			case ENETUNREACH:
435 			case EMSGSIZE:
436 			case ENOBUFS:
437 			case ENOMEM:
438 				break;
439 			default:
440 				printf("ip6_output (ipsec): error code %d\n", error);
441 				/*fall through*/
442 			case ENOENT:
443 				/* don't show these error codes to the user */
444 				error = 0;
445 				break;
446 			}
447 			goto bad;
448 		}
449 		if (exthdrs.ip6e_rthdr) {
450 			/* ah6_output doesn't modify mbuf chain */
451 			rh->ip6r_segleft = segleft_org;
452 		}
453 	    }
454 skip_ipsec2:;
455 #endif
456 	}
457 
458 	/*
459 	 * If there is a routing header, replace destination address field
460 	 * with the first hop of the routing header.
461 	 */
462 	if (exthdrs.ip6e_rthdr) {
463 		struct ip6_rthdr *rh =
464 			(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
465 						  struct ip6_rthdr *));
466 		struct ip6_rthdr0 *rh0;
467 
468 		finaldst = ip6->ip6_dst;
469 		switch(rh->ip6r_type) {
470 		case IPV6_RTHDR_TYPE_0:
471 			 rh0 = (struct ip6_rthdr0 *)rh;
472 			 ip6->ip6_dst = rh0->ip6r0_addr[0];
473 			 bcopy((caddr_t)&rh0->ip6r0_addr[1],
474 				 (caddr_t)&rh0->ip6r0_addr[0],
475 				 sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1)
476 				 );
477 			 rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst;
478 			 break;
479 		default:	/* is it possible? */
480 			 error = EINVAL;
481 			 goto bad;
482 		}
483 	}
484 
485 	/* Source address validation */
486 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
487 	    (flags & IPV6_DADOUTPUT) == 0) {
488 		error = EOPNOTSUPP;
489 		ip6stat.ip6s_badscope++;
490 		goto bad;
491 	}
492 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
493 		error = EOPNOTSUPP;
494 		ip6stat.ip6s_badscope++;
495 		goto bad;
496 	}
497 
498 	ip6stat.ip6s_localout++;
499 
500 	/*
501 	 * Route packet.
502 	 */
503 	if (ro == 0) {
504 		ro = &ip6route;
505 		bzero((caddr_t)ro, sizeof(*ro));
506 	}
507 	ro_pmtu = ro;
508 	if (opt && opt->ip6po_rthdr)
509 		ro = &opt->ip6po_route;
510 	dst = (struct sockaddr_in6 *)&ro->ro_dst;
511 	/*
512 	 * If there is a cached route,
513 	 * check that it is to the same destination
514 	 * and is still up. If not, free it and try again.
515 	 */
516 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
517 			 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) {
518 		RTFREE(ro->ro_rt);
519 		ro->ro_rt = (struct rtentry *)0;
520 	}
521 	if (ro->ro_rt == 0) {
522 		bzero(dst, sizeof(*dst));
523 		dst->sin6_family = AF_INET6;
524 		dst->sin6_len = sizeof(struct sockaddr_in6);
525 		dst->sin6_addr = ip6->ip6_dst;
526 	}
527 #ifdef IPSEC
528 	/*
529 	 * Check if the packet needs encapsulation.
530 	 * ipsp_process_packet will never come back to here.
531 	 */
532 	if (sproto != 0) {
533 	        s = splnet();
534 
535 		/* fill in IPv6 header which would be filled later */
536 		if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
537 			if (opt && opt->ip6po_hlim != -1)
538 				ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
539 		} else {
540 			if (im6o != NULL)
541 				ip6->ip6_hlim = im6o->im6o_multicast_hlim;
542 			else
543 				ip6->ip6_hlim = ip6_defmcasthlim;
544 			if (opt && opt->ip6po_hlim != -1)
545 				ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
546 
547 			/*
548 			 * XXX what should we do if ip6_hlim == 0 and the
549 			 * packet gets tunnelled?
550 			 */
551 		}
552 
553 		tdb = gettdb(sspi, &sdst, sproto);
554 		if (tdb == NULL) {
555 			splx(s);
556 			error = EHOSTUNREACH;
557 			m_freem(m);
558 			goto done;
559 		}
560 
561 		/* Latch to PCB */
562 		if (inp)
563 			tdb_add_inp(tdb, inp, 0);
564 
565 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
566 
567 		/* Callee frees mbuf */
568 		error = ipsp_process_packet(m, tdb, AF_INET6, 0);
569 		splx(s);
570 		return error;  /* Nothing more to be done */
571 	}
572 #endif /* IPSEC */
573 
574 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
575 		/* Unicast */
576 
577 #define ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
578 #define sin6tosa(sin6)	((struct sockaddr *)(sin6))
579 		/* xxx
580 		 * interface selection comes here
581 		 * if an interface is specified from an upper layer,
582 		 * ifp must point it.
583 		 */
584 		if (ro->ro_rt == 0) {
585 			/*
586 			 * non-bsdi always clone routes, if parent is
587 			 * PRF_CLONING.
588 			 */
589 			rtalloc((struct route *)ro);
590 		}
591 		if (ro->ro_rt == 0) {
592 			ip6stat.ip6s_noroute++;
593 			error = EHOSTUNREACH;
594 			/* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */
595 			goto bad;
596 		}
597 		ia = ifatoia6(ro->ro_rt->rt_ifa);
598 		ifp = ro->ro_rt->rt_ifp;
599 		ro->ro_rt->rt_use++;
600 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
601 			dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway;
602 		m->m_flags &= ~(M_BCAST | M_MCAST);	/* just in case */
603 
604 		in6_ifstat_inc(ifp, ifs6_out_request);
605 
606 		/*
607 		 * Check if the outgoing interface conflicts with
608 		 * the interface specified by ifi6_ifindex (if specified).
609 		 * Note that loopback interface is always okay.
610 		 * (this may happen when we are sending a packet to one of
611 		 *  our own addresses.)
612 		 */
613 		if (opt && opt->ip6po_pktinfo
614 		 && opt->ip6po_pktinfo->ipi6_ifindex) {
615 			if (!(ifp->if_flags & IFF_LOOPBACK)
616 			 && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) {
617 				ip6stat.ip6s_noroute++;
618 				in6_ifstat_inc(ifp, ifs6_out_discard);
619 				error = EHOSTUNREACH;
620 				goto bad;
621 			}
622 		}
623 
624 		if (opt && opt->ip6po_hlim != -1)
625 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
626 	} else {
627 		/* Multicast */
628 		struct	in6_multi *in6m;
629 
630 		m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
631 
632 		/*
633 		 * See if the caller provided any multicast options
634 		 */
635 		ifp = NULL;
636 		if (im6o != NULL) {
637 			ip6->ip6_hlim = im6o->im6o_multicast_hlim;
638 			if (im6o->im6o_multicast_ifp != NULL)
639 				ifp = im6o->im6o_multicast_ifp;
640 		} else
641 			ip6->ip6_hlim = ip6_defmcasthlim;
642 
643 		/*
644 		 * See if the caller provided the outgoing interface
645 		 * as an ancillary data.
646 		 * Boundary check for ifindex is assumed to be already done.
647 		 */
648 		if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex)
649 			ifp = ifindex2ifnet[opt->ip6po_pktinfo->ipi6_ifindex];
650 
651 		/*
652 		 * If the destination is a node-local scope multicast,
653 		 * the packet should be loop-backed only.
654 		 */
655 		if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) {
656 			/*
657 			 * If the outgoing interface is already specified,
658 			 * it should be a loopback interface.
659 			 */
660 			if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) {
661 				ip6stat.ip6s_badscope++;
662 				error = ENETUNREACH; /* XXX: better error? */
663 				/* XXX correct ifp? */
664 				in6_ifstat_inc(ifp, ifs6_out_discard);
665 				goto bad;
666 			}
667 			else {
668 				ifp = lo0ifp;
669 			}
670 		}
671 
672 		if (opt && opt->ip6po_hlim != -1)
673 			ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
674 
675 		/*
676 		 * If caller did not provide an interface lookup a
677 		 * default in the routing table.  This is either a
678 		 * default for the specified group (i.e. a host
679 		 * route), or a multicast default (a route for the
680 		 * ``net'' ff00::/8).
681 		 */
682 		if (ifp == NULL) {
683 			if (ro->ro_rt == 0) {
684 				ro->ro_rt = rtalloc1((struct sockaddr *)
685 						&ro->ro_dst, 0);
686 			}
687 			if (ro->ro_rt == 0) {
688 				ip6stat.ip6s_noroute++;
689 				error = EHOSTUNREACH;
690 				/* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */
691 				goto bad;
692 			}
693 			ia = ifatoia6(ro->ro_rt->rt_ifa);
694 			ifp = ro->ro_rt->rt_ifp;
695 			ro->ro_rt->rt_use++;
696 		}
697 
698 		if ((flags & IPV6_FORWARDING) == 0)
699 			in6_ifstat_inc(ifp, ifs6_out_request);
700 		in6_ifstat_inc(ifp, ifs6_out_mcast);
701 
702 		/*
703 		 * Confirm that the outgoing interface supports multicast.
704 		 */
705 		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
706 			ip6stat.ip6s_noroute++;
707 			in6_ifstat_inc(ifp, ifs6_out_discard);
708 			error = ENETUNREACH;
709 			goto bad;
710 		}
711 		IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
712 		if (in6m != NULL &&
713 		   (im6o == NULL || im6o->im6o_multicast_loop)) {
714 			/*
715 			 * If we belong to the destination multicast group
716 			 * on the outgoing interface, and the caller did not
717 			 * forbid loopback, loop back a copy.
718 			 */
719 			ip6_mloopback(ifp, m, dst);
720 		} else {
721 			/*
722 			 * If we are acting as a multicast router, perform
723 			 * multicast forwarding as if the packet had just
724 			 * arrived on the interface to which we are about
725 			 * to send.  The multicast forwarding function
726 			 * recursively calls this function, using the
727 			 * IPV6_FORWARDING flag to prevent infinite recursion.
728 			 *
729 			 * Multicasts that are looped back by ip6_mloopback(),
730 			 * above, will be forwarded by the ip6_input() routine,
731 			 * if necessary.
732 			 */
733 			if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
734 				if (ip6_mforward(ip6, ifp, m) != 0) {
735 					m_freem(m);
736 					goto done;
737 				}
738 			}
739 		}
740 		/*
741 		 * Multicasts with a hoplimit of zero may be looped back,
742 		 * above, but must not be transmitted on a network.
743 		 * Also, multicasts addressed to the loopback interface
744 		 * are not sent -- the above call to ip6_mloopback() will
745 		 * loop back a copy if this host actually belongs to the
746 		 * destination group on the loopback interface.
747 		 */
748 		if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) {
749 			m_freem(m);
750 			goto done;
751 		}
752 	}
753 
754 	/*
755 	 * Fill the outgoing interface to tell the upper layer
756 	 * to increment per-interface statistics.
757 	 */
758 	if (ifpp)
759 		*ifpp = ifp;
760 
761 	/*
762 	 * Determine path MTU.
763 	 */
764 	if (ro_pmtu != ro) {
765 		/* The first hop and the final destination may differ. */
766 		struct sockaddr_in6 *sin6_fin =
767 			(struct sockaddr_in6 *)&ro_pmtu->ro_dst;
768 		if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
769 				       !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr,
770 							   &finaldst))) {
771 			RTFREE(ro_pmtu->ro_rt);
772 			ro_pmtu->ro_rt = (struct rtentry *)0;
773 		}
774 		if (ro_pmtu->ro_rt == 0) {
775 			bzero(sin6_fin, sizeof(*sin6_fin));
776 			sin6_fin->sin6_family = AF_INET6;
777 			sin6_fin->sin6_len = sizeof(struct sockaddr_in6);
778 			sin6_fin->sin6_addr = finaldst;
779 
780 			rtalloc((struct route *)ro_pmtu);
781 		}
782 	}
783 	if (ro_pmtu->ro_rt != NULL) {
784 		u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu;
785 
786 		mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
787 		if (mtu > ifmtu || mtu == 0) {
788 			/*
789 			 * The MTU on the route is larger than the MTU on
790 			 * the interface!  This shouldn't happen, unless the
791 			 * MTU of the interface has been changed after the
792 			 * interface was brought up.  Change the MTU in the
793 			 * route to match the interface MTU (as long as the
794 			 * field isn't locked).
795 			 *
796 			 * if MTU on the route is 0, we need to fix the MTU.
797 			 * this case happens with path MTU discovery timeouts.
798 			 */
799 			 mtu = ifmtu;
800 			 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
801 				 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
802 		}
803 	} else {
804 		mtu = nd_ifinfo[ifp->if_index].linkmtu;
805 	}
806 
807 	/* Fake scoped addresses */
808 	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
809 		/*
810 		 * If source or destination address is a scoped address, and
811 		 * the packet is going to be sent to a loopback interface,
812 		 * we should keep the original interface.
813 		 */
814 
815 		/*
816 		 * XXX: this is a very experimental and temporary solution.
817 		 * We eventually have sockaddr_in6 and use the sin6_scope_id
818 		 * field of the structure here.
819 		 * We rely on the consistency between two scope zone ids
820 		 * of source and destination, which should already be assured.
821 		 * Larger scopes than link will be supported in the future.
822 		 */
823 		origifp = NULL;
824 		if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
825 			origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])];
826 		else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
827 			origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])];
828 		/*
829 		 * XXX: origifp can be NULL even in those two cases above.
830 		 * For example, if we remove the (only) link-local address
831 		 * from the loopback interface, and try to send a link-local
832 		 * address without link-id information.  Then the source
833 		 * address is ::1, and the destination address is the
834 		 * link-local address with its s6_addr16[1] being zero.
835 		 * What is worse, if the packet goes to the loopback interface
836 		 * by a default rejected route, the null pointer would be
837 		 * passed to looutput, and the kernel would hang.
838 		 * The following last resort would prevent such disaster.
839 		 */
840 		if (origifp == NULL)
841 			origifp = ifp;
842 	}
843 	else
844 		origifp = ifp;
845 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
846 		ip6->ip6_src.s6_addr16[1] = 0;
847 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
848 		ip6->ip6_dst.s6_addr16[1] = 0;
849 
850 	/*
851 	 * If the outgoing packet contains a hop-by-hop options header,
852 	 * it must be examined and processed even by the source node.
853 	 * (RFC 2460, section 4.)
854 	 */
855 	if (exthdrs.ip6e_hbh) {
856 		struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh,
857 					   struct ip6_hbh *);
858 		u_int32_t dummy1; /* XXX unused */
859 		u_int32_t dummy2; /* XXX unused */
860 
861 		/*
862 		 *  XXX: if we have to send an ICMPv6 error to the sender,
863 		 *       we need the M_LOOP flag since icmp6_error() expects
864 		 *       the IPv6 and the hop-by-hop options header are
865 		 *       continuous unless the flag is set.
866 		 */
867 		m->m_flags |= M_LOOP;
868 		m->m_pkthdr.rcvif = ifp;
869 		if (ip6_process_hopopts(m,
870 					(u_int8_t *)(hbh + 1),
871 					((hbh->ip6h_len + 1) << 3) -
872 					sizeof(struct ip6_hbh),
873 					&dummy1, &dummy2) < 0) {
874 			/* m was already freed at this point */
875 			error = EINVAL;/* better error? */
876 			goto done;
877 		}
878 		m->m_flags &= ~M_LOOP; /* XXX */
879 		m->m_pkthdr.rcvif = NULL;
880 	}
881 
882 #if NPF > 0
883 	if (pf_test6(PF_OUT, ifp, &m) != PF_PASS) {
884 		error = EHOSTUNREACH;
885 		m_freem(m);
886 		goto done;
887 	}
888 	if (m == NULL)
889 		goto done;
890 	ip6 = mtod(m, struct ip6_hdr *);
891 #endif
892 
893 	/*
894 	 * Send the packet to the outgoing interface.
895 	 * If necessary, do IPv6 fragmentation before sending.
896 	 */
897 	tlen = m->m_pkthdr.len;
898 	if (tlen <= mtu
899 #ifdef notyet
900 	    /*
901 	     * On any link that cannot convey a 1280-octet packet in one piece,
902 	     * link-specific fragmentation and reassembly must be provided at
903 	     * a layer below IPv6. [RFC 2460, sec.5]
904 	     * Thus if the interface has ability of link-level fragmentation,
905 	     * we can just send the packet even if the packet size is
906 	     * larger than the link's MTU.
907 	     * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet...
908 	     */
909 
910 	    || ifp->if_flags & IFF_FRAGMENTABLE
911 #endif
912 	    )
913 	{
914 		error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
915 		goto done;
916 	} else if (mtu < IPV6_MMTU) {
917 		/*
918 		 * note that path MTU is never less than IPV6_MMTU
919 		 * (see icmp6_input).
920 		 */
921 		error = EMSGSIZE;
922 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
923 		goto bad;
924 	} else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */
925 		error = EMSGSIZE;
926 		in6_ifstat_inc(ifp, ifs6_out_fragfail);
927 		goto bad;
928 	} else {
929 		struct mbuf **mnext, *m_frgpart;
930 		struct ip6_frag *ip6f;
931 		u_int32_t id = htonl(ip6_id++);
932 		u_char nextproto;
933 
934 		/*
935 		 * Too large for the destination or interface;
936 		 * fragment if possible.
937 		 * Must be able to put at least 8 bytes per fragment.
938 		 */
939 		hlen = unfragpartlen;
940 		if (mtu > IPV6_MAXPACKET)
941 			mtu = IPV6_MAXPACKET;
942 		len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
943 		if (len < 8) {
944 			error = EMSGSIZE;
945 			in6_ifstat_inc(ifp, ifs6_out_fragfail);
946 			goto bad;
947 		}
948 
949 		mnext = &m->m_nextpkt;
950 
951 		/*
952 		 * Change the next header field of the last header in the
953 		 * unfragmentable part.
954 		 */
955 		if (exthdrs.ip6e_rthdr) {
956 			nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
957 			*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
958 		} else if (exthdrs.ip6e_dest1) {
959 			nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
960 			*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
961 		} else if (exthdrs.ip6e_hbh) {
962 			nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
963 			*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
964 		} else {
965 			nextproto = ip6->ip6_nxt;
966 			ip6->ip6_nxt = IPPROTO_FRAGMENT;
967 		}
968 
969 		/*
970 		 * Loop through length of segment after first fragment,
971 		 * make new header and copy data of each part and link onto
972 		 * chain.
973 		 */
974 		m0 = m;
975 		for (off = hlen; off < tlen; off += len) {
976 			MGETHDR(m, M_DONTWAIT, MT_HEADER);
977 			if (!m) {
978 				error = ENOBUFS;
979 				ip6stat.ip6s_odropped++;
980 				goto sendorfree;
981 			}
982 			m->m_flags = m0->m_flags & M_COPYFLAGS;
983 			*mnext = m;
984 			mnext = &m->m_nextpkt;
985 			m->m_data += max_linkhdr;
986 			mhip6 = mtod(m, struct ip6_hdr *);
987 			*mhip6 = *ip6;
988 			m->m_len = sizeof(*mhip6);
989  			error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
990  			if (error) {
991 				ip6stat.ip6s_odropped++;
992 				goto sendorfree;
993 			}
994 			ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
995 			if (off + len >= tlen)
996 				len = tlen - off;
997 			else
998 				ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
999 			mhip6->ip6_plen = htons((u_short)(len + hlen +
1000 							  sizeof(*ip6f) -
1001 							  sizeof(struct ip6_hdr)));
1002 			if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1003 				error = ENOBUFS;
1004 				ip6stat.ip6s_odropped++;
1005 				goto sendorfree;
1006 			}
1007 			m_cat(m, m_frgpart);
1008 			m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1009 			m->m_pkthdr.rcvif = (struct ifnet *)0;
1010 			ip6f->ip6f_reserved = 0;
1011 			ip6f->ip6f_ident = id;
1012 			ip6f->ip6f_nxt = nextproto;
1013 			ip6stat.ip6s_ofragments++;
1014 			in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1015 		}
1016 
1017 		in6_ifstat_inc(ifp, ifs6_out_fragok);
1018 	}
1019 
1020 	/*
1021 	 * Remove leading garbages.
1022 	 */
1023 sendorfree:
1024 	m = m0->m_nextpkt;
1025 	m0->m_nextpkt = 0;
1026 	m_freem(m0);
1027 	for (m0 = m; m; m = m0) {
1028 		m0 = m->m_nextpkt;
1029 		m->m_nextpkt = 0;
1030 		if (error == 0) {
1031 			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1032 		} else
1033 			m_freem(m);
1034 	}
1035 
1036 	if (error == 0)
1037 		ip6stat.ip6s_fragmented++;
1038 
1039 done:
1040 	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
1041 		RTFREE(ro->ro_rt);
1042 	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1043 		RTFREE(ro_pmtu->ro_rt);
1044 	}
1045 
1046 	return(error);
1047 
1048 freehdrs:
1049 	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
1050 	m_freem(exthdrs.ip6e_dest1);
1051 	m_freem(exthdrs.ip6e_rthdr);
1052 	m_freem(exthdrs.ip6e_dest2);
1053 	/* fall through */
1054 bad:
1055 	m_freem(m);
1056 	goto done;
1057 }
1058 
1059 static int
1060 ip6_copyexthdr(mp, hdr, hlen)
1061 	struct mbuf **mp;
1062 	caddr_t hdr;
1063 	int hlen;
1064 {
1065 	struct mbuf *m;
1066 
1067 	if (hlen > MCLBYTES)
1068 		return(ENOBUFS); /* XXX */
1069 
1070 	MGET(m, M_DONTWAIT, MT_DATA);
1071 	if (!m)
1072 		return(ENOBUFS);
1073 
1074 	if (hlen > MLEN) {
1075 		MCLGET(m, M_DONTWAIT);
1076 		if ((m->m_flags & M_EXT) == 0) {
1077 			m_free(m);
1078 			return(ENOBUFS);
1079 		}
1080 	}
1081 	m->m_len = hlen;
1082 	if (hdr)
1083 		bcopy(hdr, mtod(m, caddr_t), hlen);
1084 
1085 	*mp = m;
1086 	return(0);
1087 }
1088 
1089 /*
1090  * Insert jumbo payload option.
1091  */
1092 static int
1093 ip6_insert_jumboopt(exthdrs, plen)
1094 	struct ip6_exthdrs *exthdrs;
1095 	u_int32_t plen;
1096 {
1097 	struct mbuf *mopt;
1098 	u_char *optbuf;
1099 	u_int32_t v;
1100 
1101 #define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */
1102 
1103 	/*
1104 	 * If there is no hop-by-hop options header, allocate new one.
1105 	 * If there is one but it doesn't have enough space to store the
1106 	 * jumbo payload option, allocate a cluster to store the whole options.
1107 	 * Otherwise, use it to store the options.
1108 	 */
1109 	if (exthdrs->ip6e_hbh == 0) {
1110 		MGET(mopt, M_DONTWAIT, MT_DATA);
1111 		if (mopt == 0)
1112 			return(ENOBUFS);
1113 		mopt->m_len = JUMBOOPTLEN;
1114 		optbuf = mtod(mopt, u_char *);
1115 		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
1116 		exthdrs->ip6e_hbh = mopt;
1117 	} else {
1118 		struct ip6_hbh *hbh;
1119 
1120 		mopt = exthdrs->ip6e_hbh;
1121 		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1122 			/*
1123 			 * XXX assumption:
1124 			 * - exthdrs->ip6e_hbh is not referenced from places
1125 			 *   other than exthdrs.
1126 			 * - exthdrs->ip6e_hbh is not an mbuf chain.
1127 			 */
1128 			int oldoptlen = mopt->m_len;
1129 			struct mbuf *n;
1130 
1131 			/*
1132 			 * XXX: give up if the whole (new) hbh header does
1133 			 * not fit even in an mbuf cluster.
1134 			 */
1135 			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1136 				return(ENOBUFS);
1137 
1138 			/*
1139 			 * As a consequence, we must always prepare a cluster
1140 			 * at this point.
1141 			 */
1142 			MGET(n, M_DONTWAIT, MT_DATA);
1143 			if (n) {
1144 				MCLGET(n, M_DONTWAIT);
1145 				if ((n->m_flags & M_EXT) == 0) {
1146 					m_freem(n);
1147 					n = NULL;
1148 				}
1149 			}
1150 			if (!n)
1151 				return(ENOBUFS);
1152 			n->m_len = oldoptlen + JUMBOOPTLEN;
1153 			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1154 			      oldoptlen);
1155 			optbuf = mtod(n, caddr_t) + oldoptlen;
1156 			m_freem(mopt);
1157 			mopt = exthdrs->ip6e_hbh = n;
1158 		} else {
1159 			optbuf = mtod(mopt, u_char *) + mopt->m_len;
1160 			mopt->m_len += JUMBOOPTLEN;
1161 		}
1162 		optbuf[0] = IP6OPT_PADN;
1163 		optbuf[1] = 1;
1164 
1165 		/*
1166 		 * Adjust the header length according to the pad and
1167 		 * the jumbo payload option.
1168 		 */
1169 		hbh = mtod(mopt, struct ip6_hbh *);
1170 		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1171 	}
1172 
1173 	/* fill in the option. */
1174 	optbuf[2] = IP6OPT_JUMBO;
1175 	optbuf[3] = 4;
1176 	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1177 	bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1178 
1179 	/* finally, adjust the packet header length */
1180 	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1181 
1182 	return(0);
1183 #undef JUMBOOPTLEN
1184 }
1185 
1186 /*
1187  * Insert fragment header and copy unfragmentable header portions.
1188  */
1189 static int
1190 ip6_insertfraghdr(m0, m, hlen, frghdrp)
1191 	struct mbuf *m0, *m;
1192 	int hlen;
1193 	struct ip6_frag **frghdrp;
1194 {
1195 	struct mbuf *n, *mlast;
1196 
1197 	if (hlen > sizeof(struct ip6_hdr)) {
1198 		n = m_copym(m0, sizeof(struct ip6_hdr),
1199 			    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1200 		if (n == 0)
1201 			return(ENOBUFS);
1202 		m->m_next = n;
1203 	} else
1204 		n = m;
1205 
1206 	/* Search for the last mbuf of unfragmentable part. */
1207 	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1208 		;
1209 
1210 	if ((mlast->m_flags & M_EXT) == 0 &&
1211 	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1212 		/* use the trailing space of the last mbuf for the fragment hdr */
1213 		*frghdrp =
1214 			(struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len);
1215 		mlast->m_len += sizeof(struct ip6_frag);
1216 		m->m_pkthdr.len += sizeof(struct ip6_frag);
1217 	} else {
1218 		/* allocate a new mbuf for the fragment header */
1219 		struct mbuf *mfrg;
1220 
1221 		MGET(mfrg, M_DONTWAIT, MT_DATA);
1222 		if (mfrg == 0)
1223 			return(ENOBUFS);
1224 		mfrg->m_len = sizeof(struct ip6_frag);
1225 		*frghdrp = mtod(mfrg, struct ip6_frag *);
1226 		mlast->m_next = mfrg;
1227 	}
1228 
1229 	return(0);
1230 }
1231 
1232 /*
1233  * IP6 socket option processing.
1234  */
1235 int
1236 ip6_ctloutput(op, so, level, optname, mp)
1237 	int op;
1238 	struct socket *so;
1239 	int level, optname;
1240 	struct mbuf **mp;
1241 {
1242 	int privileged;
1243 	struct inpcb *inp = sotoinpcb(so);
1244 	struct mbuf *m = *mp;
1245 	int error, optval;
1246 	int optlen;
1247 #ifdef IPSEC
1248 	struct proc *p = curproc; /* XXX */
1249 	struct tdb *tdb;
1250 	struct tdb_ident *tdbip, tdbi;
1251 	int s;
1252 #endif
1253 
1254 	optlen = m ? m->m_len : 0;
1255 	error = optval = 0;
1256 
1257 	privileged = (inp->inp_socket->so_state & SS_PRIV);
1258 
1259 	if (level == IPPROTO_IPV6) {
1260 		switch (op) {
1261 		case PRCO_SETOPT:
1262 			switch (optname) {
1263 			case IPV6_PKTOPTIONS:
1264 				/* m is freed in ip6_pcbopts */
1265 				return(ip6_pcbopts(&inp->inp_outputopts6,
1266 						   m, so));
1267 			case IPV6_HOPOPTS:
1268 			case IPV6_DSTOPTS:
1269 				if (!privileged) {
1270 					error = EPERM;
1271 					break;
1272 				}
1273 				/* fall through */
1274 			case IPV6_UNICAST_HOPS:
1275 			case IPV6_RECVOPTS:
1276 			case IPV6_RECVRETOPTS:
1277 			case IPV6_RECVDSTADDR:
1278 			case IPV6_PKTINFO:
1279 			case IPV6_HOPLIMIT:
1280 			case IPV6_RTHDR:
1281 			case IPV6_CHECKSUM:
1282 			case IPV6_FAITH:
1283 				if (optlen != sizeof(int))
1284 					error = EINVAL;
1285 				else {
1286 					optval = *mtod(m, int *);
1287 					switch (optname) {
1288 
1289 					case IPV6_UNICAST_HOPS:
1290 						if (optval < -1 || optval >= 256)
1291 							error = EINVAL;
1292 						else {
1293 							/* -1 = kernel default */
1294 							inp->inp_hops = optval;
1295 						}
1296 						break;
1297 #define OPTSET(bit) \
1298 	if (optval) \
1299 		inp->inp_flags |= bit; \
1300 	else \
1301 		inp->inp_flags &= ~bit;
1302 					case IPV6_RECVOPTS:
1303 						OPTSET(IN6P_RECVOPTS);
1304 						break;
1305 
1306 					case IPV6_RECVRETOPTS:
1307 						OPTSET(IN6P_RECVRETOPTS);
1308 						break;
1309 
1310 					case IPV6_RECVDSTADDR:
1311 						OPTSET(IN6P_RECVDSTADDR);
1312 						break;
1313 
1314 					case IPV6_PKTINFO:
1315 						OPTSET(IN6P_PKTINFO);
1316 						break;
1317 
1318 					case IPV6_HOPLIMIT:
1319 						OPTSET(IN6P_HOPLIMIT);
1320 						break;
1321 
1322 					case IPV6_HOPOPTS:
1323 						OPTSET(IN6P_HOPOPTS);
1324 						break;
1325 
1326 					case IPV6_DSTOPTS:
1327 						OPTSET(IN6P_DSTOPTS);
1328 						break;
1329 
1330 					case IPV6_RTHDR:
1331 						OPTSET(IN6P_RTHDR);
1332 						break;
1333 
1334 					case IPV6_CHECKSUM:
1335 						inp->in6p_cksum = optval;
1336 						break;
1337 
1338 					case IPV6_FAITH:
1339 						OPTSET(IN6P_FAITH);
1340 						break;
1341 					}
1342 				}
1343 				break;
1344 #undef OPTSET
1345 
1346 			case IPV6_MULTICAST_IF:
1347 			case IPV6_MULTICAST_HOPS:
1348 			case IPV6_MULTICAST_LOOP:
1349 			case IPV6_JOIN_GROUP:
1350 			case IPV6_LEAVE_GROUP:
1351 				error =	ip6_setmoptions(optname,
1352 					&inp->inp_moptions6, m);
1353 				break;
1354 
1355 			case IPV6_PORTRANGE:
1356 				optval = *mtod(m, int *);
1357 
1358 # define in6p		inp
1359 # define in6p_flags	inp_flags
1360 				switch (optval) {
1361 				case IPV6_PORTRANGE_DEFAULT:
1362 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1363 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1364 					break;
1365 
1366 				case IPV6_PORTRANGE_HIGH:
1367 					in6p->in6p_flags &= ~(IN6P_LOWPORT);
1368 					in6p->in6p_flags |= IN6P_HIGHPORT;
1369 					break;
1370 
1371 				case IPV6_PORTRANGE_LOW:
1372 					in6p->in6p_flags &= ~(IN6P_HIGHPORT);
1373 					in6p->in6p_flags |= IN6P_LOWPORT;
1374 					break;
1375 
1376 				default:
1377 					error = EINVAL;
1378 					break;
1379 				}
1380 # undef in6p
1381 # undef in6p_flags
1382 				break;
1383 
1384 #if 0 /*KAME IPSEC*/
1385 			case IPV6_IPSEC_POLICY:
1386 			    {
1387 				caddr_t req = NULL;
1388 				if (m != 0)
1389 					req = mtod(m, caddr_t);
1390 				error = ipsec6_set_policy(in6p, optname, req,
1391 				                          privileged);
1392 			    }
1393 				break;
1394 #endif /* IPSEC */
1395 
1396 			case IPSEC6_OUTSA:
1397 #ifndef IPSEC
1398 				error = EINVAL;
1399 #else
1400 				s = spltdb();
1401 				if (m == 0 || m->m_len != sizeof(struct tdb_ident)) {
1402 					error = EINVAL;
1403 				} else {
1404 					tdbip = mtod(m, struct tdb_ident *);
1405 					tdb = gettdb(tdbip->spi, &tdbip->dst,
1406 						     tdbip->proto);
1407 					if (tdb == NULL)
1408 						error = ESRCH;
1409 					else
1410 						tdb_add_inp(tdb, inp, 0);
1411 				}
1412 				splx(s);
1413 #endif /* IPSEC */
1414 				break;
1415 
1416 			case IPV6_AUTH_LEVEL:
1417 			case IPV6_ESP_TRANS_LEVEL:
1418 			case IPV6_ESP_NETWORK_LEVEL:
1419 			case IPV6_IPCOMP_LEVEL:
1420 #ifndef IPSEC
1421 				error = EINVAL;
1422 #else
1423 				if (m == 0 || m->m_len != sizeof(int)) {
1424 					error = EINVAL;
1425 					break;
1426 				}
1427 				optval = *mtod(m, int *);
1428 
1429 				if (optval < IPSEC_LEVEL_BYPASS ||
1430 				    optval > IPSEC_LEVEL_UNIQUE) {
1431 					error = EINVAL;
1432 					break;
1433 				}
1434 
1435 				switch (optname) {
1436 				case IPV6_AUTH_LEVEL:
1437 				        if (optval < ipsec_auth_default_level &&
1438 					    suser(p->p_ucred, &p->p_acflag)) {
1439 						error = EACCES;
1440 						break;
1441 					}
1442 					inp->inp_seclevel[SL_AUTH] = optval;
1443 					break;
1444 
1445 				case IPV6_ESP_TRANS_LEVEL:
1446 				        if (optval < ipsec_esp_trans_default_level &&
1447 					    suser(p->p_ucred, &p->p_acflag)) {
1448 						error = EACCES;
1449 						break;
1450 					}
1451 					inp->inp_seclevel[SL_ESP_TRANS] = optval;
1452 					break;
1453 
1454 				case IPV6_ESP_NETWORK_LEVEL:
1455 				        if (optval < ipsec_esp_network_default_level &&
1456 					    suser(p->p_ucred, &p->p_acflag)) {
1457 						error = EACCES;
1458 						break;
1459 					}
1460 					inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1461 					break;
1462 
1463 				case IPV6_IPCOMP_LEVEL:
1464 				        if (optval < ipsec_ipcomp_default_level &&
1465 					    suser(p->p_ucred, &p->p_acflag)) {
1466 						error = EACCES;
1467 						break;
1468 					}
1469 					inp->inp_seclevel[SL_IPCOMP] = optval;
1470 					break;
1471 				}
1472 				if (!error)
1473 					inp->inp_secrequire = get_sa_require(inp);
1474 #endif
1475 				break;
1476 
1477 
1478 			default:
1479 				error = ENOPROTOOPT;
1480 				break;
1481 			}
1482 			if (m)
1483 				(void)m_free(m);
1484 			break;
1485 
1486 		case PRCO_GETOPT:
1487 			switch (optname) {
1488 
1489 			case IPV6_OPTIONS:
1490 			case IPV6_RETOPTS:
1491 #if 0
1492 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1493 				if (in6p->in6p_options) {
1494 					m->m_len = in6p->in6p_options->m_len;
1495 					bcopy(mtod(in6p->in6p_options, caddr_t),
1496 					      mtod(m, caddr_t),
1497 					      (unsigned)m->m_len);
1498 				} else
1499 					m->m_len = 0;
1500 				break;
1501 #else
1502 				error = ENOPROTOOPT;
1503 				break;
1504 #endif
1505 
1506 			case IPV6_PKTOPTIONS:
1507 				if (inp->inp_options) {
1508 					*mp = m_copym(inp->inp_options, 0,
1509 						      M_COPYALL, M_WAIT);
1510 				} else {
1511 					*mp = m_get(M_WAIT, MT_SOOPTS);
1512 					(*mp)->m_len = 0;
1513 				}
1514 				break;
1515 
1516 			case IPV6_HOPOPTS:
1517 			case IPV6_DSTOPTS:
1518 				if (!privileged) {
1519 					error = EPERM;
1520 					break;
1521 				}
1522 				/* fall through */
1523 			case IPV6_UNICAST_HOPS:
1524 			case IPV6_RECVOPTS:
1525 			case IPV6_RECVRETOPTS:
1526 			case IPV6_RECVDSTADDR:
1527 			case IPV6_PKTINFO:
1528 			case IPV6_HOPLIMIT:
1529 			case IPV6_RTHDR:
1530 			case IPV6_CHECKSUM:
1531 			case IPV6_FAITH:
1532 			case IPV6_PORTRANGE:
1533 				switch (optname) {
1534 
1535 				case IPV6_UNICAST_HOPS:
1536 					optval = inp->inp_hops;
1537 					break;
1538 
1539 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
1540 
1541 				case IPV6_RECVOPTS:
1542 					optval = OPTBIT(IN6P_RECVOPTS);
1543 					break;
1544 
1545 				case IPV6_RECVRETOPTS:
1546 					optval = OPTBIT(IN6P_RECVRETOPTS);
1547 					break;
1548 
1549 				case IPV6_RECVDSTADDR:
1550 					optval = OPTBIT(IN6P_RECVDSTADDR);
1551 					break;
1552 
1553 				case IPV6_PKTINFO:
1554 					optval = OPTBIT(IN6P_PKTINFO);
1555 					break;
1556 
1557 				case IPV6_HOPLIMIT:
1558 					optval = OPTBIT(IN6P_HOPLIMIT);
1559 					break;
1560 
1561 				case IPV6_HOPOPTS:
1562 					optval = OPTBIT(IN6P_HOPOPTS);
1563 					break;
1564 
1565 				case IPV6_DSTOPTS:
1566 					optval = OPTBIT(IN6P_DSTOPTS);
1567 					break;
1568 
1569 				case IPV6_RTHDR:
1570 					optval = OPTBIT(IN6P_RTHDR);
1571 					break;
1572 
1573 				case IPV6_CHECKSUM:
1574 					optval = inp->in6p_cksum;
1575 					break;
1576 
1577 				case IPV6_FAITH:
1578 					optval = OPTBIT(IN6P_FAITH);
1579 					break;
1580 
1581 				case IPV6_PORTRANGE:
1582 				    {
1583 					int flags;
1584 
1585 					flags = inp->inp_flags;
1586 					if (flags & IN6P_HIGHPORT)
1587 						optval = IPV6_PORTRANGE_HIGH;
1588 					else if (flags & IN6P_LOWPORT)
1589 						optval = IPV6_PORTRANGE_LOW;
1590 					else
1591 						optval = 0;
1592 					break;
1593 				    }
1594 				}
1595 				*mp = m = m_get(M_WAIT, MT_SOOPTS);
1596 				m->m_len = sizeof(int);
1597 				*mtod(m, int *) = optval;
1598 				break;
1599 
1600 			case IPV6_MULTICAST_IF:
1601 			case IPV6_MULTICAST_HOPS:
1602 			case IPV6_MULTICAST_LOOP:
1603 			case IPV6_JOIN_GROUP:
1604 			case IPV6_LEAVE_GROUP:
1605 				error = ip6_getmoptions(optname, inp->inp_moptions6, mp);
1606 				break;
1607 
1608 #if 0 /*KAME IPSEC*/
1609 			case IPV6_IPSEC_POLICY:
1610 			  {
1611 				caddr_t req = NULL;
1612 				int len = 0;
1613 
1614 				if (m != 0) {
1615 					req = mtod(m, caddr_t);
1616 					len = m->m_len;
1617 				}
1618 				error = ipsec6_get_policy(in6p, req, mp);
1619 				break;
1620 			  }
1621 #endif /* IPSEC */
1622 
1623 			case IPSEC6_OUTSA:
1624 #ifndef IPSEC
1625 				error = EINVAL;
1626 #else
1627 				s = spltdb();
1628 				if (inp->inp_tdb_out == NULL) {
1629 					error = ENOENT;
1630 				} else {
1631 					tdbi.spi = inp->inp_tdb_out->tdb_spi;
1632 					tdbi.dst = inp->inp_tdb_out->tdb_dst;
1633 					tdbi.proto = inp->inp_tdb_out->tdb_sproto;
1634 					*mp = m = m_get(M_WAIT, MT_SOOPTS);
1635 					m->m_len = sizeof(tdbi);
1636 					bcopy((caddr_t)&tdbi, mtod(m, caddr_t),
1637 					    (unsigned)m->m_len);
1638 				}
1639 				splx(s);
1640 #endif /* IPSEC */
1641 				break;
1642 
1643 			case IPV6_AUTH_LEVEL:
1644 			case IPV6_ESP_TRANS_LEVEL:
1645 			case IPV6_ESP_NETWORK_LEVEL:
1646 			case IPV6_IPCOMP_LEVEL:
1647 #ifndef IPSEC
1648 				m->m_len = sizeof(int);
1649 				*mtod(m, int *) = IPSEC_LEVEL_NONE;
1650 #else
1651 				m->m_len = sizeof(int);
1652 				switch (optname) {
1653 				case IPV6_AUTH_LEVEL:
1654 					optval = inp->inp_seclevel[SL_AUTH];
1655 					break;
1656 
1657 				case IPV6_ESP_TRANS_LEVEL:
1658 					optval =
1659 					    inp->inp_seclevel[SL_ESP_TRANS];
1660 					break;
1661 
1662 				case IPV6_ESP_NETWORK_LEVEL:
1663 					optval =
1664 					    inp->inp_seclevel[SL_ESP_NETWORK];
1665 					break;
1666 
1667 				case IPV6_IPCOMP_LEVEL:
1668 					optval = inp->inp_seclevel[SL_IPCOMP];
1669 					break;
1670 				}
1671 				*mtod(m, int *) = optval;
1672 #endif
1673 				break;
1674 
1675 			default:
1676 				error = ENOPROTOOPT;
1677 				break;
1678 			}
1679 			break;
1680 		}
1681 	} else {
1682 		error = EINVAL;
1683 		if (op == PRCO_SETOPT && *mp)
1684 			(void)m_free(*mp);
1685 	}
1686 	return(error);
1687 }
1688 
1689 /*
1690  * Set up IP6 options in pcb for insertion in output packets.
1691  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1692  * with destination address if source routed.
1693  */
1694 static int
1695 ip6_pcbopts(pktopt, m, so)
1696 	struct ip6_pktopts **pktopt;
1697 	struct mbuf *m;
1698 	struct socket *so;
1699 {
1700 	struct ip6_pktopts *opt = *pktopt;
1701 	int error = 0;
1702 	struct proc *p = curproc;	/* XXX */
1703 	int priv = 0;
1704 
1705 	/* turn off any old options. */
1706 	if (opt) {
1707 		if (opt->ip6po_m)
1708 			(void)m_free(opt->ip6po_m);
1709 	}
1710 	else
1711 		opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK);
1712 	*pktopt = 0;
1713 
1714 	if (!m || m->m_len == 0) {
1715 		/*
1716 		 * Only turning off any previous options.
1717 		 */
1718 		if (opt)
1719 			free(opt, M_IP6OPT);
1720 		if (m)
1721 			(void)m_free(m);
1722 		return(0);
1723 	}
1724 
1725 	/*  set options specified by user. */
1726 	if (p && !suser(p->p_ucred, &p->p_acflag))
1727 		priv = 1;
1728 	if ((error = ip6_setpktoptions(m, opt, priv)) != 0) {
1729 		(void)m_free(m);
1730 		return(error);
1731 	}
1732 	*pktopt = opt;
1733 	return(0);
1734 }
1735 
/*
 * Set the IP6 multicast options in response to user setsockopt().
 *
 * optname: IPV6_MULTICAST_IF, IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP,
 *          IPV6_JOIN_GROUP or IPV6_LEAVE_GROUP; anything else yields
 *          EOPNOTSUPP.
 * im6op:   address of the PCB's multicast option pointer.  An option block
 *          is allocated when none exists, and released again (with *im6op
 *          set to NULL) when all of its fields hold default values at exit.
 * m:       option value.  It is validated but not freed in this function.
 *
 * Returns 0 on success or an errno value.
 */
static int
ip6_setmoptions(optname, im6op, m)
	int optname;
	struct ip6_moptions **im6op;
	struct mbuf *m;
{
	int error = 0;
	u_int loop, ifindex;
	struct ipv6_mreq *mreq;
	struct ifnet *ifp;
	struct ip6_moptions *im6o = *im6op;
	struct route_in6 ro;
	struct sockaddr_in6 *dst;
	struct in6_multi_mship *imm;
	struct proc *p = curproc;	/* XXX */

	if (im6o == NULL) {
		/*
		 * No multicast option buffer attached to the pcb;
		 * allocate one and initialize to default values.
		 */
		im6o = (struct ip6_moptions *)
			malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK);

		*im6op = im6o;
		im6o->im6o_multicast_ifp = NULL;
		im6o->im6o_multicast_hlim = ip6_defmcasthlim;
		im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
		LIST_INIT(&im6o->im6o_memberships);
	}

	switch (optname) {

	case IPV6_MULTICAST_IF:
		/*
		 * Select the interface for outgoing multicast packets.
		 */
		if (m == NULL || m->m_len != sizeof(u_int)) {
			error = EINVAL;
			break;
		}
		ifindex = *(mtod(m, u_int *));
		/*
		 * NOTE(review): ifindex is declared u_int, so the "< 0" test
		 * can never be true; only the upper bound is effective.
		 */
		if (ifindex < 0 || if_index < ifindex) {
			error = ENXIO;	/* XXX EINVAL? */
			break;
		}
		ifp = ifindex2ifnet[ifindex];
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			break;
		}
		im6o->im6o_multicast_ifp = ifp;
		break;

	case IPV6_MULTICAST_HOPS:
	    {
		/*
		 * Set the IP6 hoplimit for outgoing multicast packets.
		 */
		int optval;
		if (m == NULL || m->m_len != sizeof(int)) {
			error = EINVAL;
			break;
		}
		optval = *(mtod(m, u_int *));
		/* -1 selects the system default; 0..255 are taken literally */
		if (optval < -1 || optval >= 256)
			error = EINVAL;
		else if (optval == -1)
			im6o->im6o_multicast_hlim = ip6_defmcasthlim;
		else
			im6o->im6o_multicast_hlim = optval;
		break;
	    }

	case IPV6_MULTICAST_LOOP:
		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one.
		 */
		if (m == NULL || m->m_len != sizeof(u_int) ||
		   (loop = *(mtod(m, u_int *))) > 1) {
			error = EINVAL;
			break;
		}
		im6o->im6o_multicast_loop = loop;
		break;

	case IPV6_JOIN_GROUP:
		/*
		 * Add a multicast group membership.
		 * Group must be a valid IP6 multicast address.
		 */
		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
			error = EINVAL;
			break;
		}
		mreq = mtod(m, struct ipv6_mreq *);
		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
			/*
			 * We use the unspecified address to specify to accept
			 * all multicast addresses. Only super user is allowed
			 * to do this.
			 */
			if (suser(p->p_ucred, &p->p_acflag))
			{
				error = EACCES;
				break;
			}
		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
			error = EINVAL;
			break;
		}

		/*
		 * If the interface is specified, validate it.
		 */
		if (mreq->ipv6mr_interface < 0
		 || if_index < mreq->ipv6mr_interface) {
			error = ENXIO;	/* XXX EINVAL? */
			break;
		}
		/*
		 * If no interface was explicitly specified, choose an
		 * appropriate one according to the given multicast address.
		 */
		if (mreq->ipv6mr_interface == 0) {
			/*
			 * If the multicast address is in node-local scope,
			 * the interface should be a loopback interface.
			 * Otherwise, look up the routing table for the
			 * address, and choose the outgoing interface.
			 *   XXX: is it a good approach?
			 */
			if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) {
				ifp = lo0ifp;
			}
			else {
				ro.ro_rt = NULL;
				dst = (struct sockaddr_in6 *)&ro.ro_dst;
				bzero(dst, sizeof(*dst));
				dst->sin6_len = sizeof(struct sockaddr_in6);
				dst->sin6_family = AF_INET6;
				dst->sin6_addr = mreq->ipv6mr_multiaddr;
				rtalloc((struct route *)&ro);
				if (ro.ro_rt == NULL) {
					error = EADDRNOTAVAIL;
					break;
				}
				/* only the interface is needed; drop the route */
				ifp = ro.ro_rt->rt_ifp;
				rtfree(ro.ro_rt);
			}
		} else
			ifp = ifindex2ifnet[mreq->ipv6mr_interface];

		/*
		 * See if we found an interface, and confirm that it
		 * supports multicast
		 */
		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			break;
		}
		/*
		 * Put interface index into the multicast address,
		 * if the address has link-local scope.
		 *
		 * NOTE(review): the index embedded is the one supplied by the
		 * caller, so it is 0 when the interface was chosen through
		 * the routing table above -- confirm whether ifp->if_index
		 * was intended.  IPV6_LEAVE_GROUP embeds the same value, so
		 * the two must be changed together.
		 */
		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
			mreq->ipv6mr_multiaddr.s6_addr16[1]
				= htons(mreq->ipv6mr_interface);
		}
		/*
		 * See if the membership already exists.
		 */
		for (imm = im6o->im6o_memberships.lh_first;
		     imm != NULL; imm = imm->i6mm_chain.le_next)
			if (imm->i6mm_maddr->in6m_ifp == ifp &&
			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
					       &mreq->ipv6mr_multiaddr))
				break;
		/* imm is non-NULL only if the loop above found a match */
		if (imm != NULL) {
			error = EADDRINUSE;
			break;
		}
		/*
		 * Everything looks good; add a new record to the multicast
		 * address list for the given interface.
		 */
		imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK);

		/* on failure in6_addmulti() reports the reason through error */
		if ((imm->i6mm_maddr =
		     in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) {
			free(imm, M_IPMADDR);
			break;
		}
		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
		break;

	case IPV6_LEAVE_GROUP:
		/*
		 * Drop a multicast group membership.
		 * Group must be a valid IP6 multicast address.
		 */
		if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
			error = EINVAL;
			break;
		}
		mreq = mtod(m, struct ipv6_mreq *);
		if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
			if (suser(p->p_ucred, &p->p_acflag))
			{
				error = EACCES;
				break;
			}
		} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
			error = EINVAL;
			break;
		}
		/*
		 * If an interface address was specified, get a pointer
		 * to its ifnet structure.
		 */
		if (mreq->ipv6mr_interface < 0
		 || if_index < mreq->ipv6mr_interface) {
			error = ENXIO;	/* XXX EINVAL? */
			break;
		}
		/*
		 * ifp may be NULL here (e.g. for index 0); the search below
		 * then accepts a membership on any interface.
		 */
		ifp = ifindex2ifnet[mreq->ipv6mr_interface];
		/*
		 * Put interface index into the multicast address,
		 * if the address has link-local scope.
		 */
		if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) {
			mreq->ipv6mr_multiaddr.s6_addr16[1]
				= htons(mreq->ipv6mr_interface);
		}
		/*
		 * Find the membership in the membership list.
		 */
		for (imm = im6o->im6o_memberships.lh_first;
		     imm != NULL; imm = imm->i6mm_chain.le_next) {
			if ((ifp == NULL ||
			     imm->i6mm_maddr->in6m_ifp == ifp) &&
			    IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
					       &mreq->ipv6mr_multiaddr))
				break;
		}
		if (imm == NULL) {
			/* Unable to resolve interface */
			error = EADDRNOTAVAIL;
			break;
		}
		/*
		 * Give up the multicast address record to which the
		 * membership points.
		 */
		LIST_REMOVE(imm, i6mm_chain);
		in6_delmulti(imm->i6mm_maddr);
		free(imm, M_IPMADDR);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/*
	 * If all options have default values, no need to keep the option
	 * block.  This also releases a block allocated above when the
	 * request failed before changing anything.
	 */
	if (im6o->im6o_multicast_ifp == NULL &&
	    im6o->im6o_multicast_hlim == ip6_defmcasthlim &&
	    im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
	    im6o->im6o_memberships.lh_first == NULL) {
		free(*im6op, M_IPMOPTS);
		*im6op = NULL;
	}

	return(error);
}
2017 
2018 /*
2019  * Return the IP6 multicast options in response to user getsockopt().
2020  */
2021 static int
2022 ip6_getmoptions(optname, im6o, mp)
2023 	int optname;
2024 	struct ip6_moptions *im6o;
2025 	struct mbuf **mp;
2026 {
2027 	u_int *hlim, *loop, *ifindex;
2028 
2029 	*mp = m_get(M_WAIT, MT_SOOPTS);
2030 
2031 	switch (optname) {
2032 
2033 	case IPV6_MULTICAST_IF:
2034 		ifindex = mtod(*mp, u_int *);
2035 		(*mp)->m_len = sizeof(u_int);
2036 		if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
2037 			*ifindex = 0;
2038 		else
2039 			*ifindex = im6o->im6o_multicast_ifp->if_index;
2040 		return(0);
2041 
2042 	case IPV6_MULTICAST_HOPS:
2043 		hlim = mtod(*mp, u_int *);
2044 		(*mp)->m_len = sizeof(u_int);
2045 		if (im6o == NULL)
2046 			*hlim = ip6_defmcasthlim;
2047 		else
2048 			*hlim = im6o->im6o_multicast_hlim;
2049 		return(0);
2050 
2051 	case IPV6_MULTICAST_LOOP:
2052 		loop = mtod(*mp, u_int *);
2053 		(*mp)->m_len = sizeof(u_int);
2054 		if (im6o == NULL)
2055 			*loop = ip6_defmcasthlim;
2056 		else
2057 			*loop = im6o->im6o_multicast_loop;
2058 		return(0);
2059 
2060 	default:
2061 		return(EOPNOTSUPP);
2062 	}
2063 }
2064 
2065 /*
2066  * Discard the IP6 multicast options.
2067  */
2068 void
2069 ip6_freemoptions(im6o)
2070 	struct ip6_moptions *im6o;
2071 {
2072 	struct in6_multi_mship *imm;
2073 
2074 	if (im6o == NULL)
2075 		return;
2076 
2077 	while ((imm = im6o->im6o_memberships.lh_first) != NULL) {
2078 		LIST_REMOVE(imm, i6mm_chain);
2079 		if (imm->i6mm_maddr)
2080 			in6_delmulti(imm->i6mm_maddr);
2081 		free(imm, M_IPMADDR);
2082 	}
2083 	free(im6o, M_IPMOPTS);
2084 }
2085 
2086 /*
2087  * Set IPv6 outgoing packet options based on advanced API.
2088  */
2089 int
2090 ip6_setpktoptions(control, opt, priv)
2091 	struct mbuf *control;
2092 	struct ip6_pktopts *opt;
2093 	int priv;
2094 {
2095 	struct cmsghdr *cm = 0;
2096 
2097 	if (control == 0 || opt == 0)
2098 		return(EINVAL);
2099 
2100 	bzero(opt, sizeof(*opt));
2101 	opt->ip6po_hlim = -1; /* -1 means to use default hop limit */
2102 
2103 	/*
2104 	 * XXX: Currently, we assume all the optional information is stored
2105 	 * in a single mbuf.
2106 	 */
2107 	if (control->m_next)
2108 		return(EINVAL);
2109 
2110 	opt->ip6po_m = control;
2111 
2112 	for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len),
2113 		     control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
2114 		cm = mtod(control, struct cmsghdr *);
2115 		if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
2116 			return(EINVAL);
2117 		if (cm->cmsg_level != IPPROTO_IPV6)
2118 			continue;
2119 
2120 		switch(cm->cmsg_type) {
2121 		case IPV6_PKTINFO:
2122 			if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo)))
2123 				return(EINVAL);
2124 			opt->ip6po_pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm);
2125 			if (opt->ip6po_pktinfo->ipi6_ifindex &&
2126 			    IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr))
2127 				opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] =
2128 					htons(opt->ip6po_pktinfo->ipi6_ifindex);
2129 
2130 			if (opt->ip6po_pktinfo->ipi6_ifindex > if_index
2131 			 || opt->ip6po_pktinfo->ipi6_ifindex < 0) {
2132 				return(ENXIO);
2133 			}
2134 
2135 			/*
2136 			 * Check if the requested source address is indeed a
2137 			 * unicast address assigned to the node, and can be
2138 			 * used as the packet's source address.
2139 			 */
2140 			if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) {
2141 				struct ifaddr *ia;
2142 				struct in6_ifaddr *ia6;
2143 				struct sockaddr_in6 sin6;
2144 
2145 				bzero(&sin6, sizeof(sin6));
2146 				sin6.sin6_len = sizeof(sin6);
2147 				sin6.sin6_family = AF_INET6;
2148 				sin6.sin6_addr =
2149 					opt->ip6po_pktinfo->ipi6_addr;
2150 				ia = ifa_ifwithaddr(sin6tosa(&sin6));
2151 				if (ia == NULL ||
2152 				    (opt->ip6po_pktinfo->ipi6_ifindex &&
2153 				     (ia->ifa_ifp->if_index !=
2154 				      opt->ip6po_pktinfo->ipi6_ifindex))) {
2155 					return(EADDRNOTAVAIL);
2156 				}
2157 				ia6 = (struct in6_ifaddr *)ia;
2158 				if ((ia6->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) != 0) {
2159 					return(EADDRNOTAVAIL);
2160 				}
2161 
2162 				/*
2163 				 * Check if the requested source address is
2164 				 * indeed a unicast address assigned to the
2165 				 * node.
2166 				 */
2167 				if (IN6_IS_ADDR_MULTICAST(&opt->ip6po_pktinfo->ipi6_addr))
2168 					return(EADDRNOTAVAIL);
2169 			}
2170 			break;
2171 
2172 		case IPV6_HOPLIMIT:
2173 			if (cm->cmsg_len != CMSG_LEN(sizeof(int)))
2174 				return(EINVAL);
2175 
2176 			opt->ip6po_hlim = *(int *)CMSG_DATA(cm);
2177 			if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255)
2178 				return(EINVAL);
2179 			break;
2180 
2181 		case IPV6_NEXTHOP:
2182 			if (!priv)
2183 				return(EPERM);
2184 
2185 			if (cm->cmsg_len < sizeof(u_char) ||
2186 			    /* check if cmsg_len is large enough for sa_len */
2187 			    cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm)))
2188 				return(EINVAL);
2189 
2190 			opt->ip6po_nexthop = (struct sockaddr *)CMSG_DATA(cm);
2191 
2192 			break;
2193 
2194 		case IPV6_HOPOPTS:
2195 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh)))
2196 				return(EINVAL);
2197 			opt->ip6po_hbh = (struct ip6_hbh *)CMSG_DATA(cm);
2198 			if (cm->cmsg_len !=
2199 			    CMSG_LEN((opt->ip6po_hbh->ip6h_len + 1) << 3))
2200 				return(EINVAL);
2201 			break;
2202 
2203 		case IPV6_DSTOPTS:
2204 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest)))
2205 				return(EINVAL);
2206 
2207 			/*
2208 			 * If there is no routing header yet, the destination
2209 			 * options header should be put on the 1st part.
2210 			 * Otherwise, the header should be on the 2nd part.
2211 			 * (See RFC 2460, section 4.1)
2212 			 */
2213 			if (opt->ip6po_rthdr == NULL) {
2214 				opt->ip6po_dest1 =
2215 					(struct ip6_dest *)CMSG_DATA(cm);
2216 				if (cm->cmsg_len !=
2217 				    CMSG_LEN((opt->ip6po_dest1->ip6d_len + 1)
2218 					     << 3))
2219 					return(EINVAL);
2220 			}
2221 			else {
2222 				opt->ip6po_dest2 =
2223 					(struct ip6_dest *)CMSG_DATA(cm);
2224 				if (cm->cmsg_len !=
2225 				    CMSG_LEN((opt->ip6po_dest2->ip6d_len + 1)
2226 					     << 3))
2227 					return(EINVAL);
2228 			}
2229 			break;
2230 
2231 		case IPV6_RTHDR:
2232 			if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr)))
2233 				return(EINVAL);
2234 			opt->ip6po_rthdr = (struct ip6_rthdr *)CMSG_DATA(cm);
2235 			if (cm->cmsg_len !=
2236 			    CMSG_LEN((opt->ip6po_rthdr->ip6r_len + 1) << 3))
2237 				return(EINVAL);
2238 			switch(opt->ip6po_rthdr->ip6r_type) {
2239 			case IPV6_RTHDR_TYPE_0:
2240 				if (opt->ip6po_rthdr->ip6r_segleft == 0)
2241 					return(EINVAL);
2242 				break;
2243 			default:
2244 				return(EINVAL);
2245 			}
2246 			break;
2247 
2248 		default:
2249 			return(ENOPROTOOPT);
2250 		}
2251 	}
2252 
2253 	return(0);
2254 }
2255 
2256 /*
2257  * Routine called from ip6_output() to loop back a copy of an IP6 multicast
2258  * packet to the input queue of a specified interface.  Note that this
2259  * calls the output routine of the loopback "driver", but with an interface
2260  * pointer that might NOT be lo0ifp -- easier than replicating that code here.
2261  */
2262 void
2263 ip6_mloopback(ifp, m, dst)
2264 	struct ifnet *ifp;
2265 	struct mbuf *m;
2266 	struct sockaddr_in6 *dst;
2267 {
2268 	struct mbuf *copym;
2269 	struct ip6_hdr *ip6;
2270 
2271 	copym = m_copy(m, 0, M_COPYALL);
2272 	if (copym == NULL)
2273 		return;
2274 
2275 	/*
2276 	 * Make sure to deep-copy IPv6 header portion in case the data
2277 	 * is in an mbuf cluster, so that we can safely override the IPv6
2278 	 * header portion later.
2279 	 */
2280 	if ((copym->m_flags & M_EXT) != 0 ||
2281 	    copym->m_len < sizeof(struct ip6_hdr)) {
2282 		copym = m_pullup(copym, sizeof(struct ip6_hdr));
2283 		if (copym == NULL)
2284 			return;
2285 	}
2286 
2287 #ifdef DIAGNOSTIC
2288 	if (copym->m_len < sizeof(*ip6)) {
2289 		m_freem(copym);
2290 		return;
2291 	}
2292 #endif
2293 
2294 	ip6 = mtod(copym, struct ip6_hdr *);
2295 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
2296 		ip6->ip6_src.s6_addr16[1] = 0;
2297 	if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
2298 		ip6->ip6_dst.s6_addr16[1] = 0;
2299 
2300 	(void)looutput(ifp, copym, (struct sockaddr *)dst, NULL);
2301 }
2302 
2303 /*
2304  * Chop IPv6 header off from the payload.
2305  */
2306 static int
2307 ip6_splithdr(m, exthdrs)
2308 	struct mbuf *m;
2309 	struct ip6_exthdrs *exthdrs;
2310 {
2311 	struct mbuf *mh;
2312 	struct ip6_hdr *ip6;
2313 
2314 	ip6 = mtod(m, struct ip6_hdr *);
2315 	if (m->m_len > sizeof(*ip6)) {
2316 		MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2317 		if (mh == 0) {
2318 			m_freem(m);
2319 			return ENOBUFS;
2320 		}
2321 		M_MOVE_PKTHDR(mh, m);
2322 		MH_ALIGN(mh, sizeof(*ip6));
2323 		m->m_len -= sizeof(*ip6);
2324 		m->m_data += sizeof(*ip6);
2325 		mh->m_next = m;
2326 		m = mh;
2327 		m->m_len = sizeof(*ip6);
2328 		bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
2329 	}
2330 	exthdrs->ip6e_ip6 = m;
2331 	return 0;
2332 }
2333 
2334 /*
2335  * Compute IPv6 extension header length.
2336  */
2337 # define in6pcb	inpcb
2338 # define in6p_outputopts	inp_outputopts6
2339 int
2340 ip6_optlen(in6p)
2341 	struct in6pcb *in6p;
2342 {
2343 	int len;
2344 
2345 	if (!in6p->in6p_outputopts)
2346 		return 0;
2347 
2348 	len = 0;
2349 #define elen(x) \
2350     (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
2351 
2352 	len += elen(in6p->in6p_outputopts->ip6po_hbh);
2353 	len += elen(in6p->in6p_outputopts->ip6po_dest1);
2354 	len += elen(in6p->in6p_outputopts->ip6po_rthdr);
2355 	len += elen(in6p->in6p_outputopts->ip6po_dest2);
2356 	return len;
2357 #undef elen
2358 }
2359 # undef in6pcb
2360 # undef in6p_outputopts
2361