1 /* $OpenBSD: ip6_output.c,v 1.105 2008/09/03 08:41:57 mpf Exp $ */ 2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 62 */ 63 64 #include "pf.h" 65 66 #include <sys/param.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/errno.h> 70 #include <sys/protosw.h> 71 #include <sys/socket.h> 72 #include <sys/socketvar.h> 73 #include <sys/systm.h> 74 #include <sys/proc.h> 75 76 #include <net/if.h> 77 #include <net/route.h> 78 79 #include <netinet/in.h> 80 #include <netinet/in_var.h> 81 #include <netinet/in_systm.h> 82 #include <netinet/ip.h> 83 #include <netinet/in_pcb.h> 84 85 #include <netinet/ip6.h> 86 #include <netinet/icmp6.h> 87 #include <netinet6/ip6_var.h> 88 #include <netinet6/nd6.h> 89 #include <netinet6/ip6protosw.h> 90 91 #include <crypto/idgen.h> 92 93 #if NPF > 0 94 #include <net/pfvar.h> 95 #endif 96 97 #ifdef IPSEC 98 #include <netinet/ip_ipsp.h> 99 #include <netinet/ip_ah.h> 100 #include <netinet/ip_esp.h> 101 #include <netinet/udp.h> 102 #include <netinet/tcp.h> 103 #include <net/pfkeyv2.h> 104 105 extern u_int8_t get_sa_require(struct inpcb *); 106 107 extern int ipsec_auth_default_level; 108 extern int ipsec_esp_trans_default_level; 109 extern int ipsec_esp_network_default_level; 110 extern int ipsec_ipcomp_default_level; 111 #endif /* IPSEC */ 112 113 struct ip6_exthdrs { 114 struct mbuf *ip6e_ip6; 115 struct mbuf *ip6e_hbh; 116 struct mbuf *ip6e_dest1; 117 struct mbuf *ip6e_rthdr; 118 struct mbuf *ip6e_dest2; 119 }; 120 121 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int, int); 122 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *); 123 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct mbuf **); 124 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int, int, 125 int, int); 126 static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *); 127 static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **); 128 static int ip6_copyexthdr(struct mbuf **, caddr_t, int); 129 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, 130 struct ip6_frag **); 131 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); 132 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); 133 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, 134 struct ifnet *, struct in6_addr *, u_long *, int *); 135 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); 136 137 /* Context for non-repeating IDs */ 138 struct idgen32_ctx ip6_id_ctx; 139 140 /* 141 * IP6 output. The packet in mbuf chain m contains a skeletal IP6 142 * header (with pri, len, nxt, hlim, src, dst). 143 * This function may modify ver and hlim only. 144 * The mbuf chain containing the packet will be freed. 145 * The mbuf opt, if present, will not be freed. 146 * 147 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and 148 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, 149 * which is rt_rmx.rmx_mtu. 150 * 151 * ifpp - XXX: just for statistics 152 */ 153 int 154 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, 155 int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, 156 struct inpcb *inp) 157 { 158 struct ip6_hdr *ip6, *mhip6; 159 struct ifnet *ifp, *origifp = NULL; 160 struct mbuf *m = m0; 161 int hlen, tlen, len, off; 162 struct route_in6 ip6route; 163 struct rtentry *rt = NULL; 164 struct sockaddr_in6 *dst, dstsock; 165 int error = 0; 166 struct in6_ifaddr *ia = NULL; 167 u_long mtu; 168 int alwaysfrag, dontfrag; 169 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; 170 struct ip6_exthdrs exthdrs; 171 struct in6_addr finaldst; 172 struct route_in6 *ro_pmtu = NULL; 173 int hdrsplit = 0; 174 u_int8_t sproto = 0; 175 #ifdef IPSEC 176 struct m_tag *mtag; 177 union sockaddr_union sdst; 178 struct tdb_ident *tdbi; 179 u_int32_t sspi; 180 struct tdb *tdb; 181 int s; 182 #endif /* IPSEC */ 183 184 #ifdef IPSEC 185 if (inp && (inp->inp_flags & INP_IPV6) == 0) 186 panic("ip6_output: IPv4 pcb is passed"); 187 #endif /* IPSEC */ 188 189 ip6 = mtod(m, struct ip6_hdr *); 190 finaldst = ip6->ip6_dst; 191 192 #define MAKE_EXTHDR(hp, mp) \ 193 do { \ 194 if (hp) { \ 195 struct ip6_ext *eh = (struct ip6_ext *)(hp); \ 196 error = ip6_copyexthdr((mp), (caddr_t)(hp), \ 197 ((eh)->ip6e_len + 1) << 3); \ 198 if (error) \ 199 goto freehdrs; \ 200 } \ 201 } while (0) 202 203 bzero(&exthdrs, sizeof(exthdrs)); 204 205 if (opt) { 206 /* Hop-by-Hop options header */ 207 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); 208 /* Destination options header(1st part) */ 209 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); 210 /* Routing header */ 211 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); 212 /* Destination options header(2nd part) */ 213 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); 214 } 215 216 #ifdef IPSEC 217 if (!ipsec_in_use && !inp) 218 goto done_spd; 219 220 /* 221 * splnet is chosen over spltdb because we are not allowed to 222 * lower the level, and udp6_output calls us in splnet(). XXX check 223 */ 224 s = splnet(); 225 226 /* 227 * Check if there was an outgoing SA bound to the flow 228 * from a transport protocol. 229 */ 230 ip6 = mtod(m, struct ip6_hdr *); 231 232 /* Do we have any pending SAs to apply ? */ 233 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 234 if (mtag != NULL) { 235 #ifdef DIAGNOSTIC 236 if (mtag->m_tag_len != sizeof (struct tdb_ident)) 237 panic("ip6_output: tag of length %d (should be %d", 238 mtag->m_tag_len, sizeof (struct tdb_ident)); 239 #endif 240 tdbi = (struct tdb_ident *)(mtag + 1); 241 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); 242 if (tdb == NULL) 243 error = -EINVAL; 244 m_tag_delete(m, mtag); 245 } else 246 tdb = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr), 247 &error, IPSP_DIRECTION_OUT, NULL, inp); 248 249 if (tdb == NULL) { 250 splx(s); 251 252 if (error == 0) { 253 /* 254 * No IPsec processing required, we'll just send the 255 * packet out. 256 */ 257 sproto = 0; 258 259 /* Fall through to routing/multicast handling */ 260 } else { 261 /* 262 * -EINVAL is used to indicate that the packet should 263 * be silently dropped, typically because we've asked 264 * key management for an SA. 265 */ 266 if (error == -EINVAL) /* Should silently drop packet */ 267 error = 0; 268 269 goto freehdrs; 270 } 271 } else { 272 /* Loop detection */ 273 for (mtag = m_tag_first(m); mtag != NULL; 274 mtag = m_tag_next(m, mtag)) { 275 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 276 mtag->m_tag_id != 277 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 278 continue; 279 tdbi = (struct tdb_ident *)(mtag + 1); 280 if (tdbi->spi == tdb->tdb_spi && 281 tdbi->proto == tdb->tdb_sproto && 282 !bcmp(&tdbi->dst, &tdb->tdb_dst, 283 sizeof(union sockaddr_union))) { 284 splx(s); 285 sproto = 0; /* mark as no-IPsec-needed */ 286 goto done_spd; 287 } 288 } 289 290 /* We need to do IPsec */ 291 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); 292 sspi = tdb->tdb_spi; 293 sproto = tdb->tdb_sproto; 294 splx(s); 295 } 296 297 /* Fall through to the routing/multicast handling code */ 298 done_spd: 299 #endif /* IPSEC */ 300 301 /* 302 * Calculate the total length of the extension header chain. 303 * Keep the length of the unfragmentable part for fragmentation. 304 */ 305 optlen = 0; 306 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; 307 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; 308 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; 309 unfragpartlen = optlen + sizeof(struct ip6_hdr); 310 /* NOTE: we don't add AH/ESP length here. do that later. */ 311 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; 312 313 /* 314 * If we need IPsec, or there is at least one extension header, 315 * separate IP6 header from the payload. 316 */ 317 if ((sproto || optlen) && !hdrsplit) { 318 if ((error = ip6_splithdr(m, &exthdrs)) != 0) { 319 m = NULL; 320 goto freehdrs; 321 } 322 m = exthdrs.ip6e_ip6; 323 hdrsplit++; 324 } 325 326 /* adjust pointer */ 327 ip6 = mtod(m, struct ip6_hdr *); 328 329 /* adjust mbuf packet header length */ 330 m->m_pkthdr.len += optlen; 331 plen = m->m_pkthdr.len - sizeof(*ip6); 332 333 /* If this is a jumbo payload, insert a jumbo payload option. */ 334 if (plen > IPV6_MAXPACKET) { 335 if (!hdrsplit) { 336 if ((error = ip6_splithdr(m, &exthdrs)) != 0) { 337 m = NULL; 338 goto freehdrs; 339 } 340 m = exthdrs.ip6e_ip6; 341 hdrsplit++; 342 } 343 /* adjust pointer */ 344 ip6 = mtod(m, struct ip6_hdr *); 345 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) 346 goto freehdrs; 347 ip6->ip6_plen = 0; 348 } else 349 ip6->ip6_plen = htons(plen); 350 351 /* 352 * Concatenate headers and fill in next header fields. 353 * Here we have, on "m" 354 * IPv6 payload 355 * and we insert headers accordingly. Finally, we should be getting: 356 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] 357 * 358 * during the header composing process, "m" points to IPv6 header. 359 * "mprev" points to an extension header prior to esp. 360 */ 361 { 362 u_char *nexthdrp = &ip6->ip6_nxt; 363 struct mbuf *mprev = m; 364 365 /* 366 * we treat dest2 specially. this makes IPsec processing 367 * much easier. the goal here is to make mprev point the 368 * mbuf prior to dest2. 369 * 370 * result: IPv6 dest2 payload 371 * m and mprev will point to IPv6 header. 372 */ 373 if (exthdrs.ip6e_dest2) { 374 if (!hdrsplit) 375 panic("assumption failed: hdr not split"); 376 exthdrs.ip6e_dest2->m_next = m->m_next; 377 m->m_next = exthdrs.ip6e_dest2; 378 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; 379 ip6->ip6_nxt = IPPROTO_DSTOPTS; 380 } 381 382 #define MAKE_CHAIN(m, mp, p, i)\ 383 do {\ 384 if (m) {\ 385 if (!hdrsplit) \ 386 panic("assumption failed: hdr not split"); \ 387 *mtod((m), u_char *) = *(p);\ 388 *(p) = (i);\ 389 p = mtod((m), u_char *);\ 390 (m)->m_next = (mp)->m_next;\ 391 (mp)->m_next = (m);\ 392 (mp) = (m);\ 393 }\ 394 } while (0) 395 /* 396 * result: IPv6 hbh dest1 rthdr dest2 payload 397 * m will point to IPv6 header. mprev will point to the 398 * extension header prior to dest2 (rthdr in the above case). 399 */ 400 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); 401 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, 402 IPPROTO_DSTOPTS); 403 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, 404 IPPROTO_ROUTING); 405 } 406 407 /* 408 * If there is a routing header, replace the destination address field 409 * with the first hop of the routing header. 410 */ 411 if (exthdrs.ip6e_rthdr) { 412 struct ip6_rthdr *rh; 413 struct ip6_rthdr0 *rh0; 414 struct in6_addr *addr; 415 416 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, 417 struct ip6_rthdr *)); 418 switch (rh->ip6r_type) { 419 case IPV6_RTHDR_TYPE_0: 420 rh0 = (struct ip6_rthdr0 *)rh; 421 addr = (struct in6_addr *)(rh0 + 1); 422 ip6->ip6_dst = addr[0]; 423 bcopy(&addr[1], &addr[0], 424 sizeof(struct in6_addr) * (rh0->ip6r0_segleft - 1)); 425 addr[rh0->ip6r0_segleft - 1] = finaldst; 426 break; 427 default: /* is it possible? */ 428 error = EINVAL; 429 goto bad; 430 } 431 } 432 433 /* Source address validation */ 434 if (!(flags & IPV6_UNSPECSRC) && 435 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { 436 /* 437 * XXX: we can probably assume validation in the caller, but 438 * we explicitly check the address here for safety. 439 */ 440 error = EOPNOTSUPP; 441 ip6stat.ip6s_badscope++; 442 goto bad; 443 } 444 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { 445 error = EOPNOTSUPP; 446 ip6stat.ip6s_badscope++; 447 goto bad; 448 } 449 450 ip6stat.ip6s_localout++; 451 452 /* 453 * Route packet. 454 */ 455 /* initialize cached route */ 456 if (ro == 0) { 457 ro = &ip6route; 458 bzero((caddr_t)ro, sizeof(*ro)); 459 } 460 ro_pmtu = ro; 461 if (opt && opt->ip6po_rthdr) 462 ro = &opt->ip6po_route; 463 dst = (struct sockaddr_in6 *)&ro->ro_dst; 464 465 /* 466 * if specified, try to fill in the traffic class field. 467 * do not override if a non-zero value is already set. 468 * we check the diffserv field and the ecn field separately. 469 */ 470 if (opt && opt->ip6po_tclass >= 0) { 471 int mask = 0; 472 473 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) 474 mask |= 0xfc; 475 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) 476 mask |= 0x03; 477 if (mask != 0) 478 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); 479 } 480 481 /* fill in or override the hop limit field, if necessary. */ 482 if (opt && opt->ip6po_hlim != -1) 483 ip6->ip6_hlim = opt->ip6po_hlim & 0xff; 484 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 485 if (im6o != NULL) 486 ip6->ip6_hlim = im6o->im6o_multicast_hlim; 487 else 488 ip6->ip6_hlim = ip6_defmcasthlim; 489 } 490 491 #ifdef IPSEC 492 /* 493 * Check if the packet needs encapsulation. 494 * ipsp_process_packet will never come back to here. 495 */ 496 if (sproto != 0) { 497 s = splnet(); 498 499 /* 500 * XXX what should we do if ip6_hlim == 0 and the 501 * packet gets tunneled? 502 */ 503 504 tdb = gettdb(sspi, &sdst, sproto); 505 if (tdb == NULL) { 506 splx(s); 507 error = EHOSTUNREACH; 508 m_freem(m); 509 goto done; 510 } 511 512 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ 513 514 /* Callee frees mbuf */ 515 /* 516 * if we are source-routing, do not attempt to tunnel the 517 * packet just because ip6_dst is different from what tdb has. 518 * XXX 519 */ 520 error = ipsp_process_packet(m, tdb, AF_INET6, 521 exthdrs.ip6e_rthdr ? 1 : 0); 522 splx(s); 523 524 return error; /* Nothing more to be done */ 525 } 526 #endif /* IPSEC */ 527 528 bzero(&dstsock, sizeof(dstsock)); 529 dstsock.sin6_family = AF_INET6; 530 dstsock.sin6_addr = ip6->ip6_dst; 531 dstsock.sin6_len = sizeof(dstsock); 532 if ((error = in6_selectroute(&dstsock, opt, im6o, ro, &ifp, 533 &rt)) != 0) { 534 switch (error) { 535 case EHOSTUNREACH: 536 ip6stat.ip6s_noroute++; 537 break; 538 case EADDRNOTAVAIL: 539 default: 540 break; /* XXX statistics? */ 541 } 542 if (ifp != NULL) 543 in6_ifstat_inc(ifp, ifs6_out_discard); 544 goto bad; 545 } 546 if (rt == NULL) { 547 /* 548 * If in6_selectroute() does not return a route entry, 549 * dst may not have been updated. 550 */ 551 *dst = dstsock; /* XXX */ 552 } 553 554 /* 555 * then rt (for unicast) and ifp must be non-NULL valid values. 556 */ 557 if (rt) { 558 ia = (struct in6_ifaddr *)(rt->rt_ifa); 559 rt->rt_use++; 560 } 561 562 if ((flags & IPV6_FORWARDING) == 0) { 563 /* XXX: the FORWARDING flag can be set for mrouting. */ 564 in6_ifstat_inc(ifp, ifs6_out_request); 565 } 566 567 /* 568 * The outgoing interface must be in the zone of source and 569 * destination addresses. We should use ia_ifp to support the 570 * case of sending packets to an address of our own. 571 */ 572 if (ia != NULL && ia->ia_ifp) 573 origifp = ia->ia_ifp; 574 else 575 origifp = ifp; 576 577 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 578 if (opt && opt->ip6po_nextroute.ro_rt) { 579 /* 580 * The nexthop is explicitly specified by the 581 * application. We assume the next hop is an IPv6 582 * address. 583 */ 584 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; 585 } else if ((rt->rt_flags & RTF_GATEWAY)) 586 dst = (struct sockaddr_in6 *)rt->rt_gateway; 587 } 588 589 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 590 /* Unicast */ 591 592 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ 593 } else { 594 /* Multicast */ 595 struct in6_multi *in6m; 596 597 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; 598 599 in6_ifstat_inc(ifp, ifs6_out_mcast); 600 601 /* 602 * Confirm that the outgoing interface supports multicast. 603 */ 604 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 605 ip6stat.ip6s_noroute++; 606 in6_ifstat_inc(ifp, ifs6_out_discard); 607 error = ENETUNREACH; 608 goto bad; 609 } 610 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); 611 if (in6m != NULL && 612 (im6o == NULL || im6o->im6o_multicast_loop)) { 613 /* 614 * If we belong to the destination multicast group 615 * on the outgoing interface, and the caller did not 616 * forbid loopback, loop back a copy. 617 */ 618 ip6_mloopback(ifp, m, dst); 619 } else { 620 /* 621 * If we are acting as a multicast router, perform 622 * multicast forwarding as if the packet had just 623 * arrived on the interface to which we are about 624 * to send. The multicast forwarding function 625 * recursively calls this function, using the 626 * IPV6_FORWARDING flag to prevent infinite recursion. 627 * 628 * Multicasts that are looped back by ip6_mloopback(), 629 * above, will be forwarded by the ip6_input() routine, 630 * if necessary. 631 */ 632 #ifdef MROUTING 633 if (ip6_mforwarding && ip6_mrouter && 634 (flags & IPV6_FORWARDING) == 0) { 635 if (ip6_mforward(ip6, ifp, m) != 0) { 636 m_freem(m); 637 goto done; 638 } 639 } 640 #endif 641 } 642 /* 643 * Multicasts with a hoplimit of zero may be looped back, 644 * above, but must not be transmitted on a network. 645 * Also, multicasts addressed to the loopback interface 646 * are not sent -- the above call to ip6_mloopback() will 647 * loop back a copy if this host actually belongs to the 648 * destination group on the loopback interface. 649 */ 650 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || 651 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { 652 m_freem(m); 653 goto done; 654 } 655 } 656 657 /* 658 * Fill the outgoing interface to tell the upper layer 659 * to increment per-interface statistics. 660 */ 661 if (ifpp) 662 *ifpp = ifp; 663 664 /* Determine path MTU. */ 665 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, 666 &alwaysfrag)) != 0) 667 goto bad; 668 669 /* 670 * The caller of this function may specify to use the minimum MTU 671 * in some cases. 672 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU 673 * setting. The logic is a bit complicated; by default, unicast 674 * packets will follow path MTU while multicast packets will be sent at 675 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets 676 * including unicast ones will be sent at the minimum MTU. Multicast 677 * packets will always be sent at the minimum MTU unless 678 * IP6PO_MINMTU_DISABLE is explicitly specified. 679 * See RFC 3542 for more details. 680 */ 681 if (mtu > IPV6_MMTU) { 682 if ((flags & IPV6_MINMTU)) 683 mtu = IPV6_MMTU; 684 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) 685 mtu = IPV6_MMTU; 686 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && 687 (opt == NULL || 688 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { 689 mtu = IPV6_MMTU; 690 } 691 } 692 693 /* Fake scoped addresses */ 694 if ((ifp->if_flags & IFF_LOOPBACK) != 0) { 695 /* 696 * If source or destination address is a scoped address, and 697 * the packet is going to be sent to a loopback interface, 698 * we should keep the original interface. 699 */ 700 701 /* 702 * XXX: this is a very experimental and temporary solution. 703 * We eventually have sockaddr_in6 and use the sin6_scope_id 704 * field of the structure here. 705 * We rely on the consistency between two scope zone ids 706 * of source add destination, which should already be assured 707 * Larger scopes than link will be supported in the near 708 * future. 709 */ 710 origifp = NULL; 711 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 712 origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])]; 713 else if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 714 origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])]; 715 /* 716 * XXX: origifp can be NULL even in those two cases above. 717 * For example, if we remove the (only) link-local address 718 * from the loopback interface, and try to send a link-local 719 * address without link-id information. Then the source 720 * address is ::1, and the destination address is the 721 * link-local address with its s6_addr16[1] being zero. 722 * What is worse, if the packet goes to the loopback interface 723 * by a default rejected route, the null pointer would be 724 * passed to looutput, and the kernel would hang. 725 * The following last resort would prevent such disaster. 726 */ 727 if (origifp == NULL) 728 origifp = ifp; 729 } else 730 origifp = ifp; 731 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 732 ip6->ip6_src.s6_addr16[1] = 0; 733 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 734 ip6->ip6_dst.s6_addr16[1] = 0; 735 736 /* 737 * If the outgoing packet contains a hop-by-hop options header, 738 * it must be examined and processed even by the source node. 739 * (RFC 2460, section 4.) 740 */ 741 if (exthdrs.ip6e_hbh) { 742 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); 743 u_int32_t dummy1; /* XXX unused */ 744 u_int32_t dummy2; /* XXX unused */ 745 746 /* 747 * XXX: if we have to send an ICMPv6 error to the sender, 748 * we need the M_LOOP flag since icmp6_error() expects 749 * the IPv6 and the hop-by-hop options header are 750 * continuous unless the flag is set. 751 */ 752 m->m_flags |= M_LOOP; 753 m->m_pkthdr.rcvif = ifp; 754 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), 755 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), 756 &dummy1, &dummy2) < 0) { 757 /* m was already freed at this point */ 758 error = EINVAL;/* better error? */ 759 goto done; 760 } 761 m->m_flags &= ~M_LOOP; /* XXX */ 762 m->m_pkthdr.rcvif = NULL; 763 } 764 765 #if NPF > 0 766 if (pf_test6(PF_OUT, ifp, &m, NULL) != PF_PASS) { 767 error = EHOSTUNREACH; 768 m_freem(m); 769 goto done; 770 } 771 if (m == NULL) 772 goto done; 773 ip6 = mtod(m, struct ip6_hdr *); 774 #endif 775 776 /* 777 * Send the packet to the outgoing interface. 778 * If necessary, do IPv6 fragmentation before sending. 779 * 780 * the logic here is rather complex: 781 * 1: normal case (dontfrag == 0, alwaysfrag == 0) 782 * 1-a: send as is if tlen <= path mtu 783 * 1-b: fragment if tlen > path mtu 784 * 785 * 2: if user asks us not to fragment (dontfrag == 1) 786 * 2-a: send as is if tlen <= interface mtu 787 * 2-b: error if tlen > interface mtu 788 * 789 * 3: if we always need to attach fragment header (alwaysfrag == 1) 790 * always fragment 791 * 792 * 4: if dontfrag == 1 && alwaysfrag == 1 793 * error, as we cannot handle this conflicting request 794 */ 795 tlen = m->m_pkthdr.len; 796 797 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) 798 dontfrag = 1; 799 else 800 dontfrag = 0; 801 if (dontfrag && alwaysfrag) { /* case 4 */ 802 /* conflicting request - can't transmit */ 803 error = EMSGSIZE; 804 goto bad; 805 } 806 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */ 807 /* 808 * Even if the DONTFRAG option is specified, we cannot send the 809 * packet when the data length is larger than the MTU of the 810 * outgoing interface. 811 * Notify the error by sending IPV6_PATHMTU ancillary data as 812 * well as returning an error code (the latter is not described 813 * in the API spec.) 814 */ 815 #if 0 816 u_int32_t mtu32; 817 struct ip6ctlparam ip6cp; 818 819 mtu32 = (u_int32_t)mtu; 820 bzero(&ip6cp, sizeof(ip6cp)); 821 ip6cp.ip6c_cmdarg = (void *)&mtu32; 822 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, 823 (void *)&ip6cp); 824 #endif 825 826 error = EMSGSIZE; 827 goto bad; 828 } 829 830 /* 831 * transmit packet without fragmentation 832 */ 833 if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ 834 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); 835 goto done; 836 } 837 838 /* 839 * try to fragment the packet. case 1-b and 3 840 */ 841 if (mtu < IPV6_MMTU) { 842 /* path MTU cannot be less than IPV6_MMTU */ 843 error = EMSGSIZE; 844 in6_ifstat_inc(ifp, ifs6_out_fragfail); 845 goto bad; 846 } else if (ip6->ip6_plen == 0) { 847 /* jumbo payload cannot be fragmented */ 848 error = EMSGSIZE; 849 in6_ifstat_inc(ifp, ifs6_out_fragfail); 850 goto bad; 851 } else { 852 struct mbuf **mnext, *m_frgpart; 853 struct ip6_frag *ip6f; 854 u_int32_t id = htonl(ip6_randomid()); 855 u_char nextproto; 856 #if 0 857 struct ip6ctlparam ip6cp; 858 u_int32_t mtu32; 859 #endif 860 861 /* 862 * Too large for the destination or interface; 863 * fragment if possible. 864 * Must be able to put at least 8 bytes per fragment. 865 */ 866 hlen = unfragpartlen; 867 if (mtu > IPV6_MAXPACKET) 868 mtu = IPV6_MAXPACKET; 869 870 #if 0 871 /* Notify a proper path MTU to applications. */ 872 mtu32 = (u_int32_t)mtu; 873 bzero(&ip6cp, sizeof(ip6cp)); 874 ip6cp.ip6c_cmdarg = (void *)&mtu32; 875 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, 876 (void *)&ip6cp); 877 #endif 878 879 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; 880 if (len < 8) { 881 error = EMSGSIZE; 882 in6_ifstat_inc(ifp, ifs6_out_fragfail); 883 goto bad; 884 } 885 886 mnext = &m->m_nextpkt; 887 888 /* 889 * Change the next header field of the last header in the 890 * unfragmentable part. 891 */ 892 if (exthdrs.ip6e_rthdr) { 893 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); 894 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; 895 } else if (exthdrs.ip6e_dest1) { 896 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); 897 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; 898 } else if (exthdrs.ip6e_hbh) { 899 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); 900 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; 901 } else { 902 nextproto = ip6->ip6_nxt; 903 ip6->ip6_nxt = IPPROTO_FRAGMENT; 904 } 905 906 /* 907 * Loop through length of segment after first fragment, 908 * make new header and copy data of each part and link onto 909 * chain. 910 */ 911 m0 = m; 912 for (off = hlen; off < tlen; off += len) { 913 struct mbuf *mlast; 914 915 MGETHDR(m, M_DONTWAIT, MT_HEADER); 916 if (!m) { 917 error = ENOBUFS; 918 ip6stat.ip6s_odropped++; 919 goto sendorfree; 920 } 921 m->m_pkthdr.rcvif = NULL; 922 m->m_flags = m0->m_flags & M_COPYFLAGS; 923 *mnext = m; 924 mnext = &m->m_nextpkt; 925 m->m_data += max_linkhdr; 926 mhip6 = mtod(m, struct ip6_hdr *); 927 *mhip6 = *ip6; 928 m->m_len = sizeof(*mhip6); 929 error = ip6_insertfraghdr(m0, m, hlen, &ip6f); 930 if (error) { 931 ip6stat.ip6s_odropped++; 932 goto sendorfree; 933 } 934 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7)); 935 if (off + len >= tlen) 936 len = tlen - off; 937 else 938 ip6f->ip6f_offlg |= IP6F_MORE_FRAG; 939 mhip6->ip6_plen = htons((u_int16_t)(len + hlen + 940 sizeof(*ip6f) - sizeof(struct ip6_hdr))); 941 if ((m_frgpart = m_copy(m0, off, len)) == 0) { 942 error = ENOBUFS; 943 ip6stat.ip6s_odropped++; 944 goto sendorfree; 945 } 946 for (mlast = m; mlast->m_next; mlast = mlast->m_next) 947 ; 948 mlast->m_next = m_frgpart; 949 m->m_pkthdr.len = len + hlen + sizeof(*ip6f); 950 m->m_pkthdr.rcvif = (struct ifnet *)0; 951 ip6f->ip6f_reserved = 0; 952 ip6f->ip6f_ident = id; 953 ip6f->ip6f_nxt = nextproto; 954 ip6stat.ip6s_ofragments++; 955 in6_ifstat_inc(ifp, ifs6_out_fragcreat); 956 } 957 958 in6_ifstat_inc(ifp, ifs6_out_fragok); 959 } 960 961 /* 962 * Remove leading garbages. 963 */ 964 sendorfree: 965 m = m0->m_nextpkt; 966 m0->m_nextpkt = 0; 967 m_freem(m0); 968 for (m0 = m; m; m = m0) { 969 m0 = m->m_nextpkt; 970 m->m_nextpkt = 0; 971 if (error == 0) { 972 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); 973 } else 974 m_freem(m); 975 } 976 977 if (error == 0) 978 ip6stat.ip6s_fragmented++; 979 980 done: 981 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */ 982 RTFREE(ro->ro_rt); 983 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { 984 RTFREE(ro_pmtu->ro_rt); 985 } 986 987 return (error); 988 989 freehdrs: 990 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ 991 m_freem(exthdrs.ip6e_dest1); 992 m_freem(exthdrs.ip6e_rthdr); 993 m_freem(exthdrs.ip6e_dest2); 994 /* FALLTHROUGH */ 995 bad: 996 m_freem(m); 997 goto done; 998 } 999 1000 static int 1001 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) 1002 { 1003 struct mbuf *m; 1004 1005 if (hlen > MCLBYTES) 1006 return (ENOBUFS); /* XXX */ 1007 1008 MGET(m, M_DONTWAIT, MT_DATA); 1009 if (!m) 1010 return (ENOBUFS); 1011 1012 if (hlen > MLEN) { 1013 MCLGET(m, M_DONTWAIT); 1014 if ((m->m_flags & M_EXT) == 0) { 1015 m_free(m); 1016 return (ENOBUFS); 1017 } 1018 } 1019 m->m_len = hlen; 1020 if (hdr) 1021 bcopy(hdr, mtod(m, caddr_t), hlen); 1022 1023 *mp = m; 1024 return (0); 1025 } 1026 1027 /* 1028 * Insert jumbo payload option. 1029 */ 1030 static int 1031 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) 1032 { 1033 struct mbuf *mopt; 1034 u_int8_t *optbuf; 1035 u_int32_t v; 1036 1037 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ 1038 1039 /* 1040 * If there is no hop-by-hop options header, allocate new one. 1041 * If there is one but it doesn't have enough space to store the 1042 * jumbo payload option, allocate a cluster to store the whole options. 1043 * Otherwise, use it to store the options. 1044 */ 1045 if (exthdrs->ip6e_hbh == 0) { 1046 MGET(mopt, M_DONTWAIT, MT_DATA); 1047 if (mopt == 0) 1048 return (ENOBUFS); 1049 mopt->m_len = JUMBOOPTLEN; 1050 optbuf = mtod(mopt, u_int8_t *); 1051 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ 1052 exthdrs->ip6e_hbh = mopt; 1053 } else { 1054 struct ip6_hbh *hbh; 1055 1056 mopt = exthdrs->ip6e_hbh; 1057 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { 1058 /* 1059 * XXX assumption: 1060 * - exthdrs->ip6e_hbh is not referenced from places 1061 * other than exthdrs. 1062 * - exthdrs->ip6e_hbh is not an mbuf chain. 1063 */ 1064 int oldoptlen = mopt->m_len; 1065 struct mbuf *n; 1066 1067 /* 1068 * XXX: give up if the whole (new) hbh header does 1069 * not fit even in an mbuf cluster. 1070 */ 1071 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) 1072 return (ENOBUFS); 1073 1074 /* 1075 * As a consequence, we must always prepare a cluster 1076 * at this point. 1077 */ 1078 MGET(n, M_DONTWAIT, MT_DATA); 1079 if (n) { 1080 MCLGET(n, M_DONTWAIT); 1081 if ((n->m_flags & M_EXT) == 0) { 1082 m_freem(n); 1083 n = NULL; 1084 } 1085 } 1086 if (!n) 1087 return (ENOBUFS); 1088 n->m_len = oldoptlen + JUMBOOPTLEN; 1089 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), 1090 oldoptlen); 1091 optbuf = mtod(n, u_int8_t *) + oldoptlen; 1092 m_freem(mopt); 1093 mopt = exthdrs->ip6e_hbh = n; 1094 } else { 1095 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len; 1096 mopt->m_len += JUMBOOPTLEN; 1097 } 1098 optbuf[0] = IP6OPT_PADN; 1099 optbuf[1] = 0; 1100 1101 /* 1102 * Adjust the header length according to the pad and 1103 * the jumbo payload option. 1104 */ 1105 hbh = mtod(mopt, struct ip6_hbh *); 1106 hbh->ip6h_len += (JUMBOOPTLEN >> 3); 1107 } 1108 1109 /* fill in the option. */ 1110 optbuf[2] = IP6OPT_JUMBO; 1111 optbuf[3] = 4; 1112 v = (u_int32_t)htonl(plen + JUMBOOPTLEN); 1113 bcopy(&v, &optbuf[4], sizeof(u_int32_t)); 1114 1115 /* finally, adjust the packet header length */ 1116 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; 1117 1118 return (0); 1119 #undef JUMBOOPTLEN 1120 } 1121 1122 /* 1123 * Insert fragment header and copy unfragmentable header portions. 1124 */ 1125 static int 1126 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, 1127 struct ip6_frag **frghdrp) 1128 { 1129 struct mbuf *n, *mlast; 1130 1131 if (hlen > sizeof(struct ip6_hdr)) { 1132 n = m_copym(m0, sizeof(struct ip6_hdr), 1133 hlen - sizeof(struct ip6_hdr), M_DONTWAIT); 1134 if (n == 0) 1135 return (ENOBUFS); 1136 m->m_next = n; 1137 } else 1138 n = m; 1139 1140 /* Search for the last mbuf of unfragmentable part. */ 1141 for (mlast = n; mlast->m_next; mlast = mlast->m_next) 1142 ; 1143 1144 if ((mlast->m_flags & M_EXT) == 0 && 1145 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { 1146 /* use the trailing space of the last mbuf for the fragment hdr */ 1147 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + 1148 mlast->m_len); 1149 mlast->m_len += sizeof(struct ip6_frag); 1150 m->m_pkthdr.len += sizeof(struct ip6_frag); 1151 } else { 1152 /* allocate a new mbuf for the fragment header */ 1153 struct mbuf *mfrg; 1154 1155 MGET(mfrg, M_DONTWAIT, MT_DATA); 1156 if (mfrg == 0) 1157 return (ENOBUFS); 1158 mfrg->m_len = sizeof(struct ip6_frag); 1159 *frghdrp = mtod(mfrg, struct ip6_frag *); 1160 mlast->m_next = mfrg; 1161 } 1162 1163 return (0); 1164 } 1165 1166 static int 1167 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, 1168 struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, 1169 int *alwaysfragp) 1170 { 1171 u_int32_t mtu = 0; 1172 int alwaysfrag = 0; 1173 int error = 0; 1174 1175 if (ro_pmtu != ro) { 1176 /* The first hop and the final destination may differ. */ 1177 struct sockaddr_in6 *sa6_dst = 1178 (struct sockaddr_in6 *)&ro_pmtu->ro_dst; 1179 if (ro_pmtu->ro_rt && 1180 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || 1181 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { 1182 RTFREE(ro_pmtu->ro_rt); 1183 ro_pmtu->ro_rt = (struct rtentry *)NULL; 1184 } 1185 if (ro_pmtu->ro_rt == 0) { 1186 bzero(sa6_dst, sizeof(*sa6_dst)); 1187 sa6_dst->sin6_family = AF_INET6; 1188 sa6_dst->sin6_len = sizeof(struct sockaddr_in6); 1189 sa6_dst->sin6_addr = *dst; 1190 1191 rtalloc((struct route *)ro_pmtu); 1192 } 1193 } 1194 if (ro_pmtu->ro_rt) { 1195 u_int32_t ifmtu; 1196 1197 if (ifp == NULL) 1198 ifp = ro_pmtu->ro_rt->rt_ifp; 1199 ifmtu = IN6_LINKMTU(ifp); 1200 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; 1201 if (mtu == 0) 1202 mtu = ifmtu; 1203 else if (mtu < IPV6_MMTU) { 1204 /* 1205 * RFC2460 section 5, last paragraph: 1206 * if we record ICMPv6 too big message with 1207 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU 1208 * or smaller, with fragment header attached. 1209 * (fragment header is needed regardless from the 1210 * packet size, for translators to identify packets) 1211 */ 1212 alwaysfrag = 1; 1213 mtu = IPV6_MMTU; 1214 } else if (mtu > ifmtu) { 1215 /* 1216 * The MTU on the route is larger than the MTU on 1217 * the interface! This shouldn't happen, unless the 1218 * MTU of the interface has been changed after the 1219 * interface was brought up. Change the MTU in the 1220 * route to match the interface MTU (as long as the 1221 * field isn't locked). 1222 */ 1223 mtu = ifmtu; 1224 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) 1225 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; 1226 } 1227 } else if (ifp) { 1228 mtu = IN6_LINKMTU(ifp); 1229 } else 1230 error = EHOSTUNREACH; /* XXX */ 1231 1232 *mtup = mtu; 1233 if (alwaysfragp) 1234 *alwaysfragp = alwaysfrag; 1235 return (error); 1236 } 1237 1238 /* 1239 * IP6 socket option processing. 1240 */ 1241 int 1242 ip6_ctloutput(int op, struct socket *so, int level, int optname, 1243 struct mbuf **mp) 1244 { 1245 int privileged, optdatalen, uproto; 1246 void *optdata; 1247 struct inpcb *inp = sotoinpcb(so); 1248 struct mbuf *m = *mp; 1249 int error, optval; 1250 int optlen; 1251 #ifdef IPSEC 1252 struct proc *p = curproc; /* XXX */ 1253 struct tdb *tdb; 1254 struct tdb_ident *tdbip, tdbi; 1255 int s; 1256 #endif 1257 1258 optlen = m ? m->m_len : 0; 1259 error = optval = 0; 1260 1261 privileged = (inp->inp_socket->so_state & SS_PRIV); 1262 uproto = (int)so->so_proto->pr_protocol; 1263 1264 if (level == IPPROTO_IPV6) { 1265 switch (op) { 1266 case PRCO_SETOPT: 1267 switch (optname) { 1268 case IPV6_2292PKTOPTIONS: 1269 { 1270 error = ip6_pcbopts(&inp->inp_outputopts6, 1271 m, so); 1272 break; 1273 } 1274 1275 /* 1276 * Use of some Hop-by-Hop options or some 1277 * Destination options, might require special 1278 * privilege. That is, normal applications 1279 * (without special privilege) might be forbidden 1280 * from setting certain options in outgoing packets, 1281 * and might never see certain options in received 1282 * packets. [RFC 2292 Section 6] 1283 * KAME specific note: 1284 * KAME prevents non-privileged users from sending or 1285 * receiving ANY hbh/dst options in order to avoid 1286 * overhead of parsing options in the kernel. 1287 */ 1288 case IPV6_RECVHOPOPTS: 1289 case IPV6_RECVDSTOPTS: 1290 case IPV6_RECVRTHDRDSTOPTS: 1291 if (!privileged) { 1292 error = EPERM; 1293 break; 1294 } 1295 /* FALLTHROUGH */ 1296 case IPV6_UNICAST_HOPS: 1297 case IPV6_HOPLIMIT: 1298 case IPV6_FAITH: 1299 1300 case IPV6_RECVPKTINFO: 1301 case IPV6_RECVHOPLIMIT: 1302 case IPV6_RECVRTHDR: 1303 case IPV6_RECVPATHMTU: 1304 case IPV6_RECVTCLASS: 1305 case IPV6_V6ONLY: 1306 case IPV6_AUTOFLOWLABEL: 1307 if (optlen != sizeof(int)) { 1308 error = EINVAL; 1309 break; 1310 } 1311 optval = *mtod(m, int *); 1312 switch (optname) { 1313 1314 case IPV6_UNICAST_HOPS: 1315 if (optval < -1 || optval >= 256) 1316 error = EINVAL; 1317 else { 1318 /* -1 = kernel default */ 1319 inp->inp_hops = optval; 1320 } 1321 break; 1322 #define OPTSET(bit) \ 1323 do { \ 1324 if (optval) \ 1325 inp->inp_flags |= (bit); \ 1326 else \ 1327 inp->inp_flags &= ~(bit); \ 1328 } while (/*CONSTCOND*/ 0) 1329 #define OPTSET2292(bit) \ 1330 do { \ 1331 inp->inp_flags |= IN6P_RFC2292; \ 1332 if (optval) \ 1333 inp->inp_flags |= (bit); \ 1334 else \ 1335 inp->inp_flags &= ~(bit); \ 1336 } while (/*CONSTCOND*/ 0) 1337 #define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0) 1338 1339 case IPV6_RECVPKTINFO: 1340 /* cannot mix with RFC2292 */ 1341 if (OPTBIT(IN6P_RFC2292)) { 1342 error = EINVAL; 1343 break; 1344 } 1345 OPTSET(IN6P_PKTINFO); 1346 break; 1347 1348 case IPV6_HOPLIMIT: 1349 { 1350 struct ip6_pktopts **optp; 1351 1352 /* cannot mix with RFC2292 */ 1353 if (OPTBIT(IN6P_RFC2292)) { 1354 error = EINVAL; 1355 break; 1356 } 1357 optp = &inp->inp_outputopts6; 1358 error = ip6_pcbopt(IPV6_HOPLIMIT, 1359 (u_char *)&optval, 1360 sizeof(optval), 1361 optp, 1362 privileged, uproto); 1363 break; 1364 } 1365 1366 case IPV6_RECVHOPLIMIT: 1367 /* cannot mix with RFC2292 */ 1368 if (OPTBIT(IN6P_RFC2292)) { 1369 error = EINVAL; 1370 break; 1371 } 1372 OPTSET(IN6P_HOPLIMIT); 1373 break; 1374 1375 case IPV6_RECVHOPOPTS: 1376 /* cannot mix with RFC2292 */ 1377 if (OPTBIT(IN6P_RFC2292)) { 1378 error = EINVAL; 1379 break; 1380 } 1381 OPTSET(IN6P_HOPOPTS); 1382 break; 1383 1384 case IPV6_RECVDSTOPTS: 1385 /* cannot mix with RFC2292 */ 1386 if (OPTBIT(IN6P_RFC2292)) { 1387 error = EINVAL; 1388 break; 1389 } 1390 OPTSET(IN6P_DSTOPTS); 1391 break; 1392 1393 case IPV6_RECVRTHDRDSTOPTS: 1394 /* cannot mix with RFC2292 */ 1395 if (OPTBIT(IN6P_RFC2292)) { 1396 error = EINVAL; 1397 break; 1398 } 1399 OPTSET(IN6P_RTHDRDSTOPTS); 1400 break; 1401 1402 case IPV6_RECVRTHDR: 1403 /* cannot mix with RFC2292 */ 1404 if (OPTBIT(IN6P_RFC2292)) { 1405 error = EINVAL; 1406 break; 1407 } 1408 OPTSET(IN6P_RTHDR); 1409 break; 1410 1411 case IPV6_FAITH: 1412 OPTSET(IN6P_FAITH); 1413 break; 1414 1415 case IPV6_RECVPATHMTU: 1416 /* 1417 * We ignore this option for TCP 1418 * sockets. 1419 * (RFC3542 leaves this case 1420 * unspecified.) 1421 */ 1422 if (uproto != IPPROTO_TCP) 1423 OPTSET(IN6P_MTU); 1424 break; 1425 1426 case IPV6_V6ONLY: 1427 /* 1428 * make setsockopt(IPV6_V6ONLY) 1429 * available only prior to bind(2). 1430 * see ipng mailing list, Jun 22 2001. 1431 */ 1432 if (inp->inp_lport || 1433 !IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { 1434 error = EINVAL; 1435 break; 1436 } 1437 if ((ip6_v6only && optval) || 1438 (!ip6_v6only && !optval)) 1439 error = 0; 1440 else 1441 error = EINVAL; 1442 break; 1443 case IPV6_RECVTCLASS: 1444 /* cannot mix with RFC2292 XXX */ 1445 if (OPTBIT(IN6P_RFC2292)) { 1446 error = EINVAL; 1447 break; 1448 } 1449 OPTSET(IN6P_TCLASS); 1450 break; 1451 case IPV6_AUTOFLOWLABEL: 1452 OPTSET(IN6P_AUTOFLOWLABEL); 1453 break; 1454 1455 } 1456 break; 1457 1458 case IPV6_TCLASS: 1459 case IPV6_DONTFRAG: 1460 case IPV6_USE_MIN_MTU: 1461 if (optlen != sizeof(optval)) { 1462 error = EINVAL; 1463 break; 1464 } 1465 optval = *mtod(m, int *); 1466 { 1467 struct ip6_pktopts **optp; 1468 optp = &inp->inp_outputopts6; 1469 error = ip6_pcbopt(optname, 1470 (u_char *)&optval, 1471 sizeof(optval), 1472 optp, 1473 privileged, uproto); 1474 break; 1475 } 1476 1477 case IPV6_2292PKTINFO: 1478 case IPV6_2292HOPLIMIT: 1479 case IPV6_2292HOPOPTS: 1480 case IPV6_2292DSTOPTS: 1481 case IPV6_2292RTHDR: 1482 /* RFC 2292 */ 1483 if (optlen != sizeof(int)) { 1484 error = EINVAL; 1485 break; 1486 } 1487 optval = *mtod(m, int *); 1488 switch (optname) { 1489 case IPV6_2292PKTINFO: 1490 OPTSET2292(IN6P_PKTINFO); 1491 break; 1492 case IPV6_2292HOPLIMIT: 1493 OPTSET2292(IN6P_HOPLIMIT); 1494 break; 1495 case IPV6_2292HOPOPTS: 1496 /* 1497 * Check super-user privilege. 1498 * See comments for IPV6_RECVHOPOPTS. 1499 */ 1500 if (!privileged) 1501 return (EPERM); 1502 OPTSET2292(IN6P_HOPOPTS); 1503 break; 1504 case IPV6_2292DSTOPTS: 1505 if (!privileged) 1506 return (EPERM); 1507 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ 1508 break; 1509 case IPV6_2292RTHDR: 1510 OPTSET2292(IN6P_RTHDR); 1511 break; 1512 } 1513 break; 1514 case IPV6_PKTINFO: 1515 case IPV6_HOPOPTS: 1516 case IPV6_RTHDR: 1517 case IPV6_DSTOPTS: 1518 case IPV6_RTHDRDSTOPTS: 1519 case IPV6_NEXTHOP: 1520 { 1521 /* new advanced API (RFC3542) */ 1522 u_char *optbuf; 1523 int optbuflen; 1524 struct ip6_pktopts **optp; 1525 1526 /* cannot mix with RFC2292 */ 1527 if (OPTBIT(IN6P_RFC2292)) { 1528 error = EINVAL; 1529 break; 1530 } 1531 1532 if (m && m->m_next) { 1533 error = EINVAL; /* XXX */ 1534 break; 1535 } 1536 if (m) { 1537 optbuf = mtod(m, u_char *); 1538 optbuflen = m->m_len; 1539 } else { 1540 optbuf = NULL; 1541 optbuflen = 0; 1542 } 1543 optp = &inp->inp_outputopts6; 1544 error = ip6_pcbopt(optname, 1545 optbuf, optbuflen, 1546 optp, privileged, uproto); 1547 break; 1548 } 1549 #undef OPTSET 1550 1551 case IPV6_MULTICAST_IF: 1552 case IPV6_MULTICAST_HOPS: 1553 case IPV6_MULTICAST_LOOP: 1554 case IPV6_JOIN_GROUP: 1555 case IPV6_LEAVE_GROUP: 1556 error = ip6_setmoptions(optname, 1557 &inp->inp_moptions6, 1558 m); 1559 break; 1560 1561 case IPV6_PORTRANGE: 1562 optval = *mtod(m, int *); 1563 1564 switch (optval) { 1565 case IPV6_PORTRANGE_DEFAULT: 1566 inp->inp_flags &= ~(IN6P_LOWPORT); 1567 inp->inp_flags &= ~(IN6P_HIGHPORT); 1568 break; 1569 1570 case IPV6_PORTRANGE_HIGH: 1571 inp->inp_flags &= ~(IN6P_LOWPORT); 1572 inp->inp_flags |= IN6P_HIGHPORT; 1573 break; 1574 1575 case IPV6_PORTRANGE_LOW: 1576 inp->inp_flags &= ~(IN6P_HIGHPORT); 1577 inp->inp_flags |= IN6P_LOWPORT; 1578 break; 1579 1580 default: 1581 error = EINVAL; 1582 break; 1583 } 1584 break; 1585 1586 case IPSEC6_OUTSA: 1587 #ifndef IPSEC 1588 error = EINVAL; 1589 #else 1590 s = spltdb(); 1591 if (m == 0 || m->m_len != sizeof(struct tdb_ident)) { 1592 error = EINVAL; 1593 } else { 1594 tdbip = mtod(m, struct tdb_ident *); 1595 tdb = gettdb(tdbip->spi, &tdbip->dst, 1596 tdbip->proto); 1597 if (tdb == NULL) 1598 error = ESRCH; 1599 else 1600 tdb_add_inp(tdb, inp, 0); 1601 } 1602 splx(s); 1603 #endif 1604 break; 1605 1606 case IPV6_AUTH_LEVEL: 1607 case IPV6_ESP_TRANS_LEVEL: 1608 case IPV6_ESP_NETWORK_LEVEL: 1609 case IPV6_IPCOMP_LEVEL: 1610 #ifndef IPSEC 1611 error = EINVAL; 1612 #else 1613 if (m == 0 || m->m_len != sizeof(int)) { 1614 error = EINVAL; 1615 break; 1616 } 1617 optval = *mtod(m, int *); 1618 1619 if (optval < IPSEC_LEVEL_BYPASS || 1620 optval > IPSEC_LEVEL_UNIQUE) { 1621 error = EINVAL; 1622 break; 1623 } 1624 1625 switch (optname) { 1626 case IPV6_AUTH_LEVEL: 1627 if (optval < ipsec_auth_default_level && 1628 suser(p, 0)) { 1629 error = EACCES; 1630 break; 1631 } 1632 inp->inp_seclevel[SL_AUTH] = optval; 1633 break; 1634 1635 case IPV6_ESP_TRANS_LEVEL: 1636 if (optval < ipsec_esp_trans_default_level && 1637 suser(p, 0)) { 1638 error = EACCES; 1639 break; 1640 } 1641 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1642 break; 1643 1644 case IPV6_ESP_NETWORK_LEVEL: 1645 if (optval < ipsec_esp_network_default_level && 1646 suser(p, 0)) { 1647 error = EACCES; 1648 break; 1649 } 1650 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1651 break; 1652 1653 case IPV6_IPCOMP_LEVEL: 1654 if (optval < ipsec_ipcomp_default_level && 1655 suser(p, 0)) { 1656 error = EACCES; 1657 break; 1658 } 1659 inp->inp_seclevel[SL_IPCOMP] = optval; 1660 break; 1661 } 1662 if (!error) 1663 inp->inp_secrequire = get_sa_require(inp); 1664 #endif 1665 break; 1666 1667 default: 1668 error = ENOPROTOOPT; 1669 break; 1670 } 1671 if (m) 1672 (void)m_free(m); 1673 break; 1674 1675 case PRCO_GETOPT: 1676 switch (optname) { 1677 1678 case IPV6_2292PKTOPTIONS: 1679 /* 1680 * RFC3542 (effectively) deprecated the 1681 * semantics of the 2292-style pktoptions. 1682 * Since it was not reliable in nature (i.e., 1683 * applications had to expect the lack of some 1684 * information after all), it would make sense 1685 * to simplify this part by always returning 1686 * empty data. 1687 */ 1688 *mp = m_get(M_WAIT, MT_SOOPTS); 1689 (*mp)->m_len = 0; 1690 break; 1691 1692 case IPV6_RECVHOPOPTS: 1693 case IPV6_RECVDSTOPTS: 1694 case IPV6_RECVRTHDRDSTOPTS: 1695 case IPV6_UNICAST_HOPS: 1696 case IPV6_RECVPKTINFO: 1697 case IPV6_RECVHOPLIMIT: 1698 case IPV6_RECVRTHDR: 1699 case IPV6_RECVPATHMTU: 1700 1701 case IPV6_FAITH: 1702 case IPV6_V6ONLY: 1703 case IPV6_PORTRANGE: 1704 case IPV6_RECVTCLASS: 1705 case IPV6_AUTOFLOWLABEL: 1706 switch (optname) { 1707 1708 case IPV6_RECVHOPOPTS: 1709 optval = OPTBIT(IN6P_HOPOPTS); 1710 break; 1711 1712 case IPV6_RECVDSTOPTS: 1713 optval = OPTBIT(IN6P_DSTOPTS); 1714 break; 1715 1716 case IPV6_RECVRTHDRDSTOPTS: 1717 optval = OPTBIT(IN6P_RTHDRDSTOPTS); 1718 break; 1719 1720 case IPV6_UNICAST_HOPS: 1721 optval = inp->inp_hops; 1722 break; 1723 1724 case IPV6_RECVPKTINFO: 1725 optval = OPTBIT(IN6P_PKTINFO); 1726 break; 1727 1728 case IPV6_RECVHOPLIMIT: 1729 optval = OPTBIT(IN6P_HOPLIMIT); 1730 break; 1731 1732 case IPV6_RECVRTHDR: 1733 optval = OPTBIT(IN6P_RTHDR); 1734 break; 1735 1736 case IPV6_RECVPATHMTU: 1737 optval = OPTBIT(IN6P_MTU); 1738 break; 1739 1740 case IPV6_FAITH: 1741 optval = OPTBIT(IN6P_FAITH); 1742 break; 1743 1744 case IPV6_V6ONLY: 1745 optval = (ip6_v6only != 0); /* XXX */ 1746 break; 1747 1748 case IPV6_PORTRANGE: 1749 { 1750 int flags; 1751 flags = inp->inp_flags; 1752 if (flags & IN6P_HIGHPORT) 1753 optval = IPV6_PORTRANGE_HIGH; 1754 else if (flags & IN6P_LOWPORT) 1755 optval = IPV6_PORTRANGE_LOW; 1756 else 1757 optval = 0; 1758 break; 1759 } 1760 case IPV6_RECVTCLASS: 1761 optval = OPTBIT(IN6P_TCLASS); 1762 break; 1763 1764 case IPV6_AUTOFLOWLABEL: 1765 optval = OPTBIT(IN6P_AUTOFLOWLABEL); 1766 break; 1767 } 1768 if (error) 1769 break; 1770 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1771 m->m_len = sizeof(int); 1772 *mtod(m, int *) = optval; 1773 break; 1774 1775 case IPV6_PATHMTU: 1776 { 1777 u_long pmtu = 0; 1778 struct ip6_mtuinfo mtuinfo; 1779 struct route_in6 *ro = (struct route_in6 *)&inp->inp_route6; 1780 1781 if (!(so->so_state & SS_ISCONNECTED)) 1782 return (ENOTCONN); 1783 /* 1784 * XXX: we dot not consider the case of source 1785 * routing, or optional information to specify 1786 * the outgoing interface. 1787 */ 1788 error = ip6_getpmtu(ro, NULL, NULL, 1789 &inp->inp_faddr6, &pmtu, NULL); 1790 if (error) 1791 break; 1792 if (pmtu > IPV6_MAXPACKET) 1793 pmtu = IPV6_MAXPACKET; 1794 1795 bzero(&mtuinfo, sizeof(mtuinfo)); 1796 mtuinfo.ip6m_mtu = (u_int32_t)pmtu; 1797 optdata = (void *)&mtuinfo; 1798 optdatalen = sizeof(mtuinfo); 1799 if (optdatalen > MCLBYTES) 1800 return (EMSGSIZE); /* XXX */ 1801 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1802 if (optdatalen > MLEN) 1803 MCLGET(m, M_WAIT); 1804 m->m_len = optdatalen; 1805 bcopy(optdata, mtod(m, void *), optdatalen); 1806 break; 1807 } 1808 1809 case IPV6_2292PKTINFO: 1810 case IPV6_2292HOPLIMIT: 1811 case IPV6_2292HOPOPTS: 1812 case IPV6_2292RTHDR: 1813 case IPV6_2292DSTOPTS: 1814 switch (optname) { 1815 case IPV6_2292PKTINFO: 1816 optval = OPTBIT(IN6P_PKTINFO); 1817 break; 1818 case IPV6_2292HOPLIMIT: 1819 optval = OPTBIT(IN6P_HOPLIMIT); 1820 break; 1821 case IPV6_2292HOPOPTS: 1822 optval = OPTBIT(IN6P_HOPOPTS); 1823 break; 1824 case IPV6_2292RTHDR: 1825 optval = OPTBIT(IN6P_RTHDR); 1826 break; 1827 case IPV6_2292DSTOPTS: 1828 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); 1829 break; 1830 } 1831 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1832 m->m_len = sizeof(int); 1833 *mtod(m, int *) = optval; 1834 break; 1835 case IPV6_PKTINFO: 1836 case IPV6_HOPOPTS: 1837 case IPV6_RTHDR: 1838 case IPV6_DSTOPTS: 1839 case IPV6_RTHDRDSTOPTS: 1840 case IPV6_NEXTHOP: 1841 case IPV6_TCLASS: 1842 case IPV6_DONTFRAG: 1843 case IPV6_USE_MIN_MTU: 1844 error = ip6_getpcbopt(inp->inp_outputopts6, 1845 optname, mp); 1846 break; 1847 1848 case IPV6_MULTICAST_IF: 1849 case IPV6_MULTICAST_HOPS: 1850 case IPV6_MULTICAST_LOOP: 1851 case IPV6_JOIN_GROUP: 1852 case IPV6_LEAVE_GROUP: 1853 error = ip6_getmoptions(optname, 1854 inp->inp_moptions6, mp); 1855 break; 1856 1857 case IPSEC6_OUTSA: 1858 #ifndef IPSEC 1859 error = EINVAL; 1860 #else 1861 s = spltdb(); 1862 if (inp->inp_tdb_out == NULL) { 1863 error = ENOENT; 1864 } else { 1865 tdbi.spi = inp->inp_tdb_out->tdb_spi; 1866 tdbi.dst = inp->inp_tdb_out->tdb_dst; 1867 tdbi.proto = inp->inp_tdb_out->tdb_sproto; 1868 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1869 m->m_len = sizeof(tdbi); 1870 bcopy((caddr_t)&tdbi, mtod(m, caddr_t), 1871 (unsigned)m->m_len); 1872 } 1873 splx(s); 1874 #endif 1875 break; 1876 1877 case IPV6_AUTH_LEVEL: 1878 case IPV6_ESP_TRANS_LEVEL: 1879 case IPV6_ESP_NETWORK_LEVEL: 1880 case IPV6_IPCOMP_LEVEL: 1881 #ifndef IPSEC 1882 m->m_len = sizeof(int); 1883 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1884 #else 1885 m->m_len = sizeof(int); 1886 switch (optname) { 1887 case IPV6_AUTH_LEVEL: 1888 optval = inp->inp_seclevel[SL_AUTH]; 1889 break; 1890 1891 case IPV6_ESP_TRANS_LEVEL: 1892 optval = 1893 inp->inp_seclevel[SL_ESP_TRANS]; 1894 break; 1895 1896 case IPV6_ESP_NETWORK_LEVEL: 1897 optval = 1898 inp->inp_seclevel[SL_ESP_NETWORK]; 1899 break; 1900 1901 case IPV6_IPCOMP_LEVEL: 1902 optval = inp->inp_seclevel[SL_IPCOMP]; 1903 break; 1904 } 1905 *mtod(m, int *) = optval; 1906 #endif 1907 break; 1908 1909 default: 1910 error = ENOPROTOOPT; 1911 break; 1912 } 1913 break; 1914 } 1915 } else { 1916 error = EINVAL; 1917 if (op == PRCO_SETOPT && *mp) 1918 (void)m_free(*mp); 1919 } 1920 return (error); 1921 } 1922 1923 int 1924 ip6_raw_ctloutput(int op, struct socket *so, int level, int optname, 1925 struct mbuf **mp) 1926 { 1927 int error = 0, optval, optlen; 1928 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); 1929 struct inpcb *inp = sotoinpcb(so); 1930 struct mbuf *m = *mp; 1931 1932 optlen = m ? m->m_len : 0; 1933 1934 if (level != IPPROTO_IPV6) { 1935 if (op == PRCO_SETOPT && *mp) 1936 (void)m_free(*mp); 1937 return (EINVAL); 1938 } 1939 1940 switch (optname) { 1941 case IPV6_CHECKSUM: 1942 /* 1943 * For ICMPv6 sockets, no modification allowed for checksum 1944 * offset, permit "no change" values to help existing apps. 1945 * 1946 * RFC3542 says: "An attempt to set IPV6_CHECKSUM 1947 * for an ICMPv6 socket will fail." 1948 * The current behavior does not meet RFC3542. 1949 */ 1950 switch (op) { 1951 case PRCO_SETOPT: 1952 if (optlen != sizeof(int)) { 1953 error = EINVAL; 1954 break; 1955 } 1956 optval = *mtod(m, int *); 1957 if ((optval % 2) != 0) { 1958 /* the API assumes even offset values */ 1959 error = EINVAL; 1960 } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { 1961 if (optval != icmp6off) 1962 error = EINVAL; 1963 } else 1964 inp->in6p_cksum = optval; 1965 break; 1966 1967 case PRCO_GETOPT: 1968 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) 1969 optval = icmp6off; 1970 else 1971 optval = inp->in6p_cksum; 1972 1973 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1974 m->m_len = sizeof(int); 1975 *mtod(m, int *) = optval; 1976 break; 1977 1978 default: 1979 error = EINVAL; 1980 break; 1981 } 1982 break; 1983 1984 default: 1985 error = ENOPROTOOPT; 1986 break; 1987 } 1988 1989 if (op == PRCO_SETOPT && m) 1990 (void)m_free(m); 1991 1992 return (error); 1993 } 1994 1995 /* 1996 * Set up IP6 options in pcb for insertion in output packets. 1997 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1998 * with destination address if source routed. 1999 */ 2000 static int 2001 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so) 2002 { 2003 struct ip6_pktopts *opt = *pktopt; 2004 int error = 0; 2005 struct proc *p = curproc; /* XXX */ 2006 int priv = 0; 2007 2008 /* turn off any old options. */ 2009 if (opt) 2010 ip6_clearpktopts(opt, -1); 2011 else 2012 opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); 2013 *pktopt = 0; 2014 2015 if (!m || m->m_len == 0) { 2016 /* 2017 * Only turning off any previous options, regardless of 2018 * whether the opt is just created or given. 2019 */ 2020 free(opt, M_IP6OPT); 2021 return (0); 2022 } 2023 2024 /* set options specified by user. */ 2025 if (p && !suser(p, 0)) 2026 priv = 1; 2027 if ((error = ip6_setpktopts(m, opt, NULL, priv, 2028 so->so_proto->pr_protocol)) != 0) { 2029 ip6_clearpktopts(opt, -1); /* XXX discard all options */ 2030 free(opt, M_IP6OPT); 2031 return (error); 2032 } 2033 *pktopt = opt; 2034 return (0); 2035 } 2036 2037 /* 2038 * initialize ip6_pktopts. beware that there are non-zero default values in 2039 * the struct. 2040 */ 2041 void 2042 ip6_initpktopts(struct ip6_pktopts *opt) 2043 { 2044 2045 bzero(opt, sizeof(*opt)); 2046 opt->ip6po_hlim = -1; /* -1 means default hop limit */ 2047 opt->ip6po_tclass = -1; /* -1 means default traffic class */ 2048 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; 2049 } 2050 2051 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */ 2052 static int 2053 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, 2054 int priv, int uproto) 2055 { 2056 struct ip6_pktopts *opt; 2057 2058 if (*pktopt == NULL) { 2059 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, 2060 M_WAITOK); 2061 ip6_initpktopts(*pktopt); 2062 } 2063 opt = *pktopt; 2064 2065 return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto)); 2066 } 2067 2068 static int 2069 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct mbuf **mp) 2070 { 2071 void *optdata = NULL; 2072 int optdatalen = 0; 2073 struct ip6_ext *ip6e; 2074 int error = 0; 2075 struct in6_pktinfo null_pktinfo; 2076 int deftclass = 0, on; 2077 int defminmtu = IP6PO_MINMTU_MCASTONLY; 2078 struct mbuf *m; 2079 2080 switch (optname) { 2081 case IPV6_PKTINFO: 2082 if (pktopt && pktopt->ip6po_pktinfo) 2083 optdata = (void *)pktopt->ip6po_pktinfo; 2084 else { 2085 /* XXX: we don't have to do this every time... */ 2086 bzero(&null_pktinfo, sizeof(null_pktinfo)); 2087 optdata = (void *)&null_pktinfo; 2088 } 2089 optdatalen = sizeof(struct in6_pktinfo); 2090 break; 2091 case IPV6_TCLASS: 2092 if (pktopt && pktopt->ip6po_tclass >= 0) 2093 optdata = (void *)&pktopt->ip6po_tclass; 2094 else 2095 optdata = (void *)&deftclass; 2096 optdatalen = sizeof(int); 2097 break; 2098 case IPV6_HOPOPTS: 2099 if (pktopt && pktopt->ip6po_hbh) { 2100 optdata = (void *)pktopt->ip6po_hbh; 2101 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; 2102 optdatalen = (ip6e->ip6e_len + 1) << 3; 2103 } 2104 break; 2105 case IPV6_RTHDR: 2106 if (pktopt && pktopt->ip6po_rthdr) { 2107 optdata = (void *)pktopt->ip6po_rthdr; 2108 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; 2109 optdatalen = (ip6e->ip6e_len + 1) << 3; 2110 } 2111 break; 2112 case IPV6_RTHDRDSTOPTS: 2113 if (pktopt && pktopt->ip6po_dest1) { 2114 optdata = (void *)pktopt->ip6po_dest1; 2115 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; 2116 optdatalen = (ip6e->ip6e_len + 1) << 3; 2117 } 2118 break; 2119 case IPV6_DSTOPTS: 2120 if (pktopt && pktopt->ip6po_dest2) { 2121 optdata = (void *)pktopt->ip6po_dest2; 2122 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; 2123 optdatalen = (ip6e->ip6e_len + 1) << 3; 2124 } 2125 break; 2126 case IPV6_NEXTHOP: 2127 if (pktopt && pktopt->ip6po_nexthop) { 2128 optdata = (void *)pktopt->ip6po_nexthop; 2129 optdatalen = pktopt->ip6po_nexthop->sa_len; 2130 } 2131 break; 2132 case IPV6_USE_MIN_MTU: 2133 if (pktopt) 2134 optdata = (void *)&pktopt->ip6po_minmtu; 2135 else 2136 optdata = (void *)&defminmtu; 2137 optdatalen = sizeof(int); 2138 break; 2139 case IPV6_DONTFRAG: 2140 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) 2141 on = 1; 2142 else 2143 on = 0; 2144 optdata = (void *)&on; 2145 optdatalen = sizeof(on); 2146 break; 2147 default: /* should not happen */ 2148 #ifdef DIAGNOSTIC 2149 panic("ip6_getpcbopt: unexpected option\n"); 2150 #endif 2151 return (ENOPROTOOPT); 2152 } 2153 2154 if (optdatalen > MCLBYTES) 2155 return (EMSGSIZE); /* XXX */ 2156 *mp = m = m_get(M_WAIT, MT_SOOPTS); 2157 if (optdatalen > MLEN) 2158 MCLGET(m, M_WAIT); 2159 m->m_len = optdatalen; 2160 if (optdatalen) 2161 bcopy(optdata, mtod(m, void *), optdatalen); 2162 2163 return (error); 2164 } 2165 2166 void 2167 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) 2168 { 2169 if (optname == -1 || optname == IPV6_PKTINFO) { 2170 if (pktopt->ip6po_pktinfo) 2171 free(pktopt->ip6po_pktinfo, M_IP6OPT); 2172 pktopt->ip6po_pktinfo = NULL; 2173 } 2174 if (optname == -1 || optname == IPV6_HOPLIMIT) 2175 pktopt->ip6po_hlim = -1; 2176 if (optname == -1 || optname == IPV6_TCLASS) 2177 pktopt->ip6po_tclass = -1; 2178 if (optname == -1 || optname == IPV6_NEXTHOP) { 2179 if (pktopt->ip6po_nextroute.ro_rt) { 2180 RTFREE(pktopt->ip6po_nextroute.ro_rt); 2181 pktopt->ip6po_nextroute.ro_rt = NULL; 2182 } 2183 if (pktopt->ip6po_nexthop) 2184 free(pktopt->ip6po_nexthop, M_IP6OPT); 2185 pktopt->ip6po_nexthop = NULL; 2186 } 2187 if (optname == -1 || optname == IPV6_HOPOPTS) { 2188 if (pktopt->ip6po_hbh) 2189 free(pktopt->ip6po_hbh, M_IP6OPT); 2190 pktopt->ip6po_hbh = NULL; 2191 } 2192 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { 2193 if (pktopt->ip6po_dest1) 2194 free(pktopt->ip6po_dest1, M_IP6OPT); 2195 pktopt->ip6po_dest1 = NULL; 2196 } 2197 if (optname == -1 || optname == IPV6_RTHDR) { 2198 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) 2199 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); 2200 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; 2201 if (pktopt->ip6po_route.ro_rt) { 2202 RTFREE(pktopt->ip6po_route.ro_rt); 2203 pktopt->ip6po_route.ro_rt = NULL; 2204 } 2205 } 2206 if (optname == -1 || optname == IPV6_DSTOPTS) { 2207 if (pktopt->ip6po_dest2) 2208 free(pktopt->ip6po_dest2, M_IP6OPT); 2209 pktopt->ip6po_dest2 = NULL; 2210 } 2211 } 2212 2213 #define PKTOPT_EXTHDRCPY(type) \ 2214 do {\ 2215 if (src->type) {\ 2216 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ 2217 dst->type = malloc(hlen, M_IP6OPT, canwait);\ 2218 if (dst->type == NULL && canwait == M_NOWAIT)\ 2219 goto bad;\ 2220 bcopy(src->type, dst->type, hlen);\ 2221 }\ 2222 } while (/*CONSTCOND*/ 0) 2223 2224 static int 2225 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait) 2226 { 2227 dst->ip6po_hlim = src->ip6po_hlim; 2228 dst->ip6po_tclass = src->ip6po_tclass; 2229 dst->ip6po_flags = src->ip6po_flags; 2230 if (src->ip6po_pktinfo) { 2231 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), 2232 M_IP6OPT, canwait); 2233 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) 2234 goto bad; 2235 *dst->ip6po_pktinfo = *src->ip6po_pktinfo; 2236 } 2237 if (src->ip6po_nexthop) { 2238 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, 2239 M_IP6OPT, canwait); 2240 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) 2241 goto bad; 2242 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, 2243 src->ip6po_nexthop->sa_len); 2244 } 2245 PKTOPT_EXTHDRCPY(ip6po_hbh); 2246 PKTOPT_EXTHDRCPY(ip6po_dest1); 2247 PKTOPT_EXTHDRCPY(ip6po_dest2); 2248 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ 2249 return (0); 2250 2251 bad: 2252 ip6_clearpktopts(dst, -1); 2253 return (ENOBUFS); 2254 } 2255 #undef PKTOPT_EXTHDRCPY 2256 2257 void 2258 ip6_freepcbopts(struct ip6_pktopts *pktopt) 2259 { 2260 if (pktopt == NULL) 2261 return; 2262 2263 ip6_clearpktopts(pktopt, -1); 2264 2265 free(pktopt, M_IP6OPT); 2266 } 2267 2268 /* 2269 * Set the IP6 multicast options in response to user setsockopt(). 2270 */ 2271 static int 2272 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m) 2273 { 2274 int error = 0; 2275 u_int loop, ifindex; 2276 struct ipv6_mreq *mreq; 2277 struct ifnet *ifp; 2278 struct ip6_moptions *im6o = *im6op; 2279 struct route_in6 ro; 2280 struct sockaddr_in6 *dst; 2281 struct in6_multi_mship *imm; 2282 struct proc *p = curproc; /* XXX */ 2283 2284 if (im6o == NULL) { 2285 /* 2286 * No multicast option buffer attached to the pcb; 2287 * allocate one and initialize to default values. 2288 */ 2289 im6o = (struct ip6_moptions *) 2290 malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK); 2291 2292 if (im6o == NULL) 2293 return (ENOBUFS); 2294 *im6op = im6o; 2295 im6o->im6o_multicast_ifp = NULL; 2296 im6o->im6o_multicast_hlim = ip6_defmcasthlim; 2297 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP; 2298 LIST_INIT(&im6o->im6o_memberships); 2299 } 2300 2301 switch (optname) { 2302 2303 case IPV6_MULTICAST_IF: 2304 /* 2305 * Select the interface for outgoing multicast packets. 2306 */ 2307 if (m == NULL || m->m_len != sizeof(u_int)) { 2308 error = EINVAL; 2309 break; 2310 } 2311 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex)); 2312 if (ifindex == 0) 2313 ifp = NULL; 2314 else { 2315 if (ifindex < 0 || if_indexlim <= ifindex || 2316 !ifindex2ifnet[ifindex]) { 2317 error = ENXIO; /* XXX EINVAL? */ 2318 break; 2319 } 2320 ifp = ifindex2ifnet[ifindex]; 2321 if (ifp == NULL || 2322 (ifp->if_flags & IFF_MULTICAST) == 0) { 2323 error = EADDRNOTAVAIL; 2324 break; 2325 } 2326 } 2327 im6o->im6o_multicast_ifp = ifp; 2328 break; 2329 2330 case IPV6_MULTICAST_HOPS: 2331 { 2332 /* 2333 * Set the IP6 hoplimit for outgoing multicast packets. 2334 */ 2335 int optval; 2336 if (m == NULL || m->m_len != sizeof(int)) { 2337 error = EINVAL; 2338 break; 2339 } 2340 bcopy(mtod(m, u_int *), &optval, sizeof(optval)); 2341 if (optval < -1 || optval >= 256) 2342 error = EINVAL; 2343 else if (optval == -1) 2344 im6o->im6o_multicast_hlim = ip6_defmcasthlim; 2345 else 2346 im6o->im6o_multicast_hlim = optval; 2347 break; 2348 } 2349 2350 case IPV6_MULTICAST_LOOP: 2351 /* 2352 * Set the loopback flag for outgoing multicast packets. 2353 * Must be zero or one. 2354 */ 2355 if (m == NULL || m->m_len != sizeof(u_int)) { 2356 error = EINVAL; 2357 break; 2358 } 2359 bcopy(mtod(m, u_int *), &loop, sizeof(loop)); 2360 if (loop > 1) { 2361 error = EINVAL; 2362 break; 2363 } 2364 im6o->im6o_multicast_loop = loop; 2365 break; 2366 2367 case IPV6_JOIN_GROUP: 2368 /* 2369 * Add a multicast group membership. 2370 * Group must be a valid IP6 multicast address. 2371 */ 2372 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { 2373 error = EINVAL; 2374 break; 2375 } 2376 mreq = mtod(m, struct ipv6_mreq *); 2377 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { 2378 /* 2379 * We use the unspecified address to specify to accept 2380 * all multicast addresses. Only super user is allowed 2381 * to do this. 2382 */ 2383 if (suser(p, 0)) 2384 { 2385 error = EACCES; 2386 break; 2387 } 2388 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { 2389 error = EINVAL; 2390 break; 2391 } 2392 2393 /* 2394 * If no interface was explicitly specified, choose an 2395 * appropriate one according to the given multicast address. 2396 */ 2397 if (mreq->ipv6mr_interface == 0) { 2398 /* 2399 * Look up the routing table for the 2400 * address, and choose the outgoing interface. 2401 * XXX: is it a good approach? 2402 */ 2403 ro.ro_rt = NULL; 2404 dst = (struct sockaddr_in6 *)&ro.ro_dst; 2405 bzero(dst, sizeof(*dst)); 2406 dst->sin6_len = sizeof(struct sockaddr_in6); 2407 dst->sin6_family = AF_INET6; 2408 dst->sin6_addr = mreq->ipv6mr_multiaddr; 2409 rtalloc((struct route *)&ro); 2410 if (ro.ro_rt == NULL) { 2411 error = EADDRNOTAVAIL; 2412 break; 2413 } 2414 ifp = ro.ro_rt->rt_ifp; 2415 rtfree(ro.ro_rt); 2416 } else { 2417 /* 2418 * If the interface is specified, validate it. 2419 */ 2420 if (mreq->ipv6mr_interface < 0 || 2421 if_indexlim <= mreq->ipv6mr_interface || 2422 !ifindex2ifnet[mreq->ipv6mr_interface]) { 2423 error = ENXIO; /* XXX EINVAL? */ 2424 break; 2425 } 2426 ifp = ifindex2ifnet[mreq->ipv6mr_interface]; 2427 } 2428 2429 /* 2430 * See if we found an interface, and confirm that it 2431 * supports multicast 2432 */ 2433 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 2434 error = EADDRNOTAVAIL; 2435 break; 2436 } 2437 /* 2438 * Put interface index into the multicast address, 2439 * if the address has link/interface-local scope. 2440 */ 2441 if (IN6_IS_SCOPE_EMBED(&mreq->ipv6mr_multiaddr)) { 2442 mreq->ipv6mr_multiaddr.s6_addr16[1] = 2443 htons(ifp->if_index); 2444 } 2445 /* 2446 * See if the membership already exists. 2447 */ 2448 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) 2449 if (imm->i6mm_maddr->in6m_ifp == ifp && 2450 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, 2451 &mreq->ipv6mr_multiaddr)) 2452 break; 2453 if (imm != NULL) { 2454 error = EADDRINUSE; 2455 break; 2456 } 2457 /* 2458 * Everything looks good; add a new record to the multicast 2459 * address list for the given interface. 2460 */ 2461 imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error); 2462 if (!imm) 2463 break; 2464 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 2465 break; 2466 2467 case IPV6_LEAVE_GROUP: 2468 /* 2469 * Drop a multicast group membership. 2470 * Group must be a valid IP6 multicast address. 2471 */ 2472 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { 2473 error = EINVAL; 2474 break; 2475 } 2476 mreq = mtod(m, struct ipv6_mreq *); 2477 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { 2478 if (suser(p, 0)) 2479 { 2480 error = EACCES; 2481 break; 2482 } 2483 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { 2484 error = EINVAL; 2485 break; 2486 } 2487 /* 2488 * If an interface address was specified, get a pointer 2489 * to its ifnet structure. 2490 */ 2491 if (mreq->ipv6mr_interface == 0) 2492 ifp = NULL; 2493 else { 2494 if (mreq->ipv6mr_interface < 0 || 2495 if_indexlim <= mreq->ipv6mr_interface || 2496 !ifindex2ifnet[mreq->ipv6mr_interface]) { 2497 error = ENXIO; /* XXX EINVAL? */ 2498 break; 2499 } 2500 ifp = ifindex2ifnet[mreq->ipv6mr_interface]; 2501 } 2502 2503 /* 2504 * Put interface index into the multicast address, 2505 * if the address has link-local scope. 2506 */ 2507 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { 2508 mreq->ipv6mr_multiaddr.s6_addr16[1] = 2509 htons(mreq->ipv6mr_interface); 2510 } 2511 /* 2512 * Find the membership in the membership list. 2513 */ 2514 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) { 2515 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && 2516 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, 2517 &mreq->ipv6mr_multiaddr)) 2518 break; 2519 } 2520 if (imm == NULL) { 2521 /* Unable to resolve interface */ 2522 error = EADDRNOTAVAIL; 2523 break; 2524 } 2525 /* 2526 * Give up the multicast address record to which the 2527 * membership points. 2528 */ 2529 LIST_REMOVE(imm, i6mm_chain); 2530 in6_leavegroup(imm); 2531 break; 2532 2533 default: 2534 error = EOPNOTSUPP; 2535 break; 2536 } 2537 2538 /* 2539 * If all options have default values, no need to keep the option 2540 * structure. 2541 */ 2542 if (im6o->im6o_multicast_ifp == NULL && 2543 im6o->im6o_multicast_hlim == ip6_defmcasthlim && 2544 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP && 2545 LIST_EMPTY(&im6o->im6o_memberships)) { 2546 free(*im6op, M_IPMOPTS); 2547 *im6op = NULL; 2548 } 2549 2550 return (error); 2551 } 2552 2553 /* 2554 * Return the IP6 multicast options in response to user getsockopt(). 2555 */ 2556 static int 2557 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp) 2558 { 2559 u_int *hlim, *loop, *ifindex; 2560 2561 *mp = m_get(M_WAIT, MT_SOOPTS); 2562 2563 switch (optname) { 2564 2565 case IPV6_MULTICAST_IF: 2566 ifindex = mtod(*mp, u_int *); 2567 (*mp)->m_len = sizeof(u_int); 2568 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) 2569 *ifindex = 0; 2570 else 2571 *ifindex = im6o->im6o_multicast_ifp->if_index; 2572 return (0); 2573 2574 case IPV6_MULTICAST_HOPS: 2575 hlim = mtod(*mp, u_int *); 2576 (*mp)->m_len = sizeof(u_int); 2577 if (im6o == NULL) 2578 *hlim = ip6_defmcasthlim; 2579 else 2580 *hlim = im6o->im6o_multicast_hlim; 2581 return (0); 2582 2583 case IPV6_MULTICAST_LOOP: 2584 loop = mtod(*mp, u_int *); 2585 (*mp)->m_len = sizeof(u_int); 2586 if (im6o == NULL) 2587 *loop = ip6_defmcasthlim; 2588 else 2589 *loop = im6o->im6o_multicast_loop; 2590 return (0); 2591 2592 default: 2593 return (EOPNOTSUPP); 2594 } 2595 } 2596 2597 /* 2598 * Discard the IP6 multicast options. 2599 */ 2600 void 2601 ip6_freemoptions(struct ip6_moptions *im6o) 2602 { 2603 struct in6_multi_mship *imm; 2604 2605 if (im6o == NULL) 2606 return; 2607 2608 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 2609 imm = LIST_FIRST(&im6o->im6o_memberships); 2610 LIST_REMOVE(imm, i6mm_chain); 2611 in6_leavegroup(imm); 2612 } 2613 free(im6o, M_IPMOPTS); 2614 } 2615 2616 /* 2617 * Set IPv6 outgoing packet options based on advanced API. 2618 */ 2619 int 2620 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, 2621 struct ip6_pktopts *stickyopt, int priv, int uproto) 2622 { 2623 u_int clen; 2624 struct cmsghdr *cm = 0; 2625 caddr_t cmsgs; 2626 int error; 2627 2628 if (control == NULL || opt == NULL) 2629 return (EINVAL); 2630 2631 ip6_initpktopts(opt); 2632 if (stickyopt) { 2633 int error; 2634 2635 /* 2636 * If stickyopt is provided, make a local copy of the options 2637 * for this particular packet, then override them by ancillary 2638 * objects. 2639 * XXX: copypktopts() does not copy the cached route to a next 2640 * hop (if any). This is not very good in terms of efficiency, 2641 * but we can allow this since this option should be rarely 2642 * used. 2643 */ 2644 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) 2645 return (error); 2646 } 2647 2648 /* 2649 * XXX: Currently, we assume all the optional information is stored 2650 * in a single mbuf. 2651 */ 2652 if (control->m_next) 2653 return (EINVAL); 2654 2655 clen = control->m_len; 2656 cmsgs = mtod(control, caddr_t); 2657 do { 2658 if (clen < CMSG_LEN(0)) 2659 return (EINVAL); 2660 cm = (struct cmsghdr *)cmsgs; 2661 if (cm->cmsg_len < CMSG_LEN(0) || 2662 CMSG_ALIGN(cm->cmsg_len) > clen) 2663 return (EINVAL); 2664 if (cm->cmsg_level == IPPROTO_IPV6) { 2665 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), 2666 cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto); 2667 if (error) 2668 return (error); 2669 } 2670 2671 clen -= CMSG_ALIGN(cm->cmsg_len); 2672 cmsgs += CMSG_ALIGN(cm->cmsg_len); 2673 } while (clen); 2674 2675 return (0); 2676 } 2677 2678 /* 2679 * Set a particular packet option, as a sticky option or an ancillary data 2680 * item. "len" can be 0 only when it's a sticky option. 2681 * We have 4 cases of combination of "sticky" and "cmsg": 2682 * "sticky=0, cmsg=0": impossible 2683 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data 2684 * "sticky=1, cmsg=0": RFC3542 socket option 2685 * "sticky=1, cmsg=1": RFC2292 socket option 2686 */ 2687 static int 2688 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, 2689 int priv, int sticky, int cmsg, int uproto) 2690 { 2691 int minmtupolicy; 2692 2693 if (!sticky && !cmsg) { 2694 #ifdef DIAGNOSTIC 2695 printf("ip6_setpktopt: impossible case\n"); 2696 #endif 2697 return (EINVAL); 2698 } 2699 2700 /* 2701 * IPV6_2292xxx is for backward compatibility to RFC2292, and should 2702 * not be specified in the context of RFC3542. Conversely, 2703 * RFC3542 types should not be specified in the context of RFC2292. 2704 */ 2705 if (!cmsg) { 2706 switch (optname) { 2707 case IPV6_2292PKTINFO: 2708 case IPV6_2292HOPLIMIT: 2709 case IPV6_2292NEXTHOP: 2710 case IPV6_2292HOPOPTS: 2711 case IPV6_2292DSTOPTS: 2712 case IPV6_2292RTHDR: 2713 case IPV6_2292PKTOPTIONS: 2714 return (ENOPROTOOPT); 2715 } 2716 } 2717 if (sticky && cmsg) { 2718 switch (optname) { 2719 case IPV6_PKTINFO: 2720 case IPV6_HOPLIMIT: 2721 case IPV6_NEXTHOP: 2722 case IPV6_HOPOPTS: 2723 case IPV6_DSTOPTS: 2724 case IPV6_RTHDRDSTOPTS: 2725 case IPV6_RTHDR: 2726 case IPV6_USE_MIN_MTU: 2727 case IPV6_DONTFRAG: 2728 case IPV6_TCLASS: 2729 return (ENOPROTOOPT); 2730 } 2731 } 2732 2733 switch (optname) { 2734 case IPV6_2292PKTINFO: 2735 case IPV6_PKTINFO: 2736 { 2737 struct ifnet *ifp = NULL; 2738 struct in6_pktinfo *pktinfo; 2739 2740 if (len != sizeof(struct in6_pktinfo)) 2741 return (EINVAL); 2742 2743 pktinfo = (struct in6_pktinfo *)buf; 2744 2745 /* 2746 * An application can clear any sticky IPV6_PKTINFO option by 2747 * doing a "regular" setsockopt with ipi6_addr being 2748 * in6addr_any and ipi6_ifindex being zero. 2749 * [RFC 3542, Section 6] 2750 */ 2751 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && 2752 pktinfo->ipi6_ifindex == 0 && 2753 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 2754 ip6_clearpktopts(opt, optname); 2755 break; 2756 } 2757 2758 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && 2759 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 2760 return (EINVAL); 2761 } 2762 2763 /* validate the interface index if specified. */ 2764 if (pktinfo->ipi6_ifindex >= if_indexlim || 2765 pktinfo->ipi6_ifindex < 0) { 2766 return (ENXIO); 2767 } 2768 if (pktinfo->ipi6_ifindex) { 2769 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex]; 2770 if (ifp == NULL) 2771 return (ENXIO); 2772 } 2773 2774 /* 2775 * We store the address anyway, and let in6_selectsrc() 2776 * validate the specified address. This is because ipi6_addr 2777 * may not have enough information about its scope zone, and 2778 * we may need additional information (such as outgoing 2779 * interface or the scope zone of a destination address) to 2780 * disambiguate the scope. 2781 * XXX: the delay of the validation may confuse the 2782 * application when it is used as a sticky option. 2783 */ 2784 if (opt->ip6po_pktinfo == NULL) { 2785 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), 2786 M_IP6OPT, M_NOWAIT); 2787 if (opt->ip6po_pktinfo == NULL) 2788 return (ENOBUFS); 2789 } 2790 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); 2791 break; 2792 } 2793 2794 case IPV6_2292HOPLIMIT: 2795 case IPV6_HOPLIMIT: 2796 { 2797 int *hlimp; 2798 2799 /* 2800 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT 2801 * to simplify the ordering among hoplimit options. 2802 */ 2803 if (optname == IPV6_HOPLIMIT && sticky) 2804 return (ENOPROTOOPT); 2805 2806 if (len != sizeof(int)) 2807 return (EINVAL); 2808 hlimp = (int *)buf; 2809 if (*hlimp < -1 || *hlimp > 255) 2810 return (EINVAL); 2811 2812 opt->ip6po_hlim = *hlimp; 2813 break; 2814 } 2815 2816 case IPV6_TCLASS: 2817 { 2818 int tclass; 2819 2820 if (len != sizeof(int)) 2821 return (EINVAL); 2822 tclass = *(int *)buf; 2823 if (tclass < -1 || tclass > 255) 2824 return (EINVAL); 2825 2826 opt->ip6po_tclass = tclass; 2827 break; 2828 } 2829 2830 case IPV6_2292NEXTHOP: 2831 case IPV6_NEXTHOP: 2832 if (!priv) 2833 return (EPERM); 2834 2835 if (len == 0) { /* just remove the option */ 2836 ip6_clearpktopts(opt, IPV6_NEXTHOP); 2837 break; 2838 } 2839 2840 /* check if cmsg_len is large enough for sa_len */ 2841 if (len < sizeof(struct sockaddr) || len < *buf) 2842 return (EINVAL); 2843 2844 switch (((struct sockaddr *)buf)->sa_family) { 2845 case AF_INET6: 2846 { 2847 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; 2848 2849 if (sa6->sin6_len != sizeof(struct sockaddr_in6)) 2850 return (EINVAL); 2851 2852 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || 2853 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { 2854 return (EINVAL); 2855 } 2856 if (IN6_IS_SCOPE_EMBED(&sa6->sin6_addr)) { 2857 if (sa6->sin6_scope_id < 0 || 2858 if_indexlim <= sa6->sin6_scope_id || 2859 !ifindex2ifnet[sa6->sin6_scope_id]) 2860 return (EINVAL); 2861 sa6->sin6_addr.s6_addr16[1] = 2862 htonl(sa6->sin6_scope_id); 2863 } else if (sa6->sin6_scope_id) 2864 return (EINVAL); 2865 break; 2866 } 2867 case AF_LINK: /* eventually be supported? */ 2868 default: 2869 return (EAFNOSUPPORT); 2870 } 2871 2872 /* turn off the previous option, then set the new option. */ 2873 ip6_clearpktopts(opt, IPV6_NEXTHOP); 2874 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); 2875 if (opt->ip6po_nexthop == NULL) 2876 return (ENOBUFS); 2877 bcopy(buf, opt->ip6po_nexthop, *buf); 2878 break; 2879 2880 case IPV6_2292HOPOPTS: 2881 case IPV6_HOPOPTS: 2882 { 2883 struct ip6_hbh *hbh; 2884 int hbhlen; 2885 2886 /* 2887 * XXX: We don't allow a non-privileged user to set ANY HbH 2888 * options, since per-option restriction has too much 2889 * overhead. 2890 */ 2891 if (!priv) 2892 return (EPERM); 2893 2894 if (len == 0) { 2895 ip6_clearpktopts(opt, IPV6_HOPOPTS); 2896 break; /* just remove the option */ 2897 } 2898 2899 /* message length validation */ 2900 if (len < sizeof(struct ip6_hbh)) 2901 return (EINVAL); 2902 hbh = (struct ip6_hbh *)buf; 2903 hbhlen = (hbh->ip6h_len + 1) << 3; 2904 if (len != hbhlen) 2905 return (EINVAL); 2906 2907 /* turn off the previous option, then set the new option. */ 2908 ip6_clearpktopts(opt, IPV6_HOPOPTS); 2909 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); 2910 if (opt->ip6po_hbh == NULL) 2911 return (ENOBUFS); 2912 bcopy(hbh, opt->ip6po_hbh, hbhlen); 2913 2914 break; 2915 } 2916 2917 case IPV6_2292DSTOPTS: 2918 case IPV6_DSTOPTS: 2919 case IPV6_RTHDRDSTOPTS: 2920 { 2921 struct ip6_dest *dest, **newdest = NULL; 2922 int destlen; 2923 2924 if (!priv) /* XXX: see the comment for IPV6_HOPOPTS */ 2925 return (EPERM); 2926 2927 if (len == 0) { 2928 ip6_clearpktopts(opt, optname); 2929 break; /* just remove the option */ 2930 } 2931 2932 /* message length validation */ 2933 if (len < sizeof(struct ip6_dest)) 2934 return (EINVAL); 2935 dest = (struct ip6_dest *)buf; 2936 destlen = (dest->ip6d_len + 1) << 3; 2937 if (len != destlen) 2938 return (EINVAL); 2939 /* 2940 * Determine the position that the destination options header 2941 * should be inserted; before or after the routing header. 2942 */ 2943 switch (optname) { 2944 case IPV6_2292DSTOPTS: 2945 /* 2946 * The old advanced API is ambiguous on this point. 2947 * Our approach is to determine the position based 2948 * according to the existence of a routing header. 2949 * Note, however, that this depends on the order of the 2950 * extension headers in the ancillary data; the 1st 2951 * part of the destination options header must appear 2952 * before the routing header in the ancillary data, 2953 * too. 2954 * RFC3542 solved the ambiguity by introducing 2955 * separate ancillary data or option types. 2956 */ 2957 if (opt->ip6po_rthdr == NULL) 2958 newdest = &opt->ip6po_dest1; 2959 else 2960 newdest = &opt->ip6po_dest2; 2961 break; 2962 case IPV6_RTHDRDSTOPTS: 2963 newdest = &opt->ip6po_dest1; 2964 break; 2965 case IPV6_DSTOPTS: 2966 newdest = &opt->ip6po_dest2; 2967 break; 2968 } 2969 2970 /* turn off the previous option, then set the new option. */ 2971 ip6_clearpktopts(opt, optname); 2972 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); 2973 if (*newdest == NULL) 2974 return (ENOBUFS); 2975 bcopy(dest, *newdest, destlen); 2976 2977 break; 2978 } 2979 2980 case IPV6_2292RTHDR: 2981 case IPV6_RTHDR: 2982 { 2983 struct ip6_rthdr *rth; 2984 int rthlen; 2985 2986 if (len == 0) { 2987 ip6_clearpktopts(opt, IPV6_RTHDR); 2988 break; /* just remove the option */ 2989 } 2990 2991 /* message length validation */ 2992 if (len < sizeof(struct ip6_rthdr)) 2993 return (EINVAL); 2994 rth = (struct ip6_rthdr *)buf; 2995 rthlen = (rth->ip6r_len + 1) << 3; 2996 if (len != rthlen) 2997 return (EINVAL); 2998 2999 switch (rth->ip6r_type) { 3000 case IPV6_RTHDR_TYPE_0: 3001 if (rth->ip6r_len == 0) /* must contain one addr */ 3002 return (EINVAL); 3003 if (rth->ip6r_len % 2) /* length must be even */ 3004 return (EINVAL); 3005 if (rth->ip6r_len / 2 != rth->ip6r_segleft) 3006 return (EINVAL); 3007 break; 3008 default: 3009 return (EINVAL); /* not supported */ 3010 } 3011 /* turn off the previous option */ 3012 ip6_clearpktopts(opt, IPV6_RTHDR); 3013 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); 3014 if (opt->ip6po_rthdr == NULL) 3015 return (ENOBUFS); 3016 bcopy(rth, opt->ip6po_rthdr, rthlen); 3017 break; 3018 } 3019 3020 case IPV6_USE_MIN_MTU: 3021 if (len != sizeof(int)) 3022 return (EINVAL); 3023 minmtupolicy = *(int *)buf; 3024 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && 3025 minmtupolicy != IP6PO_MINMTU_DISABLE && 3026 minmtupolicy != IP6PO_MINMTU_ALL) { 3027 return (EINVAL); 3028 } 3029 opt->ip6po_minmtu = minmtupolicy; 3030 break; 3031 3032 case IPV6_DONTFRAG: 3033 if (len != sizeof(int)) 3034 return (EINVAL); 3035 3036 if (uproto == IPPROTO_TCP || *(int *)buf == 0) { 3037 /* 3038 * we ignore this option for TCP sockets. 3039 * (RFC3542 leaves this case unspecified.) 3040 */ 3041 opt->ip6po_flags &= ~IP6PO_DONTFRAG; 3042 } else 3043 opt->ip6po_flags |= IP6PO_DONTFRAG; 3044 break; 3045 3046 default: 3047 return (ENOPROTOOPT); 3048 } /* end of switch */ 3049 3050 return (0); 3051 } 3052 3053 /* 3054 * Routine called from ip6_output() to loop back a copy of an IP6 multicast 3055 * packet to the input queue of a specified interface. Note that this 3056 * calls the output routine of the loopback "driver", but with an interface 3057 * pointer that might NOT be lo0ifp -- easier than replicating that code here. 3058 */ 3059 void 3060 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) 3061 { 3062 struct mbuf *copym; 3063 struct ip6_hdr *ip6; 3064 3065 /* 3066 * Duplicate the packet. 3067 */ 3068 copym = m_copy(m, 0, M_COPYALL); 3069 if (copym == NULL) 3070 return; 3071 3072 /* 3073 * Make sure to deep-copy IPv6 header portion in case the data 3074 * is in an mbuf cluster, so that we can safely override the IPv6 3075 * header portion later. 3076 */ 3077 if ((copym->m_flags & M_EXT) != 0 || 3078 copym->m_len < sizeof(struct ip6_hdr)) { 3079 copym = m_pullup(copym, sizeof(struct ip6_hdr)); 3080 if (copym == NULL) 3081 return; 3082 } 3083 3084 #ifdef DIAGNOSTIC 3085 if (copym->m_len < sizeof(*ip6)) { 3086 m_freem(copym); 3087 return; 3088 } 3089 #endif 3090 3091 ip6 = mtod(copym, struct ip6_hdr *); 3092 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 3093 ip6->ip6_src.s6_addr16[1] = 0; 3094 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 3095 ip6->ip6_dst.s6_addr16[1] = 0; 3096 3097 (void)looutput(ifp, copym, (struct sockaddr *)dst, NULL); 3098 } 3099 3100 /* 3101 * Chop IPv6 header off from the payload. 3102 */ 3103 static int 3104 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) 3105 { 3106 struct mbuf *mh; 3107 struct ip6_hdr *ip6; 3108 3109 ip6 = mtod(m, struct ip6_hdr *); 3110 if (m->m_len > sizeof(*ip6)) { 3111 MGETHDR(mh, M_DONTWAIT, MT_HEADER); 3112 if (mh == 0) { 3113 m_freem(m); 3114 return ENOBUFS; 3115 } 3116 M_MOVE_PKTHDR(mh, m); 3117 MH_ALIGN(mh, sizeof(*ip6)); 3118 m->m_len -= sizeof(*ip6); 3119 m->m_data += sizeof(*ip6); 3120 mh->m_next = m; 3121 m = mh; 3122 m->m_len = sizeof(*ip6); 3123 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); 3124 } 3125 exthdrs->ip6e_ip6 = m; 3126 return 0; 3127 } 3128 3129 /* 3130 * Compute IPv6 extension header length. 3131 */ 3132 int 3133 ip6_optlen(struct inpcb *inp) 3134 { 3135 int len; 3136 3137 if (!inp->inp_outputopts6) 3138 return 0; 3139 3140 len = 0; 3141 #define elen(x) \ 3142 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) 3143 3144 len += elen(inp->inp_outputopts6->ip6po_hbh); 3145 len += elen(inp->inp_outputopts6->ip6po_dest1); 3146 len += elen(inp->inp_outputopts6->ip6po_rthdr); 3147 len += elen(inp->inp_outputopts6->ip6po_dest2); 3148 return len; 3149 #undef elen 3150 } 3151 3152 u_int32_t 3153 ip6_randomid(void) 3154 { 3155 return idgen32(&ip6_id_ctx); 3156 } 3157 3158 void 3159 ip6_randomid_init(void) 3160 { 3161 idgen32_init(&ip6_id_ctx); 3162 } 3163 3164