1 /* $OpenBSD: ip6_output.c,v 1.106 2008/10/22 14:36:08 markus Exp $ */ 2 /* $KAME: ip6_output.c,v 1.172 2001/03/25 09:55:56 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1988, 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 62 */ 63 64 #include "pf.h" 65 66 #include <sys/param.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/errno.h> 70 #include <sys/protosw.h> 71 #include <sys/socket.h> 72 #include <sys/socketvar.h> 73 #include <sys/systm.h> 74 #include <sys/proc.h> 75 76 #include <net/if.h> 77 #include <net/if_enc.h> 78 #include <net/route.h> 79 80 #include <netinet/in.h> 81 #include <netinet/in_var.h> 82 #include <netinet/in_systm.h> 83 #include <netinet/ip.h> 84 #include <netinet/in_pcb.h> 85 86 #include <netinet/ip6.h> 87 #include <netinet/icmp6.h> 88 #include <netinet6/ip6_var.h> 89 #include <netinet6/nd6.h> 90 #include <netinet6/ip6protosw.h> 91 92 #include <crypto/idgen.h> 93 94 #if NPF > 0 95 #include <net/pfvar.h> 96 #endif 97 98 #ifdef IPSEC 99 #include <netinet/ip_ipsp.h> 100 #include <netinet/ip_ah.h> 101 #include <netinet/ip_esp.h> 102 #include <netinet/udp.h> 103 #include <netinet/tcp.h> 104 #include <net/pfkeyv2.h> 105 106 extern u_int8_t get_sa_require(struct inpcb *); 107 108 extern int ipsec_auth_default_level; 109 extern int ipsec_esp_trans_default_level; 110 extern int ipsec_esp_network_default_level; 111 extern int ipsec_ipcomp_default_level; 112 #endif /* IPSEC */ 113 114 struct ip6_exthdrs { 115 struct mbuf *ip6e_ip6; 116 struct mbuf *ip6e_hbh; 117 struct mbuf *ip6e_dest1; 118 struct mbuf *ip6e_rthdr; 119 struct mbuf *ip6e_dest2; 120 }; 121 122 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int, int); 123 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *); 124 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct mbuf **); 125 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int, int, 126 int, int); 127 static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *); 128 static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **); 129 static int ip6_copyexthdr(struct mbuf **, caddr_t, int); 130 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, 131 struct ip6_frag **); 132 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); 133 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); 134 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, 135 struct ifnet *, struct in6_addr *, u_long *, int *); 136 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); 137 138 /* Context for non-repeating IDs */ 139 struct idgen32_ctx ip6_id_ctx; 140 141 /* 142 * IP6 output. The packet in mbuf chain m contains a skeletal IP6 143 * header (with pri, len, nxt, hlim, src, dst). 144 * This function may modify ver and hlim only. 145 * The mbuf chain containing the packet will be freed. 146 * The mbuf opt, if present, will not be freed. 147 * 148 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and 149 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, 150 * which is rt_rmx.rmx_mtu. 151 * 152 * ifpp - XXX: just for statistics 153 */ 154 int 155 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, 156 int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, 157 struct inpcb *inp) 158 { 159 struct ip6_hdr *ip6, *mhip6; 160 struct ifnet *ifp, *origifp = NULL; 161 struct mbuf *m = m0; 162 int hlen, tlen, len, off; 163 struct route_in6 ip6route; 164 struct rtentry *rt = NULL; 165 struct sockaddr_in6 *dst, dstsock; 166 int error = 0; 167 struct in6_ifaddr *ia = NULL; 168 u_long mtu; 169 int alwaysfrag, dontfrag; 170 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; 171 struct ip6_exthdrs exthdrs; 172 struct in6_addr finaldst; 173 struct route_in6 *ro_pmtu = NULL; 174 int hdrsplit = 0; 175 u_int8_t sproto = 0; 176 #ifdef IPSEC 177 struct m_tag *mtag; 178 union sockaddr_union sdst; 179 struct tdb_ident *tdbi; 180 u_int32_t sspi; 181 struct tdb *tdb; 182 int s; 183 #endif /* IPSEC */ 184 185 #ifdef IPSEC 186 if (inp && (inp->inp_flags & INP_IPV6) == 0) 187 panic("ip6_output: IPv4 pcb is passed"); 188 #endif /* IPSEC */ 189 190 ip6 = mtod(m, struct ip6_hdr *); 191 finaldst = ip6->ip6_dst; 192 193 #define MAKE_EXTHDR(hp, mp) \ 194 do { \ 195 if (hp) { \ 196 struct ip6_ext *eh = (struct ip6_ext *)(hp); \ 197 error = ip6_copyexthdr((mp), (caddr_t)(hp), \ 198 ((eh)->ip6e_len + 1) << 3); \ 199 if (error) \ 200 goto freehdrs; \ 201 } \ 202 } while (0) 203 204 bzero(&exthdrs, sizeof(exthdrs)); 205 206 if (opt) { 207 /* Hop-by-Hop options header */ 208 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); 209 /* Destination options header(1st part) */ 210 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); 211 /* Routing header */ 212 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); 213 /* Destination options header(2nd part) */ 214 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); 215 } 216 217 #ifdef IPSEC 218 if (!ipsec_in_use && !inp) 219 goto done_spd; 220 221 /* 222 * splnet is chosen over spltdb because we are not allowed to 223 * lower the level, and udp6_output calls us in splnet(). XXX check 224 */ 225 s = splnet(); 226 227 /* 228 * Check if there was an outgoing SA bound to the flow 229 * from a transport protocol. 230 */ 231 ip6 = mtod(m, struct ip6_hdr *); 232 233 /* Do we have any pending SAs to apply ? */ 234 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 235 if (mtag != NULL) { 236 #ifdef DIAGNOSTIC 237 if (mtag->m_tag_len != sizeof (struct tdb_ident)) 238 panic("ip6_output: tag of length %d (should be %d", 239 mtag->m_tag_len, sizeof (struct tdb_ident)); 240 #endif 241 tdbi = (struct tdb_ident *)(mtag + 1); 242 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); 243 if (tdb == NULL) 244 error = -EINVAL; 245 m_tag_delete(m, mtag); 246 } else 247 tdb = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr), 248 &error, IPSP_DIRECTION_OUT, NULL, inp); 249 250 if (tdb == NULL) { 251 splx(s); 252 253 if (error == 0) { 254 /* 255 * No IPsec processing required, we'll just send the 256 * packet out. 257 */ 258 sproto = 0; 259 260 /* Fall through to routing/multicast handling */ 261 } else { 262 /* 263 * -EINVAL is used to indicate that the packet should 264 * be silently dropped, typically because we've asked 265 * key management for an SA. 266 */ 267 if (error == -EINVAL) /* Should silently drop packet */ 268 error = 0; 269 270 goto freehdrs; 271 } 272 } else { 273 /* Loop detection */ 274 for (mtag = m_tag_first(m); mtag != NULL; 275 mtag = m_tag_next(m, mtag)) { 276 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 277 mtag->m_tag_id != 278 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 279 continue; 280 tdbi = (struct tdb_ident *)(mtag + 1); 281 if (tdbi->spi == tdb->tdb_spi && 282 tdbi->proto == tdb->tdb_sproto && 283 !bcmp(&tdbi->dst, &tdb->tdb_dst, 284 sizeof(union sockaddr_union))) { 285 splx(s); 286 sproto = 0; /* mark as no-IPsec-needed */ 287 goto done_spd; 288 } 289 } 290 291 /* We need to do IPsec */ 292 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); 293 sspi = tdb->tdb_spi; 294 sproto = tdb->tdb_sproto; 295 splx(s); 296 } 297 298 /* Fall through to the routing/multicast handling code */ 299 done_spd: 300 #endif /* IPSEC */ 301 302 /* 303 * Calculate the total length of the extension header chain. 304 * Keep the length of the unfragmentable part for fragmentation. 305 */ 306 optlen = 0; 307 if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; 308 if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; 309 if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; 310 unfragpartlen = optlen + sizeof(struct ip6_hdr); 311 /* NOTE: we don't add AH/ESP length here. do that later. */ 312 if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; 313 314 /* 315 * If we need IPsec, or there is at least one extension header, 316 * separate IP6 header from the payload. 317 */ 318 if ((sproto || optlen) && !hdrsplit) { 319 if ((error = ip6_splithdr(m, &exthdrs)) != 0) { 320 m = NULL; 321 goto freehdrs; 322 } 323 m = exthdrs.ip6e_ip6; 324 hdrsplit++; 325 } 326 327 /* adjust pointer */ 328 ip6 = mtod(m, struct ip6_hdr *); 329 330 /* adjust mbuf packet header length */ 331 m->m_pkthdr.len += optlen; 332 plen = m->m_pkthdr.len - sizeof(*ip6); 333 334 /* If this is a jumbo payload, insert a jumbo payload option. */ 335 if (plen > IPV6_MAXPACKET) { 336 if (!hdrsplit) { 337 if ((error = ip6_splithdr(m, &exthdrs)) != 0) { 338 m = NULL; 339 goto freehdrs; 340 } 341 m = exthdrs.ip6e_ip6; 342 hdrsplit++; 343 } 344 /* adjust pointer */ 345 ip6 = mtod(m, struct ip6_hdr *); 346 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) 347 goto freehdrs; 348 ip6->ip6_plen = 0; 349 } else 350 ip6->ip6_plen = htons(plen); 351 352 /* 353 * Concatenate headers and fill in next header fields. 354 * Here we have, on "m" 355 * IPv6 payload 356 * and we insert headers accordingly. Finally, we should be getting: 357 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] 358 * 359 * during the header composing process, "m" points to IPv6 header. 360 * "mprev" points to an extension header prior to esp. 361 */ 362 { 363 u_char *nexthdrp = &ip6->ip6_nxt; 364 struct mbuf *mprev = m; 365 366 /* 367 * we treat dest2 specially. this makes IPsec processing 368 * much easier. the goal here is to make mprev point the 369 * mbuf prior to dest2. 370 * 371 * result: IPv6 dest2 payload 372 * m and mprev will point to IPv6 header. 373 */ 374 if (exthdrs.ip6e_dest2) { 375 if (!hdrsplit) 376 panic("assumption failed: hdr not split"); 377 exthdrs.ip6e_dest2->m_next = m->m_next; 378 m->m_next = exthdrs.ip6e_dest2; 379 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; 380 ip6->ip6_nxt = IPPROTO_DSTOPTS; 381 } 382 383 #define MAKE_CHAIN(m, mp, p, i)\ 384 do {\ 385 if (m) {\ 386 if (!hdrsplit) \ 387 panic("assumption failed: hdr not split"); \ 388 *mtod((m), u_char *) = *(p);\ 389 *(p) = (i);\ 390 p = mtod((m), u_char *);\ 391 (m)->m_next = (mp)->m_next;\ 392 (mp)->m_next = (m);\ 393 (mp) = (m);\ 394 }\ 395 } while (0) 396 /* 397 * result: IPv6 hbh dest1 rthdr dest2 payload 398 * m will point to IPv6 header. mprev will point to the 399 * extension header prior to dest2 (rthdr in the above case). 400 */ 401 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); 402 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, 403 IPPROTO_DSTOPTS); 404 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, 405 IPPROTO_ROUTING); 406 } 407 408 /* 409 * If there is a routing header, replace the destination address field 410 * with the first hop of the routing header. 411 */ 412 if (exthdrs.ip6e_rthdr) { 413 struct ip6_rthdr *rh; 414 struct ip6_rthdr0 *rh0; 415 struct in6_addr *addr; 416 417 rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, 418 struct ip6_rthdr *)); 419 switch (rh->ip6r_type) { 420 case IPV6_RTHDR_TYPE_0: 421 rh0 = (struct ip6_rthdr0 *)rh; 422 addr = (struct in6_addr *)(rh0 + 1); 423 ip6->ip6_dst = addr[0]; 424 bcopy(&addr[1], &addr[0], 425 sizeof(struct in6_addr) * (rh0->ip6r0_segleft - 1)); 426 addr[rh0->ip6r0_segleft - 1] = finaldst; 427 break; 428 default: /* is it possible? */ 429 error = EINVAL; 430 goto bad; 431 } 432 } 433 434 /* Source address validation */ 435 if (!(flags & IPV6_UNSPECSRC) && 436 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { 437 /* 438 * XXX: we can probably assume validation in the caller, but 439 * we explicitly check the address here for safety. 440 */ 441 error = EOPNOTSUPP; 442 ip6stat.ip6s_badscope++; 443 goto bad; 444 } 445 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { 446 error = EOPNOTSUPP; 447 ip6stat.ip6s_badscope++; 448 goto bad; 449 } 450 451 ip6stat.ip6s_localout++; 452 453 /* 454 * Route packet. 455 */ 456 /* initialize cached route */ 457 if (ro == 0) { 458 ro = &ip6route; 459 bzero((caddr_t)ro, sizeof(*ro)); 460 } 461 ro_pmtu = ro; 462 if (opt && opt->ip6po_rthdr) 463 ro = &opt->ip6po_route; 464 dst = (struct sockaddr_in6 *)&ro->ro_dst; 465 466 /* 467 * if specified, try to fill in the traffic class field. 468 * do not override if a non-zero value is already set. 469 * we check the diffserv field and the ecn field separately. 470 */ 471 if (opt && opt->ip6po_tclass >= 0) { 472 int mask = 0; 473 474 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) 475 mask |= 0xfc; 476 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) 477 mask |= 0x03; 478 if (mask != 0) 479 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); 480 } 481 482 /* fill in or override the hop limit field, if necessary. */ 483 if (opt && opt->ip6po_hlim != -1) 484 ip6->ip6_hlim = opt->ip6po_hlim & 0xff; 485 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 486 if (im6o != NULL) 487 ip6->ip6_hlim = im6o->im6o_multicast_hlim; 488 else 489 ip6->ip6_hlim = ip6_defmcasthlim; 490 } 491 492 #ifdef IPSEC 493 /* 494 * Check if the packet needs encapsulation. 495 * ipsp_process_packet will never come back to here. 496 */ 497 if (sproto != 0) { 498 s = splnet(); 499 500 #if NPF > 0 501 if (pf_test6(PF_OUT, &encif[0].sc_if, &m, NULL) != PF_PASS) { 502 splx(s); 503 error = EHOSTUNREACH; 504 m_freem(m); 505 goto done; 506 } 507 if (m == NULL) { 508 splx(s); 509 goto done; 510 } 511 ip6 = mtod(m, struct ip6_hdr *); 512 #endif 513 /* 514 * XXX what should we do if ip6_hlim == 0 and the 515 * packet gets tunneled? 516 */ 517 518 tdb = gettdb(sspi, &sdst, sproto); 519 if (tdb == NULL) { 520 splx(s); 521 error = EHOSTUNREACH; 522 m_freem(m); 523 goto done; 524 } 525 526 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ 527 528 /* Callee frees mbuf */ 529 /* 530 * if we are source-routing, do not attempt to tunnel the 531 * packet just because ip6_dst is different from what tdb has. 532 * XXX 533 */ 534 error = ipsp_process_packet(m, tdb, AF_INET6, 535 exthdrs.ip6e_rthdr ? 1 : 0); 536 splx(s); 537 538 return error; /* Nothing more to be done */ 539 } 540 #endif /* IPSEC */ 541 542 bzero(&dstsock, sizeof(dstsock)); 543 dstsock.sin6_family = AF_INET6; 544 dstsock.sin6_addr = ip6->ip6_dst; 545 dstsock.sin6_len = sizeof(dstsock); 546 if ((error = in6_selectroute(&dstsock, opt, im6o, ro, &ifp, 547 &rt)) != 0) { 548 switch (error) { 549 case EHOSTUNREACH: 550 ip6stat.ip6s_noroute++; 551 break; 552 case EADDRNOTAVAIL: 553 default: 554 break; /* XXX statistics? */ 555 } 556 if (ifp != NULL) 557 in6_ifstat_inc(ifp, ifs6_out_discard); 558 goto bad; 559 } 560 if (rt == NULL) { 561 /* 562 * If in6_selectroute() does not return a route entry, 563 * dst may not have been updated. 564 */ 565 *dst = dstsock; /* XXX */ 566 } 567 568 /* 569 * then rt (for unicast) and ifp must be non-NULL valid values. 570 */ 571 if (rt) { 572 ia = (struct in6_ifaddr *)(rt->rt_ifa); 573 rt->rt_use++; 574 } 575 576 if ((flags & IPV6_FORWARDING) == 0) { 577 /* XXX: the FORWARDING flag can be set for mrouting. */ 578 in6_ifstat_inc(ifp, ifs6_out_request); 579 } 580 581 /* 582 * The outgoing interface must be in the zone of source and 583 * destination addresses. We should use ia_ifp to support the 584 * case of sending packets to an address of our own. 585 */ 586 if (ia != NULL && ia->ia_ifp) 587 origifp = ia->ia_ifp; 588 else 589 origifp = ifp; 590 591 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 592 if (opt && opt->ip6po_nextroute.ro_rt) { 593 /* 594 * The nexthop is explicitly specified by the 595 * application. We assume the next hop is an IPv6 596 * address. 597 */ 598 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; 599 } else if ((rt->rt_flags & RTF_GATEWAY)) 600 dst = (struct sockaddr_in6 *)rt->rt_gateway; 601 } 602 603 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { 604 /* Unicast */ 605 606 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ 607 } else { 608 /* Multicast */ 609 struct in6_multi *in6m; 610 611 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; 612 613 in6_ifstat_inc(ifp, ifs6_out_mcast); 614 615 /* 616 * Confirm that the outgoing interface supports multicast. 617 */ 618 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 619 ip6stat.ip6s_noroute++; 620 in6_ifstat_inc(ifp, ifs6_out_discard); 621 error = ENETUNREACH; 622 goto bad; 623 } 624 IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); 625 if (in6m != NULL && 626 (im6o == NULL || im6o->im6o_multicast_loop)) { 627 /* 628 * If we belong to the destination multicast group 629 * on the outgoing interface, and the caller did not 630 * forbid loopback, loop back a copy. 631 */ 632 ip6_mloopback(ifp, m, dst); 633 } else { 634 /* 635 * If we are acting as a multicast router, perform 636 * multicast forwarding as if the packet had just 637 * arrived on the interface to which we are about 638 * to send. The multicast forwarding function 639 * recursively calls this function, using the 640 * IPV6_FORWARDING flag to prevent infinite recursion. 641 * 642 * Multicasts that are looped back by ip6_mloopback(), 643 * above, will be forwarded by the ip6_input() routine, 644 * if necessary. 645 */ 646 #ifdef MROUTING 647 if (ip6_mforwarding && ip6_mrouter && 648 (flags & IPV6_FORWARDING) == 0) { 649 if (ip6_mforward(ip6, ifp, m) != 0) { 650 m_freem(m); 651 goto done; 652 } 653 } 654 #endif 655 } 656 /* 657 * Multicasts with a hoplimit of zero may be looped back, 658 * above, but must not be transmitted on a network. 659 * Also, multicasts addressed to the loopback interface 660 * are not sent -- the above call to ip6_mloopback() will 661 * loop back a copy if this host actually belongs to the 662 * destination group on the loopback interface. 663 */ 664 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || 665 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { 666 m_freem(m); 667 goto done; 668 } 669 } 670 671 /* 672 * Fill the outgoing interface to tell the upper layer 673 * to increment per-interface statistics. 674 */ 675 if (ifpp) 676 *ifpp = ifp; 677 678 /* Determine path MTU. */ 679 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu, 680 &alwaysfrag)) != 0) 681 goto bad; 682 683 /* 684 * The caller of this function may specify to use the minimum MTU 685 * in some cases. 686 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU 687 * setting. The logic is a bit complicated; by default, unicast 688 * packets will follow path MTU while multicast packets will be sent at 689 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets 690 * including unicast ones will be sent at the minimum MTU. Multicast 691 * packets will always be sent at the minimum MTU unless 692 * IP6PO_MINMTU_DISABLE is explicitly specified. 693 * See RFC 3542 for more details. 694 */ 695 if (mtu > IPV6_MMTU) { 696 if ((flags & IPV6_MINMTU)) 697 mtu = IPV6_MMTU; 698 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) 699 mtu = IPV6_MMTU; 700 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && 701 (opt == NULL || 702 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { 703 mtu = IPV6_MMTU; 704 } 705 } 706 707 /* Fake scoped addresses */ 708 if ((ifp->if_flags & IFF_LOOPBACK) != 0) { 709 /* 710 * If source or destination address is a scoped address, and 711 * the packet is going to be sent to a loopback interface, 712 * we should keep the original interface. 713 */ 714 715 /* 716 * XXX: this is a very experimental and temporary solution. 717 * We eventually have sockaddr_in6 and use the sin6_scope_id 718 * field of the structure here. 719 * We rely on the consistency between two scope zone ids 720 * of source add destination, which should already be assured 721 * Larger scopes than link will be supported in the near 722 * future. 723 */ 724 origifp = NULL; 725 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 726 origifp = ifindex2ifnet[ntohs(ip6->ip6_src.s6_addr16[1])]; 727 else if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 728 origifp = ifindex2ifnet[ntohs(ip6->ip6_dst.s6_addr16[1])]; 729 /* 730 * XXX: origifp can be NULL even in those two cases above. 731 * For example, if we remove the (only) link-local address 732 * from the loopback interface, and try to send a link-local 733 * address without link-id information. Then the source 734 * address is ::1, and the destination address is the 735 * link-local address with its s6_addr16[1] being zero. 736 * What is worse, if the packet goes to the loopback interface 737 * by a default rejected route, the null pointer would be 738 * passed to looutput, and the kernel would hang. 739 * The following last resort would prevent such disaster. 740 */ 741 if (origifp == NULL) 742 origifp = ifp; 743 } else 744 origifp = ifp; 745 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 746 ip6->ip6_src.s6_addr16[1] = 0; 747 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 748 ip6->ip6_dst.s6_addr16[1] = 0; 749 750 /* 751 * If the outgoing packet contains a hop-by-hop options header, 752 * it must be examined and processed even by the source node. 753 * (RFC 2460, section 4.) 754 */ 755 if (exthdrs.ip6e_hbh) { 756 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); 757 u_int32_t dummy1; /* XXX unused */ 758 u_int32_t dummy2; /* XXX unused */ 759 760 /* 761 * XXX: if we have to send an ICMPv6 error to the sender, 762 * we need the M_LOOP flag since icmp6_error() expects 763 * the IPv6 and the hop-by-hop options header are 764 * continuous unless the flag is set. 765 */ 766 m->m_flags |= M_LOOP; 767 m->m_pkthdr.rcvif = ifp; 768 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), 769 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), 770 &dummy1, &dummy2) < 0) { 771 /* m was already freed at this point */ 772 error = EINVAL;/* better error? */ 773 goto done; 774 } 775 m->m_flags &= ~M_LOOP; /* XXX */ 776 m->m_pkthdr.rcvif = NULL; 777 } 778 779 #if NPF > 0 780 if (pf_test6(PF_OUT, ifp, &m, NULL) != PF_PASS) { 781 error = EHOSTUNREACH; 782 m_freem(m); 783 goto done; 784 } 785 if (m == NULL) 786 goto done; 787 ip6 = mtod(m, struct ip6_hdr *); 788 #endif 789 790 /* 791 * Send the packet to the outgoing interface. 792 * If necessary, do IPv6 fragmentation before sending. 793 * 794 * the logic here is rather complex: 795 * 1: normal case (dontfrag == 0, alwaysfrag == 0) 796 * 1-a: send as is if tlen <= path mtu 797 * 1-b: fragment if tlen > path mtu 798 * 799 * 2: if user asks us not to fragment (dontfrag == 1) 800 * 2-a: send as is if tlen <= interface mtu 801 * 2-b: error if tlen > interface mtu 802 * 803 * 3: if we always need to attach fragment header (alwaysfrag == 1) 804 * always fragment 805 * 806 * 4: if dontfrag == 1 && alwaysfrag == 1 807 * error, as we cannot handle this conflicting request 808 */ 809 tlen = m->m_pkthdr.len; 810 811 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) 812 dontfrag = 1; 813 else 814 dontfrag = 0; 815 if (dontfrag && alwaysfrag) { /* case 4 */ 816 /* conflicting request - can't transmit */ 817 error = EMSGSIZE; 818 goto bad; 819 } 820 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */ 821 /* 822 * Even if the DONTFRAG option is specified, we cannot send the 823 * packet when the data length is larger than the MTU of the 824 * outgoing interface. 825 * Notify the error by sending IPV6_PATHMTU ancillary data as 826 * well as returning an error code (the latter is not described 827 * in the API spec.) 828 */ 829 #if 0 830 u_int32_t mtu32; 831 struct ip6ctlparam ip6cp; 832 833 mtu32 = (u_int32_t)mtu; 834 bzero(&ip6cp, sizeof(ip6cp)); 835 ip6cp.ip6c_cmdarg = (void *)&mtu32; 836 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, 837 (void *)&ip6cp); 838 #endif 839 840 error = EMSGSIZE; 841 goto bad; 842 } 843 844 /* 845 * transmit packet without fragmentation 846 */ 847 if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ 848 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); 849 goto done; 850 } 851 852 /* 853 * try to fragment the packet. case 1-b and 3 854 */ 855 if (mtu < IPV6_MMTU) { 856 /* path MTU cannot be less than IPV6_MMTU */ 857 error = EMSGSIZE; 858 in6_ifstat_inc(ifp, ifs6_out_fragfail); 859 goto bad; 860 } else if (ip6->ip6_plen == 0) { 861 /* jumbo payload cannot be fragmented */ 862 error = EMSGSIZE; 863 in6_ifstat_inc(ifp, ifs6_out_fragfail); 864 goto bad; 865 } else { 866 struct mbuf **mnext, *m_frgpart; 867 struct ip6_frag *ip6f; 868 u_int32_t id = htonl(ip6_randomid()); 869 u_char nextproto; 870 #if 0 871 struct ip6ctlparam ip6cp; 872 u_int32_t mtu32; 873 #endif 874 875 /* 876 * Too large for the destination or interface; 877 * fragment if possible. 878 * Must be able to put at least 8 bytes per fragment. 879 */ 880 hlen = unfragpartlen; 881 if (mtu > IPV6_MAXPACKET) 882 mtu = IPV6_MAXPACKET; 883 884 #if 0 885 /* Notify a proper path MTU to applications. */ 886 mtu32 = (u_int32_t)mtu; 887 bzero(&ip6cp, sizeof(ip6cp)); 888 ip6cp.ip6c_cmdarg = (void *)&mtu32; 889 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, 890 (void *)&ip6cp); 891 #endif 892 893 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; 894 if (len < 8) { 895 error = EMSGSIZE; 896 in6_ifstat_inc(ifp, ifs6_out_fragfail); 897 goto bad; 898 } 899 900 mnext = &m->m_nextpkt; 901 902 /* 903 * Change the next header field of the last header in the 904 * unfragmentable part. 905 */ 906 if (exthdrs.ip6e_rthdr) { 907 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); 908 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; 909 } else if (exthdrs.ip6e_dest1) { 910 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); 911 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; 912 } else if (exthdrs.ip6e_hbh) { 913 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); 914 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; 915 } else { 916 nextproto = ip6->ip6_nxt; 917 ip6->ip6_nxt = IPPROTO_FRAGMENT; 918 } 919 920 /* 921 * Loop through length of segment after first fragment, 922 * make new header and copy data of each part and link onto 923 * chain. 924 */ 925 m0 = m; 926 for (off = hlen; off < tlen; off += len) { 927 struct mbuf *mlast; 928 929 MGETHDR(m, M_DONTWAIT, MT_HEADER); 930 if (!m) { 931 error = ENOBUFS; 932 ip6stat.ip6s_odropped++; 933 goto sendorfree; 934 } 935 m->m_pkthdr.rcvif = NULL; 936 m->m_flags = m0->m_flags & M_COPYFLAGS; 937 *mnext = m; 938 mnext = &m->m_nextpkt; 939 m->m_data += max_linkhdr; 940 mhip6 = mtod(m, struct ip6_hdr *); 941 *mhip6 = *ip6; 942 m->m_len = sizeof(*mhip6); 943 error = ip6_insertfraghdr(m0, m, hlen, &ip6f); 944 if (error) { 945 ip6stat.ip6s_odropped++; 946 goto sendorfree; 947 } 948 ip6f->ip6f_offlg = htons((u_int16_t)((off - hlen) & ~7)); 949 if (off + len >= tlen) 950 len = tlen - off; 951 else 952 ip6f->ip6f_offlg |= IP6F_MORE_FRAG; 953 mhip6->ip6_plen = htons((u_int16_t)(len + hlen + 954 sizeof(*ip6f) - sizeof(struct ip6_hdr))); 955 if ((m_frgpart = m_copy(m0, off, len)) == 0) { 956 error = ENOBUFS; 957 ip6stat.ip6s_odropped++; 958 goto sendorfree; 959 } 960 for (mlast = m; mlast->m_next; mlast = mlast->m_next) 961 ; 962 mlast->m_next = m_frgpart; 963 m->m_pkthdr.len = len + hlen + sizeof(*ip6f); 964 m->m_pkthdr.rcvif = (struct ifnet *)0; 965 ip6f->ip6f_reserved = 0; 966 ip6f->ip6f_ident = id; 967 ip6f->ip6f_nxt = nextproto; 968 ip6stat.ip6s_ofragments++; 969 in6_ifstat_inc(ifp, ifs6_out_fragcreat); 970 } 971 972 in6_ifstat_inc(ifp, ifs6_out_fragok); 973 } 974 975 /* 976 * Remove leading garbages. 977 */ 978 sendorfree: 979 m = m0->m_nextpkt; 980 m0->m_nextpkt = 0; 981 m_freem(m0); 982 for (m0 = m; m; m = m0) { 983 m0 = m->m_nextpkt; 984 m->m_nextpkt = 0; 985 if (error == 0) { 986 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); 987 } else 988 m_freem(m); 989 } 990 991 if (error == 0) 992 ip6stat.ip6s_fragmented++; 993 994 done: 995 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */ 996 RTFREE(ro->ro_rt); 997 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { 998 RTFREE(ro_pmtu->ro_rt); 999 } 1000 1001 return (error); 1002 1003 freehdrs: 1004 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ 1005 m_freem(exthdrs.ip6e_dest1); 1006 m_freem(exthdrs.ip6e_rthdr); 1007 m_freem(exthdrs.ip6e_dest2); 1008 /* FALLTHROUGH */ 1009 bad: 1010 m_freem(m); 1011 goto done; 1012 } 1013 1014 static int 1015 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) 1016 { 1017 struct mbuf *m; 1018 1019 if (hlen > MCLBYTES) 1020 return (ENOBUFS); /* XXX */ 1021 1022 MGET(m, M_DONTWAIT, MT_DATA); 1023 if (!m) 1024 return (ENOBUFS); 1025 1026 if (hlen > MLEN) { 1027 MCLGET(m, M_DONTWAIT); 1028 if ((m->m_flags & M_EXT) == 0) { 1029 m_free(m); 1030 return (ENOBUFS); 1031 } 1032 } 1033 m->m_len = hlen; 1034 if (hdr) 1035 bcopy(hdr, mtod(m, caddr_t), hlen); 1036 1037 *mp = m; 1038 return (0); 1039 } 1040 1041 /* 1042 * Insert jumbo payload option. 1043 */ 1044 static int 1045 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) 1046 { 1047 struct mbuf *mopt; 1048 u_int8_t *optbuf; 1049 u_int32_t v; 1050 1051 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ 1052 1053 /* 1054 * If there is no hop-by-hop options header, allocate new one. 1055 * If there is one but it doesn't have enough space to store the 1056 * jumbo payload option, allocate a cluster to store the whole options. 1057 * Otherwise, use it to store the options. 1058 */ 1059 if (exthdrs->ip6e_hbh == 0) { 1060 MGET(mopt, M_DONTWAIT, MT_DATA); 1061 if (mopt == 0) 1062 return (ENOBUFS); 1063 mopt->m_len = JUMBOOPTLEN; 1064 optbuf = mtod(mopt, u_int8_t *); 1065 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ 1066 exthdrs->ip6e_hbh = mopt; 1067 } else { 1068 struct ip6_hbh *hbh; 1069 1070 mopt = exthdrs->ip6e_hbh; 1071 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { 1072 /* 1073 * XXX assumption: 1074 * - exthdrs->ip6e_hbh is not referenced from places 1075 * other than exthdrs. 1076 * - exthdrs->ip6e_hbh is not an mbuf chain. 1077 */ 1078 int oldoptlen = mopt->m_len; 1079 struct mbuf *n; 1080 1081 /* 1082 * XXX: give up if the whole (new) hbh header does 1083 * not fit even in an mbuf cluster. 1084 */ 1085 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) 1086 return (ENOBUFS); 1087 1088 /* 1089 * As a consequence, we must always prepare a cluster 1090 * at this point. 1091 */ 1092 MGET(n, M_DONTWAIT, MT_DATA); 1093 if (n) { 1094 MCLGET(n, M_DONTWAIT); 1095 if ((n->m_flags & M_EXT) == 0) { 1096 m_freem(n); 1097 n = NULL; 1098 } 1099 } 1100 if (!n) 1101 return (ENOBUFS); 1102 n->m_len = oldoptlen + JUMBOOPTLEN; 1103 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), 1104 oldoptlen); 1105 optbuf = mtod(n, u_int8_t *) + oldoptlen; 1106 m_freem(mopt); 1107 mopt = exthdrs->ip6e_hbh = n; 1108 } else { 1109 optbuf = mtod(mopt, u_int8_t *) + mopt->m_len; 1110 mopt->m_len += JUMBOOPTLEN; 1111 } 1112 optbuf[0] = IP6OPT_PADN; 1113 optbuf[1] = 0; 1114 1115 /* 1116 * Adjust the header length according to the pad and 1117 * the jumbo payload option. 1118 */ 1119 hbh = mtod(mopt, struct ip6_hbh *); 1120 hbh->ip6h_len += (JUMBOOPTLEN >> 3); 1121 } 1122 1123 /* fill in the option. */ 1124 optbuf[2] = IP6OPT_JUMBO; 1125 optbuf[3] = 4; 1126 v = (u_int32_t)htonl(plen + JUMBOOPTLEN); 1127 bcopy(&v, &optbuf[4], sizeof(u_int32_t)); 1128 1129 /* finally, adjust the packet header length */ 1130 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; 1131 1132 return (0); 1133 #undef JUMBOOPTLEN 1134 } 1135 1136 /* 1137 * Insert fragment header and copy unfragmentable header portions. 1138 */ 1139 static int 1140 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, 1141 struct ip6_frag **frghdrp) 1142 { 1143 struct mbuf *n, *mlast; 1144 1145 if (hlen > sizeof(struct ip6_hdr)) { 1146 n = m_copym(m0, sizeof(struct ip6_hdr), 1147 hlen - sizeof(struct ip6_hdr), M_DONTWAIT); 1148 if (n == 0) 1149 return (ENOBUFS); 1150 m->m_next = n; 1151 } else 1152 n = m; 1153 1154 /* Search for the last mbuf of unfragmentable part. */ 1155 for (mlast = n; mlast->m_next; mlast = mlast->m_next) 1156 ; 1157 1158 if ((mlast->m_flags & M_EXT) == 0 && 1159 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { 1160 /* use the trailing space of the last mbuf for the fragment hdr */ 1161 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + 1162 mlast->m_len); 1163 mlast->m_len += sizeof(struct ip6_frag); 1164 m->m_pkthdr.len += sizeof(struct ip6_frag); 1165 } else { 1166 /* allocate a new mbuf for the fragment header */ 1167 struct mbuf *mfrg; 1168 1169 MGET(mfrg, M_DONTWAIT, MT_DATA); 1170 if (mfrg == 0) 1171 return (ENOBUFS); 1172 mfrg->m_len = sizeof(struct ip6_frag); 1173 *frghdrp = mtod(mfrg, struct ip6_frag *); 1174 mlast->m_next = mfrg; 1175 } 1176 1177 return (0); 1178 } 1179 1180 static int 1181 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, 1182 struct ifnet *ifp, struct in6_addr *dst, u_long *mtup, 1183 int *alwaysfragp) 1184 { 1185 u_int32_t mtu = 0; 1186 int alwaysfrag = 0; 1187 int error = 0; 1188 1189 if (ro_pmtu != ro) { 1190 /* The first hop and the final destination may differ. */ 1191 struct sockaddr_in6 *sa6_dst = 1192 (struct sockaddr_in6 *)&ro_pmtu->ro_dst; 1193 if (ro_pmtu->ro_rt && 1194 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || 1195 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { 1196 RTFREE(ro_pmtu->ro_rt); 1197 ro_pmtu->ro_rt = (struct rtentry *)NULL; 1198 } 1199 if (ro_pmtu->ro_rt == 0) { 1200 bzero(sa6_dst, sizeof(*sa6_dst)); 1201 sa6_dst->sin6_family = AF_INET6; 1202 sa6_dst->sin6_len = sizeof(struct sockaddr_in6); 1203 sa6_dst->sin6_addr = *dst; 1204 1205 rtalloc((struct route *)ro_pmtu); 1206 } 1207 } 1208 if (ro_pmtu->ro_rt) { 1209 u_int32_t ifmtu; 1210 1211 if (ifp == NULL) 1212 ifp = ro_pmtu->ro_rt->rt_ifp; 1213 ifmtu = IN6_LINKMTU(ifp); 1214 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; 1215 if (mtu == 0) 1216 mtu = ifmtu; 1217 else if (mtu < IPV6_MMTU) { 1218 /* 1219 * RFC2460 section 5, last paragraph: 1220 * if we record ICMPv6 too big message with 1221 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU 1222 * or smaller, with fragment header attached. 1223 * (fragment header is needed regardless from the 1224 * packet size, for translators to identify packets) 1225 */ 1226 alwaysfrag = 1; 1227 mtu = IPV6_MMTU; 1228 } else if (mtu > ifmtu) { 1229 /* 1230 * The MTU on the route is larger than the MTU on 1231 * the interface! This shouldn't happen, unless the 1232 * MTU of the interface has been changed after the 1233 * interface was brought up. Change the MTU in the 1234 * route to match the interface MTU (as long as the 1235 * field isn't locked). 1236 */ 1237 mtu = ifmtu; 1238 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) 1239 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; 1240 } 1241 } else if (ifp) { 1242 mtu = IN6_LINKMTU(ifp); 1243 } else 1244 error = EHOSTUNREACH; /* XXX */ 1245 1246 *mtup = mtu; 1247 if (alwaysfragp) 1248 *alwaysfragp = alwaysfrag; 1249 return (error); 1250 } 1251 1252 /* 1253 * IP6 socket option processing. 1254 */ 1255 int 1256 ip6_ctloutput(int op, struct socket *so, int level, int optname, 1257 struct mbuf **mp) 1258 { 1259 int privileged, optdatalen, uproto; 1260 void *optdata; 1261 struct inpcb *inp = sotoinpcb(so); 1262 struct mbuf *m = *mp; 1263 int error, optval; 1264 int optlen; 1265 #ifdef IPSEC 1266 struct proc *p = curproc; /* XXX */ 1267 struct tdb *tdb; 1268 struct tdb_ident *tdbip, tdbi; 1269 int s; 1270 #endif 1271 1272 optlen = m ? m->m_len : 0; 1273 error = optval = 0; 1274 1275 privileged = (inp->inp_socket->so_state & SS_PRIV); 1276 uproto = (int)so->so_proto->pr_protocol; 1277 1278 if (level == IPPROTO_IPV6) { 1279 switch (op) { 1280 case PRCO_SETOPT: 1281 switch (optname) { 1282 case IPV6_2292PKTOPTIONS: 1283 { 1284 error = ip6_pcbopts(&inp->inp_outputopts6, 1285 m, so); 1286 break; 1287 } 1288 1289 /* 1290 * Use of some Hop-by-Hop options or some 1291 * Destination options, might require special 1292 * privilege. That is, normal applications 1293 * (without special privilege) might be forbidden 1294 * from setting certain options in outgoing packets, 1295 * and might never see certain options in received 1296 * packets. [RFC 2292 Section 6] 1297 * KAME specific note: 1298 * KAME prevents non-privileged users from sending or 1299 * receiving ANY hbh/dst options in order to avoid 1300 * overhead of parsing options in the kernel. 1301 */ 1302 case IPV6_RECVHOPOPTS: 1303 case IPV6_RECVDSTOPTS: 1304 case IPV6_RECVRTHDRDSTOPTS: 1305 if (!privileged) { 1306 error = EPERM; 1307 break; 1308 } 1309 /* FALLTHROUGH */ 1310 case IPV6_UNICAST_HOPS: 1311 case IPV6_HOPLIMIT: 1312 case IPV6_FAITH: 1313 1314 case IPV6_RECVPKTINFO: 1315 case IPV6_RECVHOPLIMIT: 1316 case IPV6_RECVRTHDR: 1317 case IPV6_RECVPATHMTU: 1318 case IPV6_RECVTCLASS: 1319 case IPV6_V6ONLY: 1320 case IPV6_AUTOFLOWLABEL: 1321 if (optlen != sizeof(int)) { 1322 error = EINVAL; 1323 break; 1324 } 1325 optval = *mtod(m, int *); 1326 switch (optname) { 1327 1328 case IPV6_UNICAST_HOPS: 1329 if (optval < -1 || optval >= 256) 1330 error = EINVAL; 1331 else { 1332 /* -1 = kernel default */ 1333 inp->inp_hops = optval; 1334 } 1335 break; 1336 #define OPTSET(bit) \ 1337 do { \ 1338 if (optval) \ 1339 inp->inp_flags |= (bit); \ 1340 else \ 1341 inp->inp_flags &= ~(bit); \ 1342 } while (/*CONSTCOND*/ 0) 1343 #define OPTSET2292(bit) \ 1344 do { \ 1345 inp->inp_flags |= IN6P_RFC2292; \ 1346 if (optval) \ 1347 inp->inp_flags |= (bit); \ 1348 else \ 1349 inp->inp_flags &= ~(bit); \ 1350 } while (/*CONSTCOND*/ 0) 1351 #define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0) 1352 1353 case IPV6_RECVPKTINFO: 1354 /* cannot mix with RFC2292 */ 1355 if (OPTBIT(IN6P_RFC2292)) { 1356 error = EINVAL; 1357 break; 1358 } 1359 OPTSET(IN6P_PKTINFO); 1360 break; 1361 1362 case IPV6_HOPLIMIT: 1363 { 1364 struct ip6_pktopts **optp; 1365 1366 /* cannot mix with RFC2292 */ 1367 if (OPTBIT(IN6P_RFC2292)) { 1368 error = EINVAL; 1369 break; 1370 } 1371 optp = &inp->inp_outputopts6; 1372 error = ip6_pcbopt(IPV6_HOPLIMIT, 1373 (u_char *)&optval, 1374 sizeof(optval), 1375 optp, 1376 privileged, uproto); 1377 break; 1378 } 1379 1380 case IPV6_RECVHOPLIMIT: 1381 /* cannot mix with RFC2292 */ 1382 if (OPTBIT(IN6P_RFC2292)) { 1383 error = EINVAL; 1384 break; 1385 } 1386 OPTSET(IN6P_HOPLIMIT); 1387 break; 1388 1389 case IPV6_RECVHOPOPTS: 1390 /* cannot mix with RFC2292 */ 1391 if (OPTBIT(IN6P_RFC2292)) { 1392 error = EINVAL; 1393 break; 1394 } 1395 OPTSET(IN6P_HOPOPTS); 1396 break; 1397 1398 case IPV6_RECVDSTOPTS: 1399 /* cannot mix with RFC2292 */ 1400 if (OPTBIT(IN6P_RFC2292)) { 1401 error = EINVAL; 1402 break; 1403 } 1404 OPTSET(IN6P_DSTOPTS); 1405 break; 1406 1407 case IPV6_RECVRTHDRDSTOPTS: 1408 /* cannot mix with RFC2292 */ 1409 if (OPTBIT(IN6P_RFC2292)) { 1410 error = EINVAL; 1411 break; 1412 } 1413 OPTSET(IN6P_RTHDRDSTOPTS); 1414 break; 1415 1416 case IPV6_RECVRTHDR: 1417 /* cannot mix with RFC2292 */ 1418 if (OPTBIT(IN6P_RFC2292)) { 1419 error = EINVAL; 1420 break; 1421 } 1422 OPTSET(IN6P_RTHDR); 1423 break; 1424 1425 case IPV6_FAITH: 1426 OPTSET(IN6P_FAITH); 1427 break; 1428 1429 case IPV6_RECVPATHMTU: 1430 /* 1431 * We ignore this option for TCP 1432 * sockets. 1433 * (RFC3542 leaves this case 1434 * unspecified.) 1435 */ 1436 if (uproto != IPPROTO_TCP) 1437 OPTSET(IN6P_MTU); 1438 break; 1439 1440 case IPV6_V6ONLY: 1441 /* 1442 * make setsockopt(IPV6_V6ONLY) 1443 * available only prior to bind(2). 1444 * see ipng mailing list, Jun 22 2001. 1445 */ 1446 if (inp->inp_lport || 1447 !IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { 1448 error = EINVAL; 1449 break; 1450 } 1451 if ((ip6_v6only && optval) || 1452 (!ip6_v6only && !optval)) 1453 error = 0; 1454 else 1455 error = EINVAL; 1456 break; 1457 case IPV6_RECVTCLASS: 1458 /* cannot mix with RFC2292 XXX */ 1459 if (OPTBIT(IN6P_RFC2292)) { 1460 error = EINVAL; 1461 break; 1462 } 1463 OPTSET(IN6P_TCLASS); 1464 break; 1465 case IPV6_AUTOFLOWLABEL: 1466 OPTSET(IN6P_AUTOFLOWLABEL); 1467 break; 1468 1469 } 1470 break; 1471 1472 case IPV6_TCLASS: 1473 case IPV6_DONTFRAG: 1474 case IPV6_USE_MIN_MTU: 1475 if (optlen != sizeof(optval)) { 1476 error = EINVAL; 1477 break; 1478 } 1479 optval = *mtod(m, int *); 1480 { 1481 struct ip6_pktopts **optp; 1482 optp = &inp->inp_outputopts6; 1483 error = ip6_pcbopt(optname, 1484 (u_char *)&optval, 1485 sizeof(optval), 1486 optp, 1487 privileged, uproto); 1488 break; 1489 } 1490 1491 case IPV6_2292PKTINFO: 1492 case IPV6_2292HOPLIMIT: 1493 case IPV6_2292HOPOPTS: 1494 case IPV6_2292DSTOPTS: 1495 case IPV6_2292RTHDR: 1496 /* RFC 2292 */ 1497 if (optlen != sizeof(int)) { 1498 error = EINVAL; 1499 break; 1500 } 1501 optval = *mtod(m, int *); 1502 switch (optname) { 1503 case IPV6_2292PKTINFO: 1504 OPTSET2292(IN6P_PKTINFO); 1505 break; 1506 case IPV6_2292HOPLIMIT: 1507 OPTSET2292(IN6P_HOPLIMIT); 1508 break; 1509 case IPV6_2292HOPOPTS: 1510 /* 1511 * Check super-user privilege. 1512 * See comments for IPV6_RECVHOPOPTS. 1513 */ 1514 if (!privileged) 1515 return (EPERM); 1516 OPTSET2292(IN6P_HOPOPTS); 1517 break; 1518 case IPV6_2292DSTOPTS: 1519 if (!privileged) 1520 return (EPERM); 1521 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ 1522 break; 1523 case IPV6_2292RTHDR: 1524 OPTSET2292(IN6P_RTHDR); 1525 break; 1526 } 1527 break; 1528 case IPV6_PKTINFO: 1529 case IPV6_HOPOPTS: 1530 case IPV6_RTHDR: 1531 case IPV6_DSTOPTS: 1532 case IPV6_RTHDRDSTOPTS: 1533 case IPV6_NEXTHOP: 1534 { 1535 /* new advanced API (RFC3542) */ 1536 u_char *optbuf; 1537 int optbuflen; 1538 struct ip6_pktopts **optp; 1539 1540 /* cannot mix with RFC2292 */ 1541 if (OPTBIT(IN6P_RFC2292)) { 1542 error = EINVAL; 1543 break; 1544 } 1545 1546 if (m && m->m_next) { 1547 error = EINVAL; /* XXX */ 1548 break; 1549 } 1550 if (m) { 1551 optbuf = mtod(m, u_char *); 1552 optbuflen = m->m_len; 1553 } else { 1554 optbuf = NULL; 1555 optbuflen = 0; 1556 } 1557 optp = &inp->inp_outputopts6; 1558 error = ip6_pcbopt(optname, 1559 optbuf, optbuflen, 1560 optp, privileged, uproto); 1561 break; 1562 } 1563 #undef OPTSET 1564 1565 case IPV6_MULTICAST_IF: 1566 case IPV6_MULTICAST_HOPS: 1567 case IPV6_MULTICAST_LOOP: 1568 case IPV6_JOIN_GROUP: 1569 case IPV6_LEAVE_GROUP: 1570 error = ip6_setmoptions(optname, 1571 &inp->inp_moptions6, 1572 m); 1573 break; 1574 1575 case IPV6_PORTRANGE: 1576 optval = *mtod(m, int *); 1577 1578 switch (optval) { 1579 case IPV6_PORTRANGE_DEFAULT: 1580 inp->inp_flags &= ~(IN6P_LOWPORT); 1581 inp->inp_flags &= ~(IN6P_HIGHPORT); 1582 break; 1583 1584 case IPV6_PORTRANGE_HIGH: 1585 inp->inp_flags &= ~(IN6P_LOWPORT); 1586 inp->inp_flags |= IN6P_HIGHPORT; 1587 break; 1588 1589 case IPV6_PORTRANGE_LOW: 1590 inp->inp_flags &= ~(IN6P_HIGHPORT); 1591 inp->inp_flags |= IN6P_LOWPORT; 1592 break; 1593 1594 default: 1595 error = EINVAL; 1596 break; 1597 } 1598 break; 1599 1600 case IPSEC6_OUTSA: 1601 #ifndef IPSEC 1602 error = EINVAL; 1603 #else 1604 s = spltdb(); 1605 if (m == 0 || m->m_len != sizeof(struct tdb_ident)) { 1606 error = EINVAL; 1607 } else { 1608 tdbip = mtod(m, struct tdb_ident *); 1609 tdb = gettdb(tdbip->spi, &tdbip->dst, 1610 tdbip->proto); 1611 if (tdb == NULL) 1612 error = ESRCH; 1613 else 1614 tdb_add_inp(tdb, inp, 0); 1615 } 1616 splx(s); 1617 #endif 1618 break; 1619 1620 case IPV6_AUTH_LEVEL: 1621 case IPV6_ESP_TRANS_LEVEL: 1622 case IPV6_ESP_NETWORK_LEVEL: 1623 case IPV6_IPCOMP_LEVEL: 1624 #ifndef IPSEC 1625 error = EINVAL; 1626 #else 1627 if (m == 0 || m->m_len != sizeof(int)) { 1628 error = EINVAL; 1629 break; 1630 } 1631 optval = *mtod(m, int *); 1632 1633 if (optval < IPSEC_LEVEL_BYPASS || 1634 optval > IPSEC_LEVEL_UNIQUE) { 1635 error = EINVAL; 1636 break; 1637 } 1638 1639 switch (optname) { 1640 case IPV6_AUTH_LEVEL: 1641 if (optval < ipsec_auth_default_level && 1642 suser(p, 0)) { 1643 error = EACCES; 1644 break; 1645 } 1646 inp->inp_seclevel[SL_AUTH] = optval; 1647 break; 1648 1649 case IPV6_ESP_TRANS_LEVEL: 1650 if (optval < ipsec_esp_trans_default_level && 1651 suser(p, 0)) { 1652 error = EACCES; 1653 break; 1654 } 1655 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1656 break; 1657 1658 case IPV6_ESP_NETWORK_LEVEL: 1659 if (optval < ipsec_esp_network_default_level && 1660 suser(p, 0)) { 1661 error = EACCES; 1662 break; 1663 } 1664 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1665 break; 1666 1667 case IPV6_IPCOMP_LEVEL: 1668 if (optval < ipsec_ipcomp_default_level && 1669 suser(p, 0)) { 1670 error = EACCES; 1671 break; 1672 } 1673 inp->inp_seclevel[SL_IPCOMP] = optval; 1674 break; 1675 } 1676 if (!error) 1677 inp->inp_secrequire = get_sa_require(inp); 1678 #endif 1679 break; 1680 1681 default: 1682 error = ENOPROTOOPT; 1683 break; 1684 } 1685 if (m) 1686 (void)m_free(m); 1687 break; 1688 1689 case PRCO_GETOPT: 1690 switch (optname) { 1691 1692 case IPV6_2292PKTOPTIONS: 1693 /* 1694 * RFC3542 (effectively) deprecated the 1695 * semantics of the 2292-style pktoptions. 1696 * Since it was not reliable in nature (i.e., 1697 * applications had to expect the lack of some 1698 * information after all), it would make sense 1699 * to simplify this part by always returning 1700 * empty data. 1701 */ 1702 *mp = m_get(M_WAIT, MT_SOOPTS); 1703 (*mp)->m_len = 0; 1704 break; 1705 1706 case IPV6_RECVHOPOPTS: 1707 case IPV6_RECVDSTOPTS: 1708 case IPV6_RECVRTHDRDSTOPTS: 1709 case IPV6_UNICAST_HOPS: 1710 case IPV6_RECVPKTINFO: 1711 case IPV6_RECVHOPLIMIT: 1712 case IPV6_RECVRTHDR: 1713 case IPV6_RECVPATHMTU: 1714 1715 case IPV6_FAITH: 1716 case IPV6_V6ONLY: 1717 case IPV6_PORTRANGE: 1718 case IPV6_RECVTCLASS: 1719 case IPV6_AUTOFLOWLABEL: 1720 switch (optname) { 1721 1722 case IPV6_RECVHOPOPTS: 1723 optval = OPTBIT(IN6P_HOPOPTS); 1724 break; 1725 1726 case IPV6_RECVDSTOPTS: 1727 optval = OPTBIT(IN6P_DSTOPTS); 1728 break; 1729 1730 case IPV6_RECVRTHDRDSTOPTS: 1731 optval = OPTBIT(IN6P_RTHDRDSTOPTS); 1732 break; 1733 1734 case IPV6_UNICAST_HOPS: 1735 optval = inp->inp_hops; 1736 break; 1737 1738 case IPV6_RECVPKTINFO: 1739 optval = OPTBIT(IN6P_PKTINFO); 1740 break; 1741 1742 case IPV6_RECVHOPLIMIT: 1743 optval = OPTBIT(IN6P_HOPLIMIT); 1744 break; 1745 1746 case IPV6_RECVRTHDR: 1747 optval = OPTBIT(IN6P_RTHDR); 1748 break; 1749 1750 case IPV6_RECVPATHMTU: 1751 optval = OPTBIT(IN6P_MTU); 1752 break; 1753 1754 case IPV6_FAITH: 1755 optval = OPTBIT(IN6P_FAITH); 1756 break; 1757 1758 case IPV6_V6ONLY: 1759 optval = (ip6_v6only != 0); /* XXX */ 1760 break; 1761 1762 case IPV6_PORTRANGE: 1763 { 1764 int flags; 1765 flags = inp->inp_flags; 1766 if (flags & IN6P_HIGHPORT) 1767 optval = IPV6_PORTRANGE_HIGH; 1768 else if (flags & IN6P_LOWPORT) 1769 optval = IPV6_PORTRANGE_LOW; 1770 else 1771 optval = 0; 1772 break; 1773 } 1774 case IPV6_RECVTCLASS: 1775 optval = OPTBIT(IN6P_TCLASS); 1776 break; 1777 1778 case IPV6_AUTOFLOWLABEL: 1779 optval = OPTBIT(IN6P_AUTOFLOWLABEL); 1780 break; 1781 } 1782 if (error) 1783 break; 1784 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1785 m->m_len = sizeof(int); 1786 *mtod(m, int *) = optval; 1787 break; 1788 1789 case IPV6_PATHMTU: 1790 { 1791 u_long pmtu = 0; 1792 struct ip6_mtuinfo mtuinfo; 1793 struct route_in6 *ro = (struct route_in6 *)&inp->inp_route6; 1794 1795 if (!(so->so_state & SS_ISCONNECTED)) 1796 return (ENOTCONN); 1797 /* 1798 * XXX: we dot not consider the case of source 1799 * routing, or optional information to specify 1800 * the outgoing interface. 1801 */ 1802 error = ip6_getpmtu(ro, NULL, NULL, 1803 &inp->inp_faddr6, &pmtu, NULL); 1804 if (error) 1805 break; 1806 if (pmtu > IPV6_MAXPACKET) 1807 pmtu = IPV6_MAXPACKET; 1808 1809 bzero(&mtuinfo, sizeof(mtuinfo)); 1810 mtuinfo.ip6m_mtu = (u_int32_t)pmtu; 1811 optdata = (void *)&mtuinfo; 1812 optdatalen = sizeof(mtuinfo); 1813 if (optdatalen > MCLBYTES) 1814 return (EMSGSIZE); /* XXX */ 1815 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1816 if (optdatalen > MLEN) 1817 MCLGET(m, M_WAIT); 1818 m->m_len = optdatalen; 1819 bcopy(optdata, mtod(m, void *), optdatalen); 1820 break; 1821 } 1822 1823 case IPV6_2292PKTINFO: 1824 case IPV6_2292HOPLIMIT: 1825 case IPV6_2292HOPOPTS: 1826 case IPV6_2292RTHDR: 1827 case IPV6_2292DSTOPTS: 1828 switch (optname) { 1829 case IPV6_2292PKTINFO: 1830 optval = OPTBIT(IN6P_PKTINFO); 1831 break; 1832 case IPV6_2292HOPLIMIT: 1833 optval = OPTBIT(IN6P_HOPLIMIT); 1834 break; 1835 case IPV6_2292HOPOPTS: 1836 optval = OPTBIT(IN6P_HOPOPTS); 1837 break; 1838 case IPV6_2292RTHDR: 1839 optval = OPTBIT(IN6P_RTHDR); 1840 break; 1841 case IPV6_2292DSTOPTS: 1842 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); 1843 break; 1844 } 1845 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1846 m->m_len = sizeof(int); 1847 *mtod(m, int *) = optval; 1848 break; 1849 case IPV6_PKTINFO: 1850 case IPV6_HOPOPTS: 1851 case IPV6_RTHDR: 1852 case IPV6_DSTOPTS: 1853 case IPV6_RTHDRDSTOPTS: 1854 case IPV6_NEXTHOP: 1855 case IPV6_TCLASS: 1856 case IPV6_DONTFRAG: 1857 case IPV6_USE_MIN_MTU: 1858 error = ip6_getpcbopt(inp->inp_outputopts6, 1859 optname, mp); 1860 break; 1861 1862 case IPV6_MULTICAST_IF: 1863 case IPV6_MULTICAST_HOPS: 1864 case IPV6_MULTICAST_LOOP: 1865 case IPV6_JOIN_GROUP: 1866 case IPV6_LEAVE_GROUP: 1867 error = ip6_getmoptions(optname, 1868 inp->inp_moptions6, mp); 1869 break; 1870 1871 case IPSEC6_OUTSA: 1872 #ifndef IPSEC 1873 error = EINVAL; 1874 #else 1875 s = spltdb(); 1876 if (inp->inp_tdb_out == NULL) { 1877 error = ENOENT; 1878 } else { 1879 tdbi.spi = inp->inp_tdb_out->tdb_spi; 1880 tdbi.dst = inp->inp_tdb_out->tdb_dst; 1881 tdbi.proto = inp->inp_tdb_out->tdb_sproto; 1882 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1883 m->m_len = sizeof(tdbi); 1884 bcopy((caddr_t)&tdbi, mtod(m, caddr_t), 1885 (unsigned)m->m_len); 1886 } 1887 splx(s); 1888 #endif 1889 break; 1890 1891 case IPV6_AUTH_LEVEL: 1892 case IPV6_ESP_TRANS_LEVEL: 1893 case IPV6_ESP_NETWORK_LEVEL: 1894 case IPV6_IPCOMP_LEVEL: 1895 #ifndef IPSEC 1896 m->m_len = sizeof(int); 1897 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1898 #else 1899 m->m_len = sizeof(int); 1900 switch (optname) { 1901 case IPV6_AUTH_LEVEL: 1902 optval = inp->inp_seclevel[SL_AUTH]; 1903 break; 1904 1905 case IPV6_ESP_TRANS_LEVEL: 1906 optval = 1907 inp->inp_seclevel[SL_ESP_TRANS]; 1908 break; 1909 1910 case IPV6_ESP_NETWORK_LEVEL: 1911 optval = 1912 inp->inp_seclevel[SL_ESP_NETWORK]; 1913 break; 1914 1915 case IPV6_IPCOMP_LEVEL: 1916 optval = inp->inp_seclevel[SL_IPCOMP]; 1917 break; 1918 } 1919 *mtod(m, int *) = optval; 1920 #endif 1921 break; 1922 1923 default: 1924 error = ENOPROTOOPT; 1925 break; 1926 } 1927 break; 1928 } 1929 } else { 1930 error = EINVAL; 1931 if (op == PRCO_SETOPT && *mp) 1932 (void)m_free(*mp); 1933 } 1934 return (error); 1935 } 1936 1937 int 1938 ip6_raw_ctloutput(int op, struct socket *so, int level, int optname, 1939 struct mbuf **mp) 1940 { 1941 int error = 0, optval, optlen; 1942 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); 1943 struct inpcb *inp = sotoinpcb(so); 1944 struct mbuf *m = *mp; 1945 1946 optlen = m ? m->m_len : 0; 1947 1948 if (level != IPPROTO_IPV6) { 1949 if (op == PRCO_SETOPT && *mp) 1950 (void)m_free(*mp); 1951 return (EINVAL); 1952 } 1953 1954 switch (optname) { 1955 case IPV6_CHECKSUM: 1956 /* 1957 * For ICMPv6 sockets, no modification allowed for checksum 1958 * offset, permit "no change" values to help existing apps. 1959 * 1960 * RFC3542 says: "An attempt to set IPV6_CHECKSUM 1961 * for an ICMPv6 socket will fail." 1962 * The current behavior does not meet RFC3542. 1963 */ 1964 switch (op) { 1965 case PRCO_SETOPT: 1966 if (optlen != sizeof(int)) { 1967 error = EINVAL; 1968 break; 1969 } 1970 optval = *mtod(m, int *); 1971 if ((optval % 2) != 0) { 1972 /* the API assumes even offset values */ 1973 error = EINVAL; 1974 } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { 1975 if (optval != icmp6off) 1976 error = EINVAL; 1977 } else 1978 inp->in6p_cksum = optval; 1979 break; 1980 1981 case PRCO_GETOPT: 1982 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) 1983 optval = icmp6off; 1984 else 1985 optval = inp->in6p_cksum; 1986 1987 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1988 m->m_len = sizeof(int); 1989 *mtod(m, int *) = optval; 1990 break; 1991 1992 default: 1993 error = EINVAL; 1994 break; 1995 } 1996 break; 1997 1998 default: 1999 error = ENOPROTOOPT; 2000 break; 2001 } 2002 2003 if (op == PRCO_SETOPT && m) 2004 (void)m_free(m); 2005 2006 return (error); 2007 } 2008 2009 /* 2010 * Set up IP6 options in pcb for insertion in output packets. 2011 * Store in mbuf with pointer in pcbopt, adding pseudo-option 2012 * with destination address if source routed. 2013 */ 2014 static int 2015 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so) 2016 { 2017 struct ip6_pktopts *opt = *pktopt; 2018 int error = 0; 2019 struct proc *p = curproc; /* XXX */ 2020 int priv = 0; 2021 2022 /* turn off any old options. */ 2023 if (opt) 2024 ip6_clearpktopts(opt, -1); 2025 else 2026 opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); 2027 *pktopt = 0; 2028 2029 if (!m || m->m_len == 0) { 2030 /* 2031 * Only turning off any previous options, regardless of 2032 * whether the opt is just created or given. 2033 */ 2034 free(opt, M_IP6OPT); 2035 return (0); 2036 } 2037 2038 /* set options specified by user. */ 2039 if (p && !suser(p, 0)) 2040 priv = 1; 2041 if ((error = ip6_setpktopts(m, opt, NULL, priv, 2042 so->so_proto->pr_protocol)) != 0) { 2043 ip6_clearpktopts(opt, -1); /* XXX discard all options */ 2044 free(opt, M_IP6OPT); 2045 return (error); 2046 } 2047 *pktopt = opt; 2048 return (0); 2049 } 2050 2051 /* 2052 * initialize ip6_pktopts. beware that there are non-zero default values in 2053 * the struct. 2054 */ 2055 void 2056 ip6_initpktopts(struct ip6_pktopts *opt) 2057 { 2058 2059 bzero(opt, sizeof(*opt)); 2060 opt->ip6po_hlim = -1; /* -1 means default hop limit */ 2061 opt->ip6po_tclass = -1; /* -1 means default traffic class */ 2062 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; 2063 } 2064 2065 #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* XXX */ 2066 static int 2067 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, 2068 int priv, int uproto) 2069 { 2070 struct ip6_pktopts *opt; 2071 2072 if (*pktopt == NULL) { 2073 *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, 2074 M_WAITOK); 2075 ip6_initpktopts(*pktopt); 2076 } 2077 opt = *pktopt; 2078 2079 return (ip6_setpktopt(optname, buf, len, opt, priv, 1, 0, uproto)); 2080 } 2081 2082 static int 2083 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct mbuf **mp) 2084 { 2085 void *optdata = NULL; 2086 int optdatalen = 0; 2087 struct ip6_ext *ip6e; 2088 int error = 0; 2089 struct in6_pktinfo null_pktinfo; 2090 int deftclass = 0, on; 2091 int defminmtu = IP6PO_MINMTU_MCASTONLY; 2092 struct mbuf *m; 2093 2094 switch (optname) { 2095 case IPV6_PKTINFO: 2096 if (pktopt && pktopt->ip6po_pktinfo) 2097 optdata = (void *)pktopt->ip6po_pktinfo; 2098 else { 2099 /* XXX: we don't have to do this every time... */ 2100 bzero(&null_pktinfo, sizeof(null_pktinfo)); 2101 optdata = (void *)&null_pktinfo; 2102 } 2103 optdatalen = sizeof(struct in6_pktinfo); 2104 break; 2105 case IPV6_TCLASS: 2106 if (pktopt && pktopt->ip6po_tclass >= 0) 2107 optdata = (void *)&pktopt->ip6po_tclass; 2108 else 2109 optdata = (void *)&deftclass; 2110 optdatalen = sizeof(int); 2111 break; 2112 case IPV6_HOPOPTS: 2113 if (pktopt && pktopt->ip6po_hbh) { 2114 optdata = (void *)pktopt->ip6po_hbh; 2115 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; 2116 optdatalen = (ip6e->ip6e_len + 1) << 3; 2117 } 2118 break; 2119 case IPV6_RTHDR: 2120 if (pktopt && pktopt->ip6po_rthdr) { 2121 optdata = (void *)pktopt->ip6po_rthdr; 2122 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; 2123 optdatalen = (ip6e->ip6e_len + 1) << 3; 2124 } 2125 break; 2126 case IPV6_RTHDRDSTOPTS: 2127 if (pktopt && pktopt->ip6po_dest1) { 2128 optdata = (void *)pktopt->ip6po_dest1; 2129 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; 2130 optdatalen = (ip6e->ip6e_len + 1) << 3; 2131 } 2132 break; 2133 case IPV6_DSTOPTS: 2134 if (pktopt && pktopt->ip6po_dest2) { 2135 optdata = (void *)pktopt->ip6po_dest2; 2136 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; 2137 optdatalen = (ip6e->ip6e_len + 1) << 3; 2138 } 2139 break; 2140 case IPV6_NEXTHOP: 2141 if (pktopt && pktopt->ip6po_nexthop) { 2142 optdata = (void *)pktopt->ip6po_nexthop; 2143 optdatalen = pktopt->ip6po_nexthop->sa_len; 2144 } 2145 break; 2146 case IPV6_USE_MIN_MTU: 2147 if (pktopt) 2148 optdata = (void *)&pktopt->ip6po_minmtu; 2149 else 2150 optdata = (void *)&defminmtu; 2151 optdatalen = sizeof(int); 2152 break; 2153 case IPV6_DONTFRAG: 2154 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) 2155 on = 1; 2156 else 2157 on = 0; 2158 optdata = (void *)&on; 2159 optdatalen = sizeof(on); 2160 break; 2161 default: /* should not happen */ 2162 #ifdef DIAGNOSTIC 2163 panic("ip6_getpcbopt: unexpected option\n"); 2164 #endif 2165 return (ENOPROTOOPT); 2166 } 2167 2168 if (optdatalen > MCLBYTES) 2169 return (EMSGSIZE); /* XXX */ 2170 *mp = m = m_get(M_WAIT, MT_SOOPTS); 2171 if (optdatalen > MLEN) 2172 MCLGET(m, M_WAIT); 2173 m->m_len = optdatalen; 2174 if (optdatalen) 2175 bcopy(optdata, mtod(m, void *), optdatalen); 2176 2177 return (error); 2178 } 2179 2180 void 2181 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) 2182 { 2183 if (optname == -1 || optname == IPV6_PKTINFO) { 2184 if (pktopt->ip6po_pktinfo) 2185 free(pktopt->ip6po_pktinfo, M_IP6OPT); 2186 pktopt->ip6po_pktinfo = NULL; 2187 } 2188 if (optname == -1 || optname == IPV6_HOPLIMIT) 2189 pktopt->ip6po_hlim = -1; 2190 if (optname == -1 || optname == IPV6_TCLASS) 2191 pktopt->ip6po_tclass = -1; 2192 if (optname == -1 || optname == IPV6_NEXTHOP) { 2193 if (pktopt->ip6po_nextroute.ro_rt) { 2194 RTFREE(pktopt->ip6po_nextroute.ro_rt); 2195 pktopt->ip6po_nextroute.ro_rt = NULL; 2196 } 2197 if (pktopt->ip6po_nexthop) 2198 free(pktopt->ip6po_nexthop, M_IP6OPT); 2199 pktopt->ip6po_nexthop = NULL; 2200 } 2201 if (optname == -1 || optname == IPV6_HOPOPTS) { 2202 if (pktopt->ip6po_hbh) 2203 free(pktopt->ip6po_hbh, M_IP6OPT); 2204 pktopt->ip6po_hbh = NULL; 2205 } 2206 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { 2207 if (pktopt->ip6po_dest1) 2208 free(pktopt->ip6po_dest1, M_IP6OPT); 2209 pktopt->ip6po_dest1 = NULL; 2210 } 2211 if (optname == -1 || optname == IPV6_RTHDR) { 2212 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) 2213 free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); 2214 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; 2215 if (pktopt->ip6po_route.ro_rt) { 2216 RTFREE(pktopt->ip6po_route.ro_rt); 2217 pktopt->ip6po_route.ro_rt = NULL; 2218 } 2219 } 2220 if (optname == -1 || optname == IPV6_DSTOPTS) { 2221 if (pktopt->ip6po_dest2) 2222 free(pktopt->ip6po_dest2, M_IP6OPT); 2223 pktopt->ip6po_dest2 = NULL; 2224 } 2225 } 2226 2227 #define PKTOPT_EXTHDRCPY(type) \ 2228 do {\ 2229 if (src->type) {\ 2230 int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ 2231 dst->type = malloc(hlen, M_IP6OPT, canwait);\ 2232 if (dst->type == NULL && canwait == M_NOWAIT)\ 2233 goto bad;\ 2234 bcopy(src->type, dst->type, hlen);\ 2235 }\ 2236 } while (/*CONSTCOND*/ 0) 2237 2238 static int 2239 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait) 2240 { 2241 dst->ip6po_hlim = src->ip6po_hlim; 2242 dst->ip6po_tclass = src->ip6po_tclass; 2243 dst->ip6po_flags = src->ip6po_flags; 2244 if (src->ip6po_pktinfo) { 2245 dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), 2246 M_IP6OPT, canwait); 2247 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) 2248 goto bad; 2249 *dst->ip6po_pktinfo = *src->ip6po_pktinfo; 2250 } 2251 if (src->ip6po_nexthop) { 2252 dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, 2253 M_IP6OPT, canwait); 2254 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) 2255 goto bad; 2256 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, 2257 src->ip6po_nexthop->sa_len); 2258 } 2259 PKTOPT_EXTHDRCPY(ip6po_hbh); 2260 PKTOPT_EXTHDRCPY(ip6po_dest1); 2261 PKTOPT_EXTHDRCPY(ip6po_dest2); 2262 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ 2263 return (0); 2264 2265 bad: 2266 ip6_clearpktopts(dst, -1); 2267 return (ENOBUFS); 2268 } 2269 #undef PKTOPT_EXTHDRCPY 2270 2271 void 2272 ip6_freepcbopts(struct ip6_pktopts *pktopt) 2273 { 2274 if (pktopt == NULL) 2275 return; 2276 2277 ip6_clearpktopts(pktopt, -1); 2278 2279 free(pktopt, M_IP6OPT); 2280 } 2281 2282 /* 2283 * Set the IP6 multicast options in response to user setsockopt(). 2284 */ 2285 static int 2286 ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m) 2287 { 2288 int error = 0; 2289 u_int loop, ifindex; 2290 struct ipv6_mreq *mreq; 2291 struct ifnet *ifp; 2292 struct ip6_moptions *im6o = *im6op; 2293 struct route_in6 ro; 2294 struct sockaddr_in6 *dst; 2295 struct in6_multi_mship *imm; 2296 struct proc *p = curproc; /* XXX */ 2297 2298 if (im6o == NULL) { 2299 /* 2300 * No multicast option buffer attached to the pcb; 2301 * allocate one and initialize to default values. 2302 */ 2303 im6o = (struct ip6_moptions *) 2304 malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK); 2305 2306 if (im6o == NULL) 2307 return (ENOBUFS); 2308 *im6op = im6o; 2309 im6o->im6o_multicast_ifp = NULL; 2310 im6o->im6o_multicast_hlim = ip6_defmcasthlim; 2311 im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP; 2312 LIST_INIT(&im6o->im6o_memberships); 2313 } 2314 2315 switch (optname) { 2316 2317 case IPV6_MULTICAST_IF: 2318 /* 2319 * Select the interface for outgoing multicast packets. 2320 */ 2321 if (m == NULL || m->m_len != sizeof(u_int)) { 2322 error = EINVAL; 2323 break; 2324 } 2325 bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex)); 2326 if (ifindex == 0) 2327 ifp = NULL; 2328 else { 2329 if (ifindex < 0 || if_indexlim <= ifindex || 2330 !ifindex2ifnet[ifindex]) { 2331 error = ENXIO; /* XXX EINVAL? */ 2332 break; 2333 } 2334 ifp = ifindex2ifnet[ifindex]; 2335 if (ifp == NULL || 2336 (ifp->if_flags & IFF_MULTICAST) == 0) { 2337 error = EADDRNOTAVAIL; 2338 break; 2339 } 2340 } 2341 im6o->im6o_multicast_ifp = ifp; 2342 break; 2343 2344 case IPV6_MULTICAST_HOPS: 2345 { 2346 /* 2347 * Set the IP6 hoplimit for outgoing multicast packets. 2348 */ 2349 int optval; 2350 if (m == NULL || m->m_len != sizeof(int)) { 2351 error = EINVAL; 2352 break; 2353 } 2354 bcopy(mtod(m, u_int *), &optval, sizeof(optval)); 2355 if (optval < -1 || optval >= 256) 2356 error = EINVAL; 2357 else if (optval == -1) 2358 im6o->im6o_multicast_hlim = ip6_defmcasthlim; 2359 else 2360 im6o->im6o_multicast_hlim = optval; 2361 break; 2362 } 2363 2364 case IPV6_MULTICAST_LOOP: 2365 /* 2366 * Set the loopback flag for outgoing multicast packets. 2367 * Must be zero or one. 2368 */ 2369 if (m == NULL || m->m_len != sizeof(u_int)) { 2370 error = EINVAL; 2371 break; 2372 } 2373 bcopy(mtod(m, u_int *), &loop, sizeof(loop)); 2374 if (loop > 1) { 2375 error = EINVAL; 2376 break; 2377 } 2378 im6o->im6o_multicast_loop = loop; 2379 break; 2380 2381 case IPV6_JOIN_GROUP: 2382 /* 2383 * Add a multicast group membership. 2384 * Group must be a valid IP6 multicast address. 2385 */ 2386 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { 2387 error = EINVAL; 2388 break; 2389 } 2390 mreq = mtod(m, struct ipv6_mreq *); 2391 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { 2392 /* 2393 * We use the unspecified address to specify to accept 2394 * all multicast addresses. Only super user is allowed 2395 * to do this. 2396 */ 2397 if (suser(p, 0)) 2398 { 2399 error = EACCES; 2400 break; 2401 } 2402 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { 2403 error = EINVAL; 2404 break; 2405 } 2406 2407 /* 2408 * If no interface was explicitly specified, choose an 2409 * appropriate one according to the given multicast address. 2410 */ 2411 if (mreq->ipv6mr_interface == 0) { 2412 /* 2413 * Look up the routing table for the 2414 * address, and choose the outgoing interface. 2415 * XXX: is it a good approach? 2416 */ 2417 ro.ro_rt = NULL; 2418 dst = (struct sockaddr_in6 *)&ro.ro_dst; 2419 bzero(dst, sizeof(*dst)); 2420 dst->sin6_len = sizeof(struct sockaddr_in6); 2421 dst->sin6_family = AF_INET6; 2422 dst->sin6_addr = mreq->ipv6mr_multiaddr; 2423 rtalloc((struct route *)&ro); 2424 if (ro.ro_rt == NULL) { 2425 error = EADDRNOTAVAIL; 2426 break; 2427 } 2428 ifp = ro.ro_rt->rt_ifp; 2429 rtfree(ro.ro_rt); 2430 } else { 2431 /* 2432 * If the interface is specified, validate it. 2433 */ 2434 if (mreq->ipv6mr_interface < 0 || 2435 if_indexlim <= mreq->ipv6mr_interface || 2436 !ifindex2ifnet[mreq->ipv6mr_interface]) { 2437 error = ENXIO; /* XXX EINVAL? */ 2438 break; 2439 } 2440 ifp = ifindex2ifnet[mreq->ipv6mr_interface]; 2441 } 2442 2443 /* 2444 * See if we found an interface, and confirm that it 2445 * supports multicast 2446 */ 2447 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 2448 error = EADDRNOTAVAIL; 2449 break; 2450 } 2451 /* 2452 * Put interface index into the multicast address, 2453 * if the address has link/interface-local scope. 2454 */ 2455 if (IN6_IS_SCOPE_EMBED(&mreq->ipv6mr_multiaddr)) { 2456 mreq->ipv6mr_multiaddr.s6_addr16[1] = 2457 htons(ifp->if_index); 2458 } 2459 /* 2460 * See if the membership already exists. 2461 */ 2462 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) 2463 if (imm->i6mm_maddr->in6m_ifp == ifp && 2464 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, 2465 &mreq->ipv6mr_multiaddr)) 2466 break; 2467 if (imm != NULL) { 2468 error = EADDRINUSE; 2469 break; 2470 } 2471 /* 2472 * Everything looks good; add a new record to the multicast 2473 * address list for the given interface. 2474 */ 2475 imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error); 2476 if (!imm) 2477 break; 2478 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 2479 break; 2480 2481 case IPV6_LEAVE_GROUP: 2482 /* 2483 * Drop a multicast group membership. 2484 * Group must be a valid IP6 multicast address. 2485 */ 2486 if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { 2487 error = EINVAL; 2488 break; 2489 } 2490 mreq = mtod(m, struct ipv6_mreq *); 2491 if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { 2492 if (suser(p, 0)) 2493 { 2494 error = EACCES; 2495 break; 2496 } 2497 } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { 2498 error = EINVAL; 2499 break; 2500 } 2501 /* 2502 * If an interface address was specified, get a pointer 2503 * to its ifnet structure. 2504 */ 2505 if (mreq->ipv6mr_interface == 0) 2506 ifp = NULL; 2507 else { 2508 if (mreq->ipv6mr_interface < 0 || 2509 if_indexlim <= mreq->ipv6mr_interface || 2510 !ifindex2ifnet[mreq->ipv6mr_interface]) { 2511 error = ENXIO; /* XXX EINVAL? */ 2512 break; 2513 } 2514 ifp = ifindex2ifnet[mreq->ipv6mr_interface]; 2515 } 2516 2517 /* 2518 * Put interface index into the multicast address, 2519 * if the address has link-local scope. 2520 */ 2521 if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { 2522 mreq->ipv6mr_multiaddr.s6_addr16[1] = 2523 htons(mreq->ipv6mr_interface); 2524 } 2525 /* 2526 * Find the membership in the membership list. 2527 */ 2528 LIST_FOREACH(imm, &im6o->im6o_memberships, i6mm_chain) { 2529 if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && 2530 IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, 2531 &mreq->ipv6mr_multiaddr)) 2532 break; 2533 } 2534 if (imm == NULL) { 2535 /* Unable to resolve interface */ 2536 error = EADDRNOTAVAIL; 2537 break; 2538 } 2539 /* 2540 * Give up the multicast address record to which the 2541 * membership points. 2542 */ 2543 LIST_REMOVE(imm, i6mm_chain); 2544 in6_leavegroup(imm); 2545 break; 2546 2547 default: 2548 error = EOPNOTSUPP; 2549 break; 2550 } 2551 2552 /* 2553 * If all options have default values, no need to keep the option 2554 * structure. 2555 */ 2556 if (im6o->im6o_multicast_ifp == NULL && 2557 im6o->im6o_multicast_hlim == ip6_defmcasthlim && 2558 im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP && 2559 LIST_EMPTY(&im6o->im6o_memberships)) { 2560 free(*im6op, M_IPMOPTS); 2561 *im6op = NULL; 2562 } 2563 2564 return (error); 2565 } 2566 2567 /* 2568 * Return the IP6 multicast options in response to user getsockopt(). 2569 */ 2570 static int 2571 ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp) 2572 { 2573 u_int *hlim, *loop, *ifindex; 2574 2575 *mp = m_get(M_WAIT, MT_SOOPTS); 2576 2577 switch (optname) { 2578 2579 case IPV6_MULTICAST_IF: 2580 ifindex = mtod(*mp, u_int *); 2581 (*mp)->m_len = sizeof(u_int); 2582 if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) 2583 *ifindex = 0; 2584 else 2585 *ifindex = im6o->im6o_multicast_ifp->if_index; 2586 return (0); 2587 2588 case IPV6_MULTICAST_HOPS: 2589 hlim = mtod(*mp, u_int *); 2590 (*mp)->m_len = sizeof(u_int); 2591 if (im6o == NULL) 2592 *hlim = ip6_defmcasthlim; 2593 else 2594 *hlim = im6o->im6o_multicast_hlim; 2595 return (0); 2596 2597 case IPV6_MULTICAST_LOOP: 2598 loop = mtod(*mp, u_int *); 2599 (*mp)->m_len = sizeof(u_int); 2600 if (im6o == NULL) 2601 *loop = ip6_defmcasthlim; 2602 else 2603 *loop = im6o->im6o_multicast_loop; 2604 return (0); 2605 2606 default: 2607 return (EOPNOTSUPP); 2608 } 2609 } 2610 2611 /* 2612 * Discard the IP6 multicast options. 2613 */ 2614 void 2615 ip6_freemoptions(struct ip6_moptions *im6o) 2616 { 2617 struct in6_multi_mship *imm; 2618 2619 if (im6o == NULL) 2620 return; 2621 2622 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 2623 imm = LIST_FIRST(&im6o->im6o_memberships); 2624 LIST_REMOVE(imm, i6mm_chain); 2625 in6_leavegroup(imm); 2626 } 2627 free(im6o, M_IPMOPTS); 2628 } 2629 2630 /* 2631 * Set IPv6 outgoing packet options based on advanced API. 2632 */ 2633 int 2634 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, 2635 struct ip6_pktopts *stickyopt, int priv, int uproto) 2636 { 2637 u_int clen; 2638 struct cmsghdr *cm = 0; 2639 caddr_t cmsgs; 2640 int error; 2641 2642 if (control == NULL || opt == NULL) 2643 return (EINVAL); 2644 2645 ip6_initpktopts(opt); 2646 if (stickyopt) { 2647 int error; 2648 2649 /* 2650 * If stickyopt is provided, make a local copy of the options 2651 * for this particular packet, then override them by ancillary 2652 * objects. 2653 * XXX: copypktopts() does not copy the cached route to a next 2654 * hop (if any). This is not very good in terms of efficiency, 2655 * but we can allow this since this option should be rarely 2656 * used. 2657 */ 2658 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) 2659 return (error); 2660 } 2661 2662 /* 2663 * XXX: Currently, we assume all the optional information is stored 2664 * in a single mbuf. 2665 */ 2666 if (control->m_next) 2667 return (EINVAL); 2668 2669 clen = control->m_len; 2670 cmsgs = mtod(control, caddr_t); 2671 do { 2672 if (clen < CMSG_LEN(0)) 2673 return (EINVAL); 2674 cm = (struct cmsghdr *)cmsgs; 2675 if (cm->cmsg_len < CMSG_LEN(0) || 2676 CMSG_ALIGN(cm->cmsg_len) > clen) 2677 return (EINVAL); 2678 if (cm->cmsg_level == IPPROTO_IPV6) { 2679 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), 2680 cm->cmsg_len - CMSG_LEN(0), opt, priv, 0, 1, uproto); 2681 if (error) 2682 return (error); 2683 } 2684 2685 clen -= CMSG_ALIGN(cm->cmsg_len); 2686 cmsgs += CMSG_ALIGN(cm->cmsg_len); 2687 } while (clen); 2688 2689 return (0); 2690 } 2691 2692 /* 2693 * Set a particular packet option, as a sticky option or an ancillary data 2694 * item. "len" can be 0 only when it's a sticky option. 2695 * We have 4 cases of combination of "sticky" and "cmsg": 2696 * "sticky=0, cmsg=0": impossible 2697 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data 2698 * "sticky=1, cmsg=0": RFC3542 socket option 2699 * "sticky=1, cmsg=1": RFC2292 socket option 2700 */ 2701 static int 2702 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, 2703 int priv, int sticky, int cmsg, int uproto) 2704 { 2705 int minmtupolicy; 2706 2707 if (!sticky && !cmsg) { 2708 #ifdef DIAGNOSTIC 2709 printf("ip6_setpktopt: impossible case\n"); 2710 #endif 2711 return (EINVAL); 2712 } 2713 2714 /* 2715 * IPV6_2292xxx is for backward compatibility to RFC2292, and should 2716 * not be specified in the context of RFC3542. Conversely, 2717 * RFC3542 types should not be specified in the context of RFC2292. 2718 */ 2719 if (!cmsg) { 2720 switch (optname) { 2721 case IPV6_2292PKTINFO: 2722 case IPV6_2292HOPLIMIT: 2723 case IPV6_2292NEXTHOP: 2724 case IPV6_2292HOPOPTS: 2725 case IPV6_2292DSTOPTS: 2726 case IPV6_2292RTHDR: 2727 case IPV6_2292PKTOPTIONS: 2728 return (ENOPROTOOPT); 2729 } 2730 } 2731 if (sticky && cmsg) { 2732 switch (optname) { 2733 case IPV6_PKTINFO: 2734 case IPV6_HOPLIMIT: 2735 case IPV6_NEXTHOP: 2736 case IPV6_HOPOPTS: 2737 case IPV6_DSTOPTS: 2738 case IPV6_RTHDRDSTOPTS: 2739 case IPV6_RTHDR: 2740 case IPV6_USE_MIN_MTU: 2741 case IPV6_DONTFRAG: 2742 case IPV6_TCLASS: 2743 return (ENOPROTOOPT); 2744 } 2745 } 2746 2747 switch (optname) { 2748 case IPV6_2292PKTINFO: 2749 case IPV6_PKTINFO: 2750 { 2751 struct ifnet *ifp = NULL; 2752 struct in6_pktinfo *pktinfo; 2753 2754 if (len != sizeof(struct in6_pktinfo)) 2755 return (EINVAL); 2756 2757 pktinfo = (struct in6_pktinfo *)buf; 2758 2759 /* 2760 * An application can clear any sticky IPV6_PKTINFO option by 2761 * doing a "regular" setsockopt with ipi6_addr being 2762 * in6addr_any and ipi6_ifindex being zero. 2763 * [RFC 3542, Section 6] 2764 */ 2765 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && 2766 pktinfo->ipi6_ifindex == 0 && 2767 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 2768 ip6_clearpktopts(opt, optname); 2769 break; 2770 } 2771 2772 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && 2773 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { 2774 return (EINVAL); 2775 } 2776 2777 /* validate the interface index if specified. */ 2778 if (pktinfo->ipi6_ifindex >= if_indexlim || 2779 pktinfo->ipi6_ifindex < 0) { 2780 return (ENXIO); 2781 } 2782 if (pktinfo->ipi6_ifindex) { 2783 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex]; 2784 if (ifp == NULL) 2785 return (ENXIO); 2786 } 2787 2788 /* 2789 * We store the address anyway, and let in6_selectsrc() 2790 * validate the specified address. This is because ipi6_addr 2791 * may not have enough information about its scope zone, and 2792 * we may need additional information (such as outgoing 2793 * interface or the scope zone of a destination address) to 2794 * disambiguate the scope. 2795 * XXX: the delay of the validation may confuse the 2796 * application when it is used as a sticky option. 2797 */ 2798 if (opt->ip6po_pktinfo == NULL) { 2799 opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), 2800 M_IP6OPT, M_NOWAIT); 2801 if (opt->ip6po_pktinfo == NULL) 2802 return (ENOBUFS); 2803 } 2804 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); 2805 break; 2806 } 2807 2808 case IPV6_2292HOPLIMIT: 2809 case IPV6_HOPLIMIT: 2810 { 2811 int *hlimp; 2812 2813 /* 2814 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT 2815 * to simplify the ordering among hoplimit options. 2816 */ 2817 if (optname == IPV6_HOPLIMIT && sticky) 2818 return (ENOPROTOOPT); 2819 2820 if (len != sizeof(int)) 2821 return (EINVAL); 2822 hlimp = (int *)buf; 2823 if (*hlimp < -1 || *hlimp > 255) 2824 return (EINVAL); 2825 2826 opt->ip6po_hlim = *hlimp; 2827 break; 2828 } 2829 2830 case IPV6_TCLASS: 2831 { 2832 int tclass; 2833 2834 if (len != sizeof(int)) 2835 return (EINVAL); 2836 tclass = *(int *)buf; 2837 if (tclass < -1 || tclass > 255) 2838 return (EINVAL); 2839 2840 opt->ip6po_tclass = tclass; 2841 break; 2842 } 2843 2844 case IPV6_2292NEXTHOP: 2845 case IPV6_NEXTHOP: 2846 if (!priv) 2847 return (EPERM); 2848 2849 if (len == 0) { /* just remove the option */ 2850 ip6_clearpktopts(opt, IPV6_NEXTHOP); 2851 break; 2852 } 2853 2854 /* check if cmsg_len is large enough for sa_len */ 2855 if (len < sizeof(struct sockaddr) || len < *buf) 2856 return (EINVAL); 2857 2858 switch (((struct sockaddr *)buf)->sa_family) { 2859 case AF_INET6: 2860 { 2861 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; 2862 2863 if (sa6->sin6_len != sizeof(struct sockaddr_in6)) 2864 return (EINVAL); 2865 2866 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || 2867 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { 2868 return (EINVAL); 2869 } 2870 if (IN6_IS_SCOPE_EMBED(&sa6->sin6_addr)) { 2871 if (sa6->sin6_scope_id < 0 || 2872 if_indexlim <= sa6->sin6_scope_id || 2873 !ifindex2ifnet[sa6->sin6_scope_id]) 2874 return (EINVAL); 2875 sa6->sin6_addr.s6_addr16[1] = 2876 htonl(sa6->sin6_scope_id); 2877 } else if (sa6->sin6_scope_id) 2878 return (EINVAL); 2879 break; 2880 } 2881 case AF_LINK: /* eventually be supported? */ 2882 default: 2883 return (EAFNOSUPPORT); 2884 } 2885 2886 /* turn off the previous option, then set the new option. */ 2887 ip6_clearpktopts(opt, IPV6_NEXTHOP); 2888 opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); 2889 if (opt->ip6po_nexthop == NULL) 2890 return (ENOBUFS); 2891 bcopy(buf, opt->ip6po_nexthop, *buf); 2892 break; 2893 2894 case IPV6_2292HOPOPTS: 2895 case IPV6_HOPOPTS: 2896 { 2897 struct ip6_hbh *hbh; 2898 int hbhlen; 2899 2900 /* 2901 * XXX: We don't allow a non-privileged user to set ANY HbH 2902 * options, since per-option restriction has too much 2903 * overhead. 2904 */ 2905 if (!priv) 2906 return (EPERM); 2907 2908 if (len == 0) { 2909 ip6_clearpktopts(opt, IPV6_HOPOPTS); 2910 break; /* just remove the option */ 2911 } 2912 2913 /* message length validation */ 2914 if (len < sizeof(struct ip6_hbh)) 2915 return (EINVAL); 2916 hbh = (struct ip6_hbh *)buf; 2917 hbhlen = (hbh->ip6h_len + 1) << 3; 2918 if (len != hbhlen) 2919 return (EINVAL); 2920 2921 /* turn off the previous option, then set the new option. */ 2922 ip6_clearpktopts(opt, IPV6_HOPOPTS); 2923 opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); 2924 if (opt->ip6po_hbh == NULL) 2925 return (ENOBUFS); 2926 bcopy(hbh, opt->ip6po_hbh, hbhlen); 2927 2928 break; 2929 } 2930 2931 case IPV6_2292DSTOPTS: 2932 case IPV6_DSTOPTS: 2933 case IPV6_RTHDRDSTOPTS: 2934 { 2935 struct ip6_dest *dest, **newdest = NULL; 2936 int destlen; 2937 2938 if (!priv) /* XXX: see the comment for IPV6_HOPOPTS */ 2939 return (EPERM); 2940 2941 if (len == 0) { 2942 ip6_clearpktopts(opt, optname); 2943 break; /* just remove the option */ 2944 } 2945 2946 /* message length validation */ 2947 if (len < sizeof(struct ip6_dest)) 2948 return (EINVAL); 2949 dest = (struct ip6_dest *)buf; 2950 destlen = (dest->ip6d_len + 1) << 3; 2951 if (len != destlen) 2952 return (EINVAL); 2953 /* 2954 * Determine the position that the destination options header 2955 * should be inserted; before or after the routing header. 2956 */ 2957 switch (optname) { 2958 case IPV6_2292DSTOPTS: 2959 /* 2960 * The old advanced API is ambiguous on this point. 2961 * Our approach is to determine the position based 2962 * according to the existence of a routing header. 2963 * Note, however, that this depends on the order of the 2964 * extension headers in the ancillary data; the 1st 2965 * part of the destination options header must appear 2966 * before the routing header in the ancillary data, 2967 * too. 2968 * RFC3542 solved the ambiguity by introducing 2969 * separate ancillary data or option types. 2970 */ 2971 if (opt->ip6po_rthdr == NULL) 2972 newdest = &opt->ip6po_dest1; 2973 else 2974 newdest = &opt->ip6po_dest2; 2975 break; 2976 case IPV6_RTHDRDSTOPTS: 2977 newdest = &opt->ip6po_dest1; 2978 break; 2979 case IPV6_DSTOPTS: 2980 newdest = &opt->ip6po_dest2; 2981 break; 2982 } 2983 2984 /* turn off the previous option, then set the new option. */ 2985 ip6_clearpktopts(opt, optname); 2986 *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); 2987 if (*newdest == NULL) 2988 return (ENOBUFS); 2989 bcopy(dest, *newdest, destlen); 2990 2991 break; 2992 } 2993 2994 case IPV6_2292RTHDR: 2995 case IPV6_RTHDR: 2996 { 2997 struct ip6_rthdr *rth; 2998 int rthlen; 2999 3000 if (len == 0) { 3001 ip6_clearpktopts(opt, IPV6_RTHDR); 3002 break; /* just remove the option */ 3003 } 3004 3005 /* message length validation */ 3006 if (len < sizeof(struct ip6_rthdr)) 3007 return (EINVAL); 3008 rth = (struct ip6_rthdr *)buf; 3009 rthlen = (rth->ip6r_len + 1) << 3; 3010 if (len != rthlen) 3011 return (EINVAL); 3012 3013 switch (rth->ip6r_type) { 3014 case IPV6_RTHDR_TYPE_0: 3015 if (rth->ip6r_len == 0) /* must contain one addr */ 3016 return (EINVAL); 3017 if (rth->ip6r_len % 2) /* length must be even */ 3018 return (EINVAL); 3019 if (rth->ip6r_len / 2 != rth->ip6r_segleft) 3020 return (EINVAL); 3021 break; 3022 default: 3023 return (EINVAL); /* not supported */ 3024 } 3025 /* turn off the previous option */ 3026 ip6_clearpktopts(opt, IPV6_RTHDR); 3027 opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); 3028 if (opt->ip6po_rthdr == NULL) 3029 return (ENOBUFS); 3030 bcopy(rth, opt->ip6po_rthdr, rthlen); 3031 break; 3032 } 3033 3034 case IPV6_USE_MIN_MTU: 3035 if (len != sizeof(int)) 3036 return (EINVAL); 3037 minmtupolicy = *(int *)buf; 3038 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && 3039 minmtupolicy != IP6PO_MINMTU_DISABLE && 3040 minmtupolicy != IP6PO_MINMTU_ALL) { 3041 return (EINVAL); 3042 } 3043 opt->ip6po_minmtu = minmtupolicy; 3044 break; 3045 3046 case IPV6_DONTFRAG: 3047 if (len != sizeof(int)) 3048 return (EINVAL); 3049 3050 if (uproto == IPPROTO_TCP || *(int *)buf == 0) { 3051 /* 3052 * we ignore this option for TCP sockets. 3053 * (RFC3542 leaves this case unspecified.) 3054 */ 3055 opt->ip6po_flags &= ~IP6PO_DONTFRAG; 3056 } else 3057 opt->ip6po_flags |= IP6PO_DONTFRAG; 3058 break; 3059 3060 default: 3061 return (ENOPROTOOPT); 3062 } /* end of switch */ 3063 3064 return (0); 3065 } 3066 3067 /* 3068 * Routine called from ip6_output() to loop back a copy of an IP6 multicast 3069 * packet to the input queue of a specified interface. Note that this 3070 * calls the output routine of the loopback "driver", but with an interface 3071 * pointer that might NOT be lo0ifp -- easier than replicating that code here. 3072 */ 3073 void 3074 ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst) 3075 { 3076 struct mbuf *copym; 3077 struct ip6_hdr *ip6; 3078 3079 /* 3080 * Duplicate the packet. 3081 */ 3082 copym = m_copy(m, 0, M_COPYALL); 3083 if (copym == NULL) 3084 return; 3085 3086 /* 3087 * Make sure to deep-copy IPv6 header portion in case the data 3088 * is in an mbuf cluster, so that we can safely override the IPv6 3089 * header portion later. 3090 */ 3091 if ((copym->m_flags & M_EXT) != 0 || 3092 copym->m_len < sizeof(struct ip6_hdr)) { 3093 copym = m_pullup(copym, sizeof(struct ip6_hdr)); 3094 if (copym == NULL) 3095 return; 3096 } 3097 3098 #ifdef DIAGNOSTIC 3099 if (copym->m_len < sizeof(*ip6)) { 3100 m_freem(copym); 3101 return; 3102 } 3103 #endif 3104 3105 ip6 = mtod(copym, struct ip6_hdr *); 3106 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) 3107 ip6->ip6_src.s6_addr16[1] = 0; 3108 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) 3109 ip6->ip6_dst.s6_addr16[1] = 0; 3110 3111 (void)looutput(ifp, copym, (struct sockaddr *)dst, NULL); 3112 } 3113 3114 /* 3115 * Chop IPv6 header off from the payload. 3116 */ 3117 static int 3118 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) 3119 { 3120 struct mbuf *mh; 3121 struct ip6_hdr *ip6; 3122 3123 ip6 = mtod(m, struct ip6_hdr *); 3124 if (m->m_len > sizeof(*ip6)) { 3125 MGETHDR(mh, M_DONTWAIT, MT_HEADER); 3126 if (mh == 0) { 3127 m_freem(m); 3128 return ENOBUFS; 3129 } 3130 M_MOVE_PKTHDR(mh, m); 3131 MH_ALIGN(mh, sizeof(*ip6)); 3132 m->m_len -= sizeof(*ip6); 3133 m->m_data += sizeof(*ip6); 3134 mh->m_next = m; 3135 m = mh; 3136 m->m_len = sizeof(*ip6); 3137 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); 3138 } 3139 exthdrs->ip6e_ip6 = m; 3140 return 0; 3141 } 3142 3143 /* 3144 * Compute IPv6 extension header length. 3145 */ 3146 int 3147 ip6_optlen(struct inpcb *inp) 3148 { 3149 int len; 3150 3151 if (!inp->inp_outputopts6) 3152 return 0; 3153 3154 len = 0; 3155 #define elen(x) \ 3156 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) 3157 3158 len += elen(inp->inp_outputopts6->ip6po_hbh); 3159 len += elen(inp->inp_outputopts6->ip6po_dest1); 3160 len += elen(inp->inp_outputopts6->ip6po_rthdr); 3161 len += elen(inp->inp_outputopts6->ip6po_dest2); 3162 return len; 3163 #undef elen 3164 } 3165 3166 u_int32_t 3167 ip6_randomid(void) 3168 { 3169 return idgen32(&ip6_id_ctx); 3170 } 3171 3172 void 3173 ip6_randomid_init(void) 3174 { 3175 idgen32_init(&ip6_id_ctx); 3176 } 3177 3178