/*	$NetBSD: ip_output.c,v 1.266 2017/01/10 07:39:52 knakahara Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Public Access Networks Corporation ("Panix").  It was developed under
 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
89 * 90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 91 */ 92 93 #include <sys/cdefs.h> 94 __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.266 2017/01/10 07:39:52 knakahara Exp $"); 95 96 #ifdef _KERNEL_OPT 97 #include "opt_inet.h" 98 #include "opt_ipsec.h" 99 #include "opt_mrouting.h" 100 #include "opt_net_mpsafe.h" 101 #include "opt_mpls.h" 102 #endif 103 104 #include "arp.h" 105 106 #include <sys/param.h> 107 #include <sys/kmem.h> 108 #include <sys/mbuf.h> 109 #include <sys/protosw.h> 110 #include <sys/socket.h> 111 #include <sys/socketvar.h> 112 #include <sys/kauth.h> 113 #ifdef IPSEC 114 #include <sys/domain.h> 115 #endif 116 #include <sys/systm.h> 117 #include <sys/syslog.h> 118 119 #include <net/if.h> 120 #include <net/if_types.h> 121 #include <net/route.h> 122 #include <net/pfil.h> 123 124 #include <netinet/in.h> 125 #include <netinet/in_systm.h> 126 #include <netinet/ip.h> 127 #include <netinet/in_pcb.h> 128 #include <netinet/in_var.h> 129 #include <netinet/ip_var.h> 130 #include <netinet/ip_private.h> 131 #include <netinet/in_offload.h> 132 #include <netinet/portalgo.h> 133 #include <netinet/udp.h> 134 135 #ifdef INET6 136 #include <netinet6/ip6_var.h> 137 #endif 138 139 #ifdef MROUTING 140 #include <netinet/ip_mroute.h> 141 #endif 142 143 #ifdef IPSEC 144 #include <netipsec/ipsec.h> 145 #include <netipsec/key.h> 146 #endif 147 148 #ifdef MPLS 149 #include <netmpls/mpls.h> 150 #include <netmpls/mpls_var.h> 151 #endif 152 153 static int ip_pcbopts(struct inpcb *, const struct sockopt *); 154 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 155 static struct ifnet *ip_multicast_if(struct in_addr *, int *); 156 static void ip_mloopback(struct ifnet *, struct mbuf *, 157 const struct sockaddr_in *); 158 static int ip_ifaddrvalid(const struct in_ifaddr *); 159 160 extern pfil_head_t *inet_pfil_hook; /* XXX */ 161 162 int ip_do_loopback_cksum = 0; 163 164 static int 165 ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m, 166 const 
struct rtentry *rt) 167 { 168 int error = 0; 169 #ifdef MPLS 170 union mpls_shim msh; 171 172 if (rt == NULL || rt_gettag(rt) == NULL || 173 rt_gettag(rt)->sa_family != AF_MPLS || 174 (m->m_flags & (M_MCAST | M_BCAST)) != 0 || 175 ifp->if_type != IFT_ETHER) 176 return 0; 177 178 msh.s_addr = MPLS_GETSADDR(rt); 179 if (msh.shim.label != MPLS_LABEL_IMPLNULL) { 180 struct m_tag *mtag; 181 /* 182 * XXX tentative solution to tell ether_output 183 * it's MPLS. Need some more efficient solution. 184 */ 185 mtag = m_tag_get(PACKET_TAG_MPLS, 186 sizeof(int) /* dummy */, 187 M_NOWAIT); 188 if (mtag == NULL) 189 return ENOMEM; 190 m_tag_prepend(m, mtag); 191 } 192 #endif 193 return error; 194 } 195 196 /* 197 * Send an IP packet to a host. 198 */ 199 int 200 ip_if_output(struct ifnet * const ifp, struct mbuf * const m, 201 const struct sockaddr * const dst, const struct rtentry *rt) 202 { 203 int error = 0; 204 205 if (rt != NULL) { 206 error = rt_check_reject_route(rt, ifp); 207 if (error != 0) { 208 m_freem(m); 209 return error; 210 } 211 } 212 213 error = ip_mark_mpls(ifp, m, rt); 214 if (error != 0) { 215 m_freem(m); 216 return error; 217 } 218 219 error = if_output_lock(ifp, ifp, m, dst, rt); 220 221 return error; 222 } 223 224 /* 225 * IP output. The packet in mbuf chain m contains a skeletal IP 226 * header (with len, off, ttl, proto, tos, src, dst). 227 * The mbuf chain containing the packet will be freed. 228 * The mbuf opt, if present, will not be freed. 
229 */ 230 int 231 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 232 struct ip_moptions *imo, struct socket *so) 233 { 234 struct rtentry *rt; 235 struct ip *ip; 236 struct ifnet *ifp, *mifp = NULL; 237 struct mbuf *m = m0; 238 int hlen = sizeof (struct ip); 239 int len, error = 0; 240 struct route iproute; 241 const struct sockaddr_in *dst; 242 struct in_ifaddr *ia = NULL; 243 int isbroadcast; 244 int sw_csum; 245 u_long mtu; 246 #ifdef IPSEC 247 struct secpolicy *sp = NULL; 248 #endif 249 bool natt_frag = false; 250 bool rtmtu_nolock; 251 union { 252 struct sockaddr dst; 253 struct sockaddr_in dst4; 254 } u; 255 struct sockaddr *rdst = &u.dst; /* real IP destination, as opposed 256 * to the nexthop 257 */ 258 struct psref psref, psref_ia; 259 int bound; 260 bool bind_need_restore = false; 261 262 len = 0; 263 264 MCLAIM(m, &ip_tx_mowner); 265 266 KASSERT((m->m_flags & M_PKTHDR) != 0); 267 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv6|M_CSUM_UDPv6)) == 0); 268 KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) != 269 (M_CSUM_TCPv4|M_CSUM_UDPv4)); 270 271 if (opt) { 272 m = ip_insertoptions(m, opt, &len); 273 if (len >= sizeof(struct ip)) 274 hlen = len; 275 } 276 ip = mtod(m, struct ip *); 277 278 /* 279 * Fill in IP header. 280 */ 281 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 282 ip->ip_v = IPVERSION; 283 ip->ip_off = htons(0); 284 /* ip->ip_id filled in after we find out source ia */ 285 ip->ip_hl = hlen >> 2; 286 IP_STATINC(IP_STAT_LOCALOUT); 287 } else { 288 hlen = ip->ip_hl << 2; 289 } 290 291 /* 292 * Route packet. 293 */ 294 if (ro == NULL) { 295 memset(&iproute, 0, sizeof(iproute)); 296 ro = &iproute; 297 } 298 sockaddr_in_init(&u.dst4, &ip->ip_dst, 0); 299 dst = satocsin(rtcache_getdst(ro)); 300 301 /* 302 * If there is a cached route, check that it is to the same 303 * destination and is still up. If not, free it and try again. 
304 * The address family should also be checked in case of sharing 305 * the cache with IPv6. 306 */ 307 if (dst && (dst->sin_family != AF_INET || 308 !in_hosteq(dst->sin_addr, ip->ip_dst))) 309 rtcache_free(ro); 310 311 if ((rt = rtcache_validate(ro)) == NULL && 312 (rt = rtcache_update(ro, 1)) == NULL) { 313 dst = &u.dst4; 314 error = rtcache_setdst(ro, &u.dst); 315 if (error != 0) 316 goto bad; 317 } 318 319 bound = curlwp_bind(); 320 bind_need_restore = true; 321 /* 322 * If routing to interface only, short circuit routing lookup. 323 */ 324 if (flags & IP_ROUTETOIF) { 325 struct ifaddr *ifa; 326 327 ifa = ifa_ifwithladdr_psref(sintocsa(dst), &psref_ia); 328 if (ifa == NULL) { 329 IP_STATINC(IP_STAT_NOROUTE); 330 error = ENETUNREACH; 331 goto bad; 332 } 333 /* ia is already referenced by psref_ia */ 334 ia = ifatoia(ifa); 335 336 ifp = ia->ia_ifp; 337 mtu = ifp->if_mtu; 338 ip->ip_ttl = 1; 339 isbroadcast = in_broadcast(dst->sin_addr, ifp); 340 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) || 341 ip->ip_dst.s_addr == INADDR_BROADCAST) && 342 imo != NULL && imo->imo_multicast_if_index != 0) { 343 ifp = mifp = if_get_byindex(imo->imo_multicast_if_index, &psref); 344 if (ifp == NULL) { 345 IP_STATINC(IP_STAT_NOROUTE); 346 error = ENETUNREACH; 347 goto bad; 348 } 349 mtu = ifp->if_mtu; 350 ia = in_get_ia_from_ifp_psref(ifp, &psref_ia); 351 if (ia == NULL) { 352 error = EADDRNOTAVAIL; 353 goto bad; 354 } 355 isbroadcast = 0; 356 } else { 357 if (rt == NULL) 358 rt = rtcache_init(ro); 359 if (rt == NULL) { 360 IP_STATINC(IP_STAT_NOROUTE); 361 error = EHOSTUNREACH; 362 goto bad; 363 } 364 if (ifa_is_destroying(rt->rt_ifa)) { 365 rtcache_unref(rt, ro); 366 rt = NULL; 367 IP_STATINC(IP_STAT_NOROUTE); 368 error = EHOSTUNREACH; 369 goto bad; 370 } 371 ifa_acquire(rt->rt_ifa, &psref_ia); 372 ia = ifatoia(rt->rt_ifa); 373 ifp = rt->rt_ifp; 374 if ((mtu = rt->rt_rmx.rmx_mtu) == 0) 375 mtu = ifp->if_mtu; 376 rt->rt_use++; 377 if (rt->rt_flags & RTF_GATEWAY) 378 dst = 
satosin(rt->rt_gateway); 379 if (rt->rt_flags & RTF_HOST) 380 isbroadcast = rt->rt_flags & RTF_BROADCAST; 381 else 382 isbroadcast = in_broadcast(dst->sin_addr, ifp); 383 } 384 rtmtu_nolock = rt && (rt->rt_rmx.rmx_locks & RTV_MTU) == 0; 385 386 if (IN_MULTICAST(ip->ip_dst.s_addr) || 387 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 388 bool inmgroup; 389 390 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 391 M_BCAST : M_MCAST; 392 /* 393 * See if the caller provided any multicast options 394 */ 395 if (imo != NULL) 396 ip->ip_ttl = imo->imo_multicast_ttl; 397 else 398 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 399 400 /* 401 * if we don't know the outgoing ifp yet, we can't generate 402 * output 403 */ 404 if (!ifp) { 405 IP_STATINC(IP_STAT_NOROUTE); 406 error = ENETUNREACH; 407 goto bad; 408 } 409 410 /* 411 * If the packet is multicast or broadcast, confirm that 412 * the outgoing interface can transmit it. 413 */ 414 if (((m->m_flags & M_MCAST) && 415 (ifp->if_flags & IFF_MULTICAST) == 0) || 416 ((m->m_flags & M_BCAST) && 417 (ifp->if_flags & (IFF_BROADCAST|IFF_POINTOPOINT)) == 0)) { 418 IP_STATINC(IP_STAT_NOROUTE); 419 error = ENETUNREACH; 420 goto bad; 421 } 422 /* 423 * If source address not specified yet, use an address 424 * of outgoing interface. 
425 */ 426 if (in_nullhost(ip->ip_src)) { 427 struct in_ifaddr *xia; 428 struct ifaddr *xifa; 429 struct psref _psref; 430 431 xia = in_get_ia_from_ifp_psref(ifp, &_psref); 432 if (!xia) { 433 error = EADDRNOTAVAIL; 434 goto bad; 435 } 436 xifa = &xia->ia_ifa; 437 if (xifa->ifa_getifa != NULL) { 438 ia4_release(xia, &_psref); 439 /* FIXME NOMPSAFE */ 440 xia = ifatoia((*xifa->ifa_getifa)(xifa, rdst)); 441 if (xia == NULL) { 442 error = EADDRNOTAVAIL; 443 goto bad; 444 } 445 ia4_acquire(xia, &_psref); 446 } 447 ip->ip_src = xia->ia_addr.sin_addr; 448 ia4_release(xia, &_psref); 449 } 450 451 inmgroup = in_multi_group(ip->ip_dst, ifp, flags); 452 if (inmgroup && (imo == NULL || imo->imo_multicast_loop)) { 453 /* 454 * If we belong to the destination multicast group 455 * on the outgoing interface, and the caller did not 456 * forbid loopback, loop back a copy. 457 */ 458 ip_mloopback(ifp, m, &u.dst4); 459 } 460 #ifdef MROUTING 461 else { 462 /* 463 * If we are acting as a multicast router, perform 464 * multicast forwarding as if the packet had just 465 * arrived on the interface to which we are about 466 * to send. The multicast forwarding function 467 * recursively calls this function, using the 468 * IP_FORWARDING flag to prevent infinite recursion. 469 * 470 * Multicasts that are looped back by ip_mloopback(), 471 * above, will be forwarded by the ip_input() routine, 472 * if necessary. 473 */ 474 extern struct socket *ip_mrouter; 475 476 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 477 if (ip_mforward(m, ifp) != 0) { 478 m_freem(m); 479 goto done; 480 } 481 } 482 } 483 #endif 484 /* 485 * Multicasts with a time-to-live of zero may be looped- 486 * back, above, but must not be transmitted on a network. 487 * Also, multicasts addressed to the loopback interface 488 * are not sent -- the above call to ip_mloopback() will 489 * loop back a copy if this host actually belongs to the 490 * destination group on the loopback interface. 
491 */ 492 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 493 m_freem(m); 494 goto done; 495 } 496 goto sendit; 497 } 498 499 /* 500 * If source address not specified yet, use address 501 * of outgoing interface. 502 */ 503 if (in_nullhost(ip->ip_src)) { 504 struct ifaddr *xifa; 505 506 xifa = &ia->ia_ifa; 507 if (xifa->ifa_getifa != NULL) { 508 ia4_release(ia, &psref_ia); 509 /* FIXME NOMPSAFE */ 510 ia = ifatoia((*xifa->ifa_getifa)(xifa, rdst)); 511 if (ia == NULL) { 512 error = EADDRNOTAVAIL; 513 goto bad; 514 } 515 ia4_acquire(ia, &psref_ia); 516 } 517 ip->ip_src = ia->ia_addr.sin_addr; 518 } 519 520 /* 521 * packets with Class-D address as source are not valid per 522 * RFC 1112 523 */ 524 if (IN_MULTICAST(ip->ip_src.s_addr)) { 525 IP_STATINC(IP_STAT_ODROPPED); 526 error = EADDRNOTAVAIL; 527 goto bad; 528 } 529 530 /* 531 * Look for broadcast address and and verify user is allowed to 532 * send such a packet. 533 */ 534 if (isbroadcast) { 535 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 536 error = EADDRNOTAVAIL; 537 goto bad; 538 } 539 if ((flags & IP_ALLOWBROADCAST) == 0) { 540 error = EACCES; 541 goto bad; 542 } 543 /* don't allow broadcast messages to be fragmented */ 544 if (ntohs(ip->ip_len) > ifp->if_mtu) { 545 error = EMSGSIZE; 546 goto bad; 547 } 548 m->m_flags |= M_BCAST; 549 } else 550 m->m_flags &= ~M_BCAST; 551 552 sendit: 553 if ((flags & (IP_FORWARDING|IP_NOIPNEWID)) == 0) { 554 if (m->m_pkthdr.len < IP_MINFRAGSIZE) { 555 ip->ip_id = 0; 556 } else if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) { 557 ip->ip_id = ip_newid(ia); 558 } else { 559 560 /* 561 * TSO capable interfaces (typically?) increment 562 * ip_id for each segment. 563 * "allocate" enough ids here to increase the chance 564 * for them to be unique. 565 * 566 * note that the following calculation is not 567 * needed to be precise. wasting some ip_id is fine. 
568 */ 569 570 unsigned int segsz = m->m_pkthdr.segsz; 571 unsigned int datasz = ntohs(ip->ip_len) - hlen; 572 unsigned int num = howmany(datasz, segsz); 573 574 ip->ip_id = ip_newid_range(ia, num); 575 } 576 } 577 if (ia != NULL) { 578 ia4_release(ia, &psref_ia); 579 ia = NULL; 580 } 581 582 /* 583 * If we're doing Path MTU Discovery, we need to set DF unless 584 * the route's MTU is locked. 585 */ 586 if ((flags & IP_MTUDISC) != 0 && rtmtu_nolock) { 587 ip->ip_off |= htons(IP_DF); 588 } 589 590 #ifdef IPSEC 591 if (ipsec_used) { 592 bool ipsec_done = false; 593 594 /* Perform IPsec processing, if any. */ 595 error = ipsec4_output(m, so, flags, &sp, &mtu, &natt_frag, 596 &ipsec_done); 597 if (error || ipsec_done) 598 goto done; 599 } 600 #endif 601 602 /* 603 * Run through list of hooks for output packets. 604 */ 605 error = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_OUT); 606 if (error) 607 goto done; 608 if (m == NULL) 609 goto done; 610 611 ip = mtod(m, struct ip *); 612 hlen = ip->ip_hl << 2; 613 614 m->m_pkthdr.csum_data |= hlen << 16; 615 616 /* 617 * search for the source address structure to 618 * maintain output statistics. 619 */ 620 KASSERT(ia == NULL); 621 ia = in_get_ia_psref(ip->ip_src, &psref_ia); 622 623 /* Ensure we only send from a valid address. */ 624 if ((ia != NULL || (flags & IP_FORWARDING) == 0) && 625 (error = ip_ifaddrvalid(ia)) != 0) 626 { 627 arplog(LOG_ERR, 628 "refusing to send from invalid address %s (pid %d)\n", 629 in_fmtaddr(ip->ip_src), curproc->p_pid); 630 IP_STATINC(IP_STAT_ODROPPED); 631 if (error == 1) 632 /* 633 * Address exists, but is tentative or detached. 634 * We can't send from it because it's invalid, 635 * so we drop the packet. 636 */ 637 error = 0; 638 else 639 error = EADDRNOTAVAIL; 640 goto bad; 641 } 642 643 /* Maybe skip checksums on loopback interfaces. 
*/ 644 if (IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) { 645 m->m_pkthdr.csum_flags |= M_CSUM_IPv4; 646 } 647 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_csum_flags_tx; 648 /* 649 * If small enough for mtu of path, or if using TCP segmentation 650 * offload, can just send directly. 651 */ 652 if (ntohs(ip->ip_len) <= mtu || 653 (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) != 0) { 654 const struct sockaddr *sa; 655 656 #if IFA_STATS 657 if (ia) 658 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len); 659 #endif 660 /* 661 * Always initialize the sum to 0! Some HW assisted 662 * checksumming requires this. 663 */ 664 ip->ip_sum = 0; 665 666 if ((m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0) { 667 /* 668 * Perform any checksums that the hardware can't do 669 * for us. 670 * 671 * XXX Does any hardware require the {th,uh}_sum 672 * XXX fields to be 0? 673 */ 674 if (sw_csum & M_CSUM_IPv4) { 675 KASSERT(IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)); 676 ip->ip_sum = in_cksum(m, hlen); 677 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4; 678 } 679 if (sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 680 if (IN_NEED_CHECKSUM(ifp, 681 sw_csum & (M_CSUM_TCPv4|M_CSUM_UDPv4))) { 682 in_delayed_cksum(m); 683 } 684 m->m_pkthdr.csum_flags &= 685 ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 686 } 687 } 688 689 sa = (m->m_flags & M_MCAST) ? sintocsa(rdst) : sintocsa(dst); 690 if (__predict_true( 691 (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 || 692 (ifp->if_capenable & IFCAP_TSOv4) != 0)) { 693 error = ip_if_output(ifp, m, sa, rt); 694 } else { 695 error = ip_tso_output(ifp, m, sa, rt); 696 } 697 goto done; 698 } 699 700 /* 701 * We can't use HW checksumming if we're about to 702 * to fragment the packet. 703 * 704 * XXX Some hardware can do this. 
705 */ 706 if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 707 if (IN_NEED_CHECKSUM(ifp, 708 m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4))) { 709 in_delayed_cksum(m); 710 } 711 m->m_pkthdr.csum_flags &= ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 712 } 713 714 /* 715 * Too large for interface; fragment if possible. 716 * Must be able to put at least 8 bytes per fragment. 717 */ 718 if (ntohs(ip->ip_off) & IP_DF) { 719 if (flags & IP_RETURNMTU) { 720 struct inpcb *inp; 721 722 KASSERT(so && solocked(so)); 723 inp = sotoinpcb(so); 724 inp->inp_errormtu = mtu; 725 } 726 error = EMSGSIZE; 727 IP_STATINC(IP_STAT_CANTFRAG); 728 goto bad; 729 } 730 731 error = ip_fragment(m, ifp, mtu); 732 if (error) { 733 m = NULL; 734 goto bad; 735 } 736 737 for (; m; m = m0) { 738 m0 = m->m_nextpkt; 739 m->m_nextpkt = 0; 740 if (error) { 741 m_freem(m); 742 continue; 743 } 744 #if IFA_STATS 745 if (ia) 746 ia->ia_ifa.ifa_data.ifad_outbytes += ntohs(ip->ip_len); 747 #endif 748 /* 749 * If we get there, the packet has not been handled by 750 * IPsec whereas it should have. Now that it has been 751 * fragmented, re-inject it in ip_output so that IPsec 752 * processing can occur. 753 */ 754 if (natt_frag) { 755 error = ip_output(m, opt, ro, 756 flags | IP_RAWOUTPUT | IP_NOIPNEWID, 757 imo, so); 758 } else { 759 KASSERT((m->m_pkthdr.csum_flags & 760 (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0); 761 error = ip_if_output(ifp, m, 762 (m->m_flags & M_MCAST) ? 
763 sintocsa(rdst) : sintocsa(dst), rt); 764 } 765 } 766 if (error == 0) { 767 IP_STATINC(IP_STAT_FRAGMENTED); 768 } 769 done: 770 ia4_release(ia, &psref_ia); 771 rtcache_unref(rt, ro); 772 if (ro == &iproute) { 773 rtcache_free(&iproute); 774 } 775 #ifdef IPSEC 776 if (sp) { 777 KEY_FREESP(&sp); 778 } 779 #endif 780 if (mifp != NULL) { 781 if_put(mifp, &psref); 782 } 783 if (bind_need_restore) 784 curlwp_bindx(bound); 785 return error; 786 bad: 787 m_freem(m); 788 goto done; 789 } 790 791 int 792 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 793 { 794 struct ip *ip, *mhip; 795 struct mbuf *m0; 796 int len, hlen, off; 797 int mhlen, firstlen; 798 struct mbuf **mnext; 799 int sw_csum = m->m_pkthdr.csum_flags; 800 int fragments = 0; 801 int s; 802 int error = 0; 803 804 ip = mtod(m, struct ip *); 805 hlen = ip->ip_hl << 2; 806 if (ifp != NULL) 807 sw_csum &= ~ifp->if_csum_flags_tx; 808 809 len = (mtu - hlen) &~ 7; 810 if (len < 8) { 811 m_freem(m); 812 return (EMSGSIZE); 813 } 814 815 firstlen = len; 816 mnext = &m->m_nextpkt; 817 818 /* 819 * Loop through length of segment after first fragment, 820 * make new header and copy data of each part and link onto chain. 
821 */ 822 m0 = m; 823 mhlen = sizeof (struct ip); 824 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 825 MGETHDR(m, M_DONTWAIT, MT_HEADER); 826 if (m == 0) { 827 error = ENOBUFS; 828 IP_STATINC(IP_STAT_ODROPPED); 829 goto sendorfree; 830 } 831 MCLAIM(m, m0->m_owner); 832 *mnext = m; 833 mnext = &m->m_nextpkt; 834 m->m_data += max_linkhdr; 835 mhip = mtod(m, struct ip *); 836 *mhip = *ip; 837 /* we must inherit MCAST and BCAST flags */ 838 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 839 if (hlen > sizeof (struct ip)) { 840 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 841 mhip->ip_hl = mhlen >> 2; 842 } 843 m->m_len = mhlen; 844 mhip->ip_off = ((off - hlen) >> 3) + 845 (ntohs(ip->ip_off) & ~IP_MF); 846 if (ip->ip_off & htons(IP_MF)) 847 mhip->ip_off |= IP_MF; 848 if (off + len >= ntohs(ip->ip_len)) 849 len = ntohs(ip->ip_len) - off; 850 else 851 mhip->ip_off |= IP_MF; 852 HTONS(mhip->ip_off); 853 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 854 m->m_next = m_copym(m0, off, len, M_DONTWAIT); 855 if (m->m_next == 0) { 856 error = ENOBUFS; /* ??? */ 857 IP_STATINC(IP_STAT_ODROPPED); 858 goto sendorfree; 859 } 860 m->m_pkthdr.len = mhlen + len; 861 m_reset_rcvif(m); 862 mhip->ip_sum = 0; 863 KASSERT((m->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0); 864 if (sw_csum & M_CSUM_IPv4) { 865 mhip->ip_sum = in_cksum(m, mhlen); 866 } else { 867 /* 868 * checksum is hw-offloaded or not necessary. 869 */ 870 m->m_pkthdr.csum_flags |= 871 m0->m_pkthdr.csum_flags & M_CSUM_IPv4; 872 m->m_pkthdr.csum_data |= mhlen << 16; 873 KASSERT(!(ifp != NULL && 874 IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) || 875 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0); 876 } 877 IP_STATINC(IP_STAT_OFRAGMENTS); 878 fragments++; 879 } 880 /* 881 * Update first fragment by trimming what's been copied out 882 * and updating header, then send each fragment (in order). 
883 */ 884 m = m0; 885 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 886 m->m_pkthdr.len = hlen + firstlen; 887 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 888 ip->ip_off |= htons(IP_MF); 889 ip->ip_sum = 0; 890 if (sw_csum & M_CSUM_IPv4) { 891 ip->ip_sum = in_cksum(m, hlen); 892 m->m_pkthdr.csum_flags &= ~M_CSUM_IPv4; 893 } else { 894 /* 895 * checksum is hw-offloaded or not necessary. 896 */ 897 KASSERT(!(ifp != NULL && IN_NEED_CHECKSUM(ifp, M_CSUM_IPv4)) || 898 (m->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0); 899 KASSERT(M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data) >= 900 sizeof(struct ip)); 901 } 902 sendorfree: 903 /* 904 * If there is no room for all the fragments, don't queue 905 * any of them. 906 */ 907 if (ifp != NULL) { 908 s = splnet(); 909 if (ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len < fragments && 910 error == 0) { 911 error = ENOBUFS; 912 IP_STATINC(IP_STAT_ODROPPED); 913 IFQ_INC_DROPS(&ifp->if_snd); 914 } 915 splx(s); 916 } 917 if (error) { 918 for (m = m0; m; m = m0) { 919 m0 = m->m_nextpkt; 920 m->m_nextpkt = NULL; 921 m_freem(m); 922 } 923 } 924 return (error); 925 } 926 927 /* 928 * Process a delayed payload checksum calculation. 
929 */ 930 void 931 in_delayed_cksum(struct mbuf *m) 932 { 933 struct ip *ip; 934 u_int16_t csum, offset; 935 936 ip = mtod(m, struct ip *); 937 offset = ip->ip_hl << 2; 938 csum = in4_cksum(m, 0, offset, ntohs(ip->ip_len) - offset); 939 if (csum == 0 && (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) != 0) 940 csum = 0xffff; 941 942 offset += M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data); 943 944 if ((offset + sizeof(u_int16_t)) > m->m_len) { 945 /* This happen when ip options were inserted 946 printf("in_delayed_cksum: pullup len %d off %d proto %d\n", 947 m->m_len, offset, ip->ip_p); 948 */ 949 m_copyback(m, offset, sizeof(csum), (void *) &csum); 950 } else 951 *(u_int16_t *)(mtod(m, char *) + offset) = csum; 952 } 953 954 /* 955 * Determine the maximum length of the options to be inserted; 956 * we would far rather allocate too much space rather than too little. 957 */ 958 959 u_int 960 ip_optlen(struct inpcb *inp) 961 { 962 struct mbuf *m = inp->inp_options; 963 964 if (m && m->m_len > offsetof(struct ipoption, ipopt_dst)) { 965 return (m->m_len - offsetof(struct ipoption, ipopt_dst)); 966 } 967 return 0; 968 } 969 970 /* 971 * Insert IP options into preformed packet. 972 * Adjust IP destination as required for IP source routing, 973 * as indicated by a non-zero in_addr at the start of the options. 
974 */ 975 static struct mbuf * 976 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 977 { 978 struct ipoption *p = mtod(opt, struct ipoption *); 979 struct mbuf *n; 980 struct ip *ip = mtod(m, struct ip *); 981 unsigned optlen; 982 983 optlen = opt->m_len - sizeof(p->ipopt_dst); 984 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 985 return (m); /* XXX should fail */ 986 if (!in_nullhost(p->ipopt_dst)) 987 ip->ip_dst = p->ipopt_dst; 988 if (M_READONLY(m) || M_LEADINGSPACE(m) < optlen) { 989 MGETHDR(n, M_DONTWAIT, MT_HEADER); 990 if (n == 0) 991 return (m); 992 MCLAIM(n, m->m_owner); 993 M_MOVE_PKTHDR(n, m); 994 m->m_len -= sizeof(struct ip); 995 m->m_data += sizeof(struct ip); 996 n->m_next = m; 997 m = n; 998 m->m_len = optlen + sizeof(struct ip); 999 m->m_data += max_linkhdr; 1000 bcopy((void *)ip, mtod(m, void *), sizeof(struct ip)); 1001 } else { 1002 m->m_data -= optlen; 1003 m->m_len += optlen; 1004 memmove(mtod(m, void *), ip, sizeof(struct ip)); 1005 } 1006 m->m_pkthdr.len += optlen; 1007 ip = mtod(m, struct ip *); 1008 bcopy((void *)p->ipopt_list, (void *)(ip + 1), (unsigned)optlen); 1009 *phlen = sizeof(struct ip) + optlen; 1010 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 1011 return (m); 1012 } 1013 1014 /* 1015 * Copy options from ip to jp, 1016 * omitting those not copied during fragmentation. 1017 */ 1018 int 1019 ip_optcopy(struct ip *ip, struct ip *jp) 1020 { 1021 u_char *cp, *dp; 1022 int opt, optlen, cnt; 1023 1024 cp = (u_char *)(ip + 1); 1025 dp = (u_char *)(jp + 1); 1026 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 1027 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1028 opt = cp[0]; 1029 if (opt == IPOPT_EOL) 1030 break; 1031 if (opt == IPOPT_NOP) { 1032 /* Preserve for IP mcast tunnel's LSRR alignment. 
*/ 1033 *dp++ = IPOPT_NOP; 1034 optlen = 1; 1035 continue; 1036 } 1037 1038 KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp)); 1039 optlen = cp[IPOPT_OLEN]; 1040 KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen < cnt); 1041 1042 /* Invalid lengths should have been caught by ip_dooptions. */ 1043 if (optlen > cnt) 1044 optlen = cnt; 1045 if (IPOPT_COPIED(opt)) { 1046 bcopy((void *)cp, (void *)dp, (unsigned)optlen); 1047 dp += optlen; 1048 } 1049 } 1050 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1051 *dp++ = IPOPT_EOL; 1052 return (optlen); 1053 } 1054 1055 /* 1056 * IP socket option processing. 1057 */ 1058 int 1059 ip_ctloutput(int op, struct socket *so, struct sockopt *sopt) 1060 { 1061 struct inpcb *inp = sotoinpcb(so); 1062 struct ip *ip = &inp->inp_ip; 1063 int inpflags = inp->inp_flags; 1064 int optval = 0, error = 0; 1065 1066 if (sopt->sopt_level != IPPROTO_IP) { 1067 if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) 1068 return 0; 1069 return ENOPROTOOPT; 1070 } 1071 1072 switch (op) { 1073 case PRCO_SETOPT: 1074 switch (sopt->sopt_name) { 1075 case IP_OPTIONS: 1076 #ifdef notyet 1077 case IP_RETOPTS: 1078 #endif 1079 error = ip_pcbopts(inp, sopt); 1080 break; 1081 1082 case IP_TOS: 1083 case IP_TTL: 1084 case IP_MINTTL: 1085 case IP_PKTINFO: 1086 case IP_RECVOPTS: 1087 case IP_RECVRETOPTS: 1088 case IP_RECVDSTADDR: 1089 case IP_RECVIF: 1090 case IP_RECVPKTINFO: 1091 case IP_RECVTTL: 1092 error = sockopt_getint(sopt, &optval); 1093 if (error) 1094 break; 1095 1096 switch (sopt->sopt_name) { 1097 case IP_TOS: 1098 ip->ip_tos = optval; 1099 break; 1100 1101 case IP_TTL: 1102 ip->ip_ttl = optval; 1103 break; 1104 1105 case IP_MINTTL: 1106 if (optval > 0 && optval <= MAXTTL) 1107 inp->inp_ip_minttl = optval; 1108 else 1109 error = EINVAL; 1110 break; 1111 #define OPTSET(bit) \ 1112 if (optval) \ 1113 inpflags |= bit; \ 1114 else \ 1115 inpflags &= ~bit; 1116 1117 case IP_PKTINFO: 1118 OPTSET(INP_PKTINFO); 1119 break; 1120 1121 
case IP_RECVOPTS: 1122 OPTSET(INP_RECVOPTS); 1123 break; 1124 1125 case IP_RECVPKTINFO: 1126 OPTSET(INP_RECVPKTINFO); 1127 break; 1128 1129 case IP_RECVRETOPTS: 1130 OPTSET(INP_RECVRETOPTS); 1131 break; 1132 1133 case IP_RECVDSTADDR: 1134 OPTSET(INP_RECVDSTADDR); 1135 break; 1136 1137 case IP_RECVIF: 1138 OPTSET(INP_RECVIF); 1139 break; 1140 1141 case IP_RECVTTL: 1142 OPTSET(INP_RECVTTL); 1143 break; 1144 } 1145 break; 1146 #undef OPTSET 1147 1148 case IP_MULTICAST_IF: 1149 case IP_MULTICAST_TTL: 1150 case IP_MULTICAST_LOOP: 1151 case IP_ADD_MEMBERSHIP: 1152 case IP_DROP_MEMBERSHIP: 1153 error = ip_setmoptions(&inp->inp_moptions, sopt); 1154 break; 1155 1156 case IP_PORTRANGE: 1157 error = sockopt_getint(sopt, &optval); 1158 if (error) 1159 break; 1160 1161 switch (optval) { 1162 case IP_PORTRANGE_DEFAULT: 1163 case IP_PORTRANGE_HIGH: 1164 inpflags &= ~(INP_LOWPORT); 1165 break; 1166 1167 case IP_PORTRANGE_LOW: 1168 inpflags |= INP_LOWPORT; 1169 break; 1170 1171 default: 1172 error = EINVAL; 1173 break; 1174 } 1175 break; 1176 1177 case IP_PORTALGO: 1178 error = sockopt_getint(sopt, &optval); 1179 if (error) 1180 break; 1181 1182 error = portalgo_algo_index_select( 1183 (struct inpcb_hdr *)inp, optval); 1184 break; 1185 1186 #if defined(IPSEC) 1187 case IP_IPSEC_POLICY: 1188 if (ipsec_enabled) { 1189 error = ipsec4_set_policy(inp, sopt->sopt_name, 1190 sopt->sopt_data, sopt->sopt_size, 1191 curlwp->l_cred); 1192 break; 1193 } 1194 /*FALLTHROUGH*/ 1195 #endif /* IPSEC */ 1196 1197 default: 1198 error = ENOPROTOOPT; 1199 break; 1200 } 1201 break; 1202 1203 case PRCO_GETOPT: 1204 switch (sopt->sopt_name) { 1205 case IP_OPTIONS: 1206 case IP_RETOPTS: { 1207 struct mbuf *mopts = inp->inp_options; 1208 1209 if (mopts) { 1210 struct mbuf *m; 1211 1212 m = m_copym(mopts, 0, M_COPYALL, M_DONTWAIT); 1213 if (m == NULL) { 1214 error = ENOBUFS; 1215 break; 1216 } 1217 error = sockopt_setmbuf(sopt, m); 1218 } 1219 break; 1220 } 1221 case IP_PKTINFO: 1222 case IP_TOS: 1223 case 
IP_TTL: 1224 case IP_MINTTL: 1225 case IP_RECVOPTS: 1226 case IP_RECVRETOPTS: 1227 case IP_RECVDSTADDR: 1228 case IP_RECVIF: 1229 case IP_RECVPKTINFO: 1230 case IP_RECVTTL: 1231 case IP_ERRORMTU: 1232 switch (sopt->sopt_name) { 1233 case IP_TOS: 1234 optval = ip->ip_tos; 1235 break; 1236 1237 case IP_TTL: 1238 optval = ip->ip_ttl; 1239 break; 1240 1241 case IP_MINTTL: 1242 optval = inp->inp_ip_minttl; 1243 break; 1244 1245 case IP_ERRORMTU: 1246 optval = inp->inp_errormtu; 1247 break; 1248 1249 #define OPTBIT(bit) (inpflags & bit ? 1 : 0) 1250 1251 case IP_PKTINFO: 1252 optval = OPTBIT(INP_PKTINFO); 1253 break; 1254 1255 case IP_RECVOPTS: 1256 optval = OPTBIT(INP_RECVOPTS); 1257 break; 1258 1259 case IP_RECVPKTINFO: 1260 optval = OPTBIT(INP_RECVPKTINFO); 1261 break; 1262 1263 case IP_RECVRETOPTS: 1264 optval = OPTBIT(INP_RECVRETOPTS); 1265 break; 1266 1267 case IP_RECVDSTADDR: 1268 optval = OPTBIT(INP_RECVDSTADDR); 1269 break; 1270 1271 case IP_RECVIF: 1272 optval = OPTBIT(INP_RECVIF); 1273 break; 1274 1275 case IP_RECVTTL: 1276 optval = OPTBIT(INP_RECVTTL); 1277 break; 1278 } 1279 error = sockopt_setint(sopt, optval); 1280 break; 1281 1282 #if 0 /* defined(IPSEC) */ 1283 case IP_IPSEC_POLICY: 1284 { 1285 struct mbuf *m = NULL; 1286 1287 /* XXX this will return EINVAL as sopt is empty */ 1288 error = ipsec4_get_policy(inp, sopt->sopt_data, 1289 sopt->sopt_size, &m); 1290 if (error == 0) 1291 error = sockopt_setmbuf(sopt, m); 1292 break; 1293 } 1294 #endif /*IPSEC*/ 1295 1296 case IP_MULTICAST_IF: 1297 case IP_MULTICAST_TTL: 1298 case IP_MULTICAST_LOOP: 1299 case IP_ADD_MEMBERSHIP: 1300 case IP_DROP_MEMBERSHIP: 1301 error = ip_getmoptions(inp->inp_moptions, sopt); 1302 break; 1303 1304 case IP_PORTRANGE: 1305 if (inpflags & INP_LOWPORT) 1306 optval = IP_PORTRANGE_LOW; 1307 else 1308 optval = IP_PORTRANGE_DEFAULT; 1309 error = sockopt_setint(sopt, optval); 1310 break; 1311 1312 case IP_PORTALGO: 1313 optval = inp->inp_portalgo; 1314 error = sockopt_setint(sopt, 
optval); 1315 break; 1316 1317 default: 1318 error = ENOPROTOOPT; 1319 break; 1320 } 1321 break; 1322 } 1323 1324 if (!error) { 1325 inp->inp_flags = inpflags; 1326 } 1327 return error; 1328 } 1329 1330 /* 1331 * Set up IP options in pcb for insertion in output packets. 1332 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1333 * with destination address if source routed. 1334 */ 1335 static int 1336 ip_pcbopts(struct inpcb *inp, const struct sockopt *sopt) 1337 { 1338 struct mbuf *m; 1339 const u_char *cp; 1340 u_char *dp; 1341 int cnt; 1342 1343 /* Turn off any old options. */ 1344 if (inp->inp_options) { 1345 m_free(inp->inp_options); 1346 } 1347 inp->inp_options = NULL; 1348 if ((cnt = sopt->sopt_size) == 0) { 1349 /* Only turning off any previous options. */ 1350 return 0; 1351 } 1352 cp = sopt->sopt_data; 1353 1354 #ifndef __vax__ 1355 if (cnt % sizeof(int32_t)) 1356 return (EINVAL); 1357 #endif 1358 1359 m = m_get(M_DONTWAIT, MT_SOOPTS); 1360 if (m == NULL) 1361 return (ENOBUFS); 1362 1363 dp = mtod(m, u_char *); 1364 memset(dp, 0, sizeof(struct in_addr)); 1365 dp += sizeof(struct in_addr); 1366 m->m_len = sizeof(struct in_addr); 1367 1368 /* 1369 * IP option list according to RFC791. Each option is of the form 1370 * 1371 * [optval] [olen] [(olen - 2) data bytes] 1372 * 1373 * We validate the list and copy options to an mbuf for prepending 1374 * to data packets. The IP first-hop destination address will be 1375 * stored before actual options and is zero if unset. 
1376 */ 1377 while (cnt > 0) { 1378 uint8_t optval, olen, offset; 1379 1380 optval = cp[IPOPT_OPTVAL]; 1381 1382 if (optval == IPOPT_EOL || optval == IPOPT_NOP) { 1383 olen = 1; 1384 } else { 1385 if (cnt < IPOPT_OLEN + 1) 1386 goto bad; 1387 1388 olen = cp[IPOPT_OLEN]; 1389 if (olen < IPOPT_OLEN + 1 || olen > cnt) 1390 goto bad; 1391 } 1392 1393 if (optval == IPOPT_LSRR || optval == IPOPT_SSRR) { 1394 /* 1395 * user process specifies route as: 1396 * ->A->B->C->D 1397 * D must be our final destination (but we can't 1398 * check that since we may not have connected yet). 1399 * A is first hop destination, which doesn't appear in 1400 * actual IP option, but is stored before the options. 1401 */ 1402 if (olen < IPOPT_OFFSET + 1 + sizeof(struct in_addr)) 1403 goto bad; 1404 1405 offset = cp[IPOPT_OFFSET]; 1406 memcpy(mtod(m, u_char *), cp + IPOPT_OFFSET + 1, 1407 sizeof(struct in_addr)); 1408 1409 cp += sizeof(struct in_addr); 1410 cnt -= sizeof(struct in_addr); 1411 olen -= sizeof(struct in_addr); 1412 1413 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1414 goto bad; 1415 1416 memcpy(dp, cp, olen); 1417 dp[IPOPT_OPTVAL] = optval; 1418 dp[IPOPT_OLEN] = olen; 1419 dp[IPOPT_OFFSET] = offset; 1420 break; 1421 } else { 1422 if (m->m_len + olen > MAX_IPOPTLEN + sizeof(struct in_addr)) 1423 goto bad; 1424 1425 memcpy(dp, cp, olen); 1426 break; 1427 } 1428 1429 dp += olen; 1430 m->m_len += olen; 1431 1432 if (optval == IPOPT_EOL) 1433 break; 1434 1435 cp += olen; 1436 cnt -= olen; 1437 } 1438 1439 inp->inp_options = m; 1440 return 0; 1441 bad: 1442 (void)m_free(m); 1443 return EINVAL; 1444 } 1445 1446 /* 1447 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 
1448 */ 1449 static struct ifnet * 1450 ip_multicast_if(struct in_addr *a, int *ifindexp) 1451 { 1452 int ifindex; 1453 struct ifnet *ifp = NULL; 1454 struct in_ifaddr *ia; 1455 1456 if (ifindexp) 1457 *ifindexp = 0; 1458 if (ntohl(a->s_addr) >> 24 == 0) { 1459 ifindex = ntohl(a->s_addr) & 0xffffff; 1460 ifp = if_byindex(ifindex); 1461 if (!ifp) 1462 return NULL; 1463 if (ifindexp) 1464 *ifindexp = ifindex; 1465 } else { 1466 LIST_FOREACH(ia, &IN_IFADDR_HASH(a->s_addr), ia_hash) { 1467 if (in_hosteq(ia->ia_addr.sin_addr, *a) && 1468 (ia->ia_ifp->if_flags & IFF_MULTICAST) != 0) { 1469 ifp = ia->ia_ifp; 1470 break; 1471 } 1472 } 1473 } 1474 return ifp; 1475 } 1476 1477 static int 1478 ip_getoptval(const struct sockopt *sopt, u_int8_t *val, u_int maxval) 1479 { 1480 u_int tval; 1481 u_char cval; 1482 int error; 1483 1484 if (sopt == NULL) 1485 return EINVAL; 1486 1487 switch (sopt->sopt_size) { 1488 case sizeof(u_char): 1489 error = sockopt_get(sopt, &cval, sizeof(u_char)); 1490 tval = cval; 1491 break; 1492 1493 case sizeof(u_int): 1494 error = sockopt_get(sopt, &tval, sizeof(u_int)); 1495 break; 1496 1497 default: 1498 error = EINVAL; 1499 } 1500 1501 if (error) 1502 return error; 1503 1504 if (tval > maxval) 1505 return EINVAL; 1506 1507 *val = tval; 1508 return 0; 1509 } 1510 1511 static int 1512 ip_get_membership(const struct sockopt *sopt, struct ifnet **ifp, 1513 struct in_addr *ia, bool add) 1514 { 1515 int error; 1516 struct ip_mreq mreq; 1517 1518 error = sockopt_get(sopt, &mreq, sizeof(mreq)); 1519 if (error) 1520 return error; 1521 1522 if (!IN_MULTICAST(mreq.imr_multiaddr.s_addr)) 1523 return EINVAL; 1524 1525 memcpy(ia, &mreq.imr_multiaddr, sizeof(*ia)); 1526 1527 if (in_nullhost(mreq.imr_interface)) { 1528 union { 1529 struct sockaddr dst; 1530 struct sockaddr_in dst4; 1531 } u; 1532 struct route ro; 1533 1534 if (!add) { 1535 *ifp = NULL; 1536 return 0; 1537 } 1538 /* 1539 * If no interface address was provided, use the interface of 1540 * the route to 
the given multicast address. 1541 */ 1542 struct rtentry *rt; 1543 memset(&ro, 0, sizeof(ro)); 1544 1545 sockaddr_in_init(&u.dst4, ia, 0); 1546 error = rtcache_setdst(&ro, &u.dst); 1547 if (error != 0) 1548 return error; 1549 *ifp = (rt = rtcache_init(&ro)) != NULL ? rt->rt_ifp : NULL; 1550 rtcache_unref(rt, &ro); 1551 rtcache_free(&ro); 1552 } else { 1553 *ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1554 if (!add && *ifp == NULL) 1555 return EADDRNOTAVAIL; 1556 } 1557 return 0; 1558 } 1559 1560 /* 1561 * Add a multicast group membership. 1562 * Group must be a valid IP multicast address. 1563 */ 1564 static int 1565 ip_add_membership(struct ip_moptions *imo, const struct sockopt *sopt) 1566 { 1567 struct ifnet *ifp = NULL; // XXX: gcc [ppc] 1568 struct in_addr ia; 1569 int i, error; 1570 1571 if (sopt->sopt_size == sizeof(struct ip_mreq)) 1572 error = ip_get_membership(sopt, &ifp, &ia, true); 1573 else 1574 #ifdef INET6 1575 error = ip6_get_membership(sopt, &ifp, &ia, sizeof(ia)); 1576 #else 1577 return EINVAL; 1578 #endif 1579 1580 if (error) 1581 return error; 1582 1583 /* 1584 * See if we found an interface, and confirm that it 1585 * supports multicast. 1586 */ 1587 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 1588 return EADDRNOTAVAIL; 1589 1590 /* 1591 * See if the membership already exists or if all the 1592 * membership slots are full. 1593 */ 1594 for (i = 0; i < imo->imo_num_memberships; ++i) { 1595 if (imo->imo_membership[i]->inm_ifp == ifp && 1596 in_hosteq(imo->imo_membership[i]->inm_addr, ia)) 1597 break; 1598 } 1599 if (i < imo->imo_num_memberships) 1600 return EADDRINUSE; 1601 1602 if (i == IP_MAX_MEMBERSHIPS) 1603 return ETOOMANYREFS; 1604 1605 /* 1606 * Everything looks good; add a new record to the multicast 1607 * address list for the given interface. 
1608 */ 1609 if ((imo->imo_membership[i] = in_addmulti(&ia, ifp)) == NULL) 1610 return ENOBUFS; 1611 1612 ++imo->imo_num_memberships; 1613 return 0; 1614 } 1615 1616 /* 1617 * Drop a multicast group membership. 1618 * Group must be a valid IP multicast address. 1619 */ 1620 static int 1621 ip_drop_membership(struct ip_moptions *imo, const struct sockopt *sopt) 1622 { 1623 struct in_addr ia = { .s_addr = 0 }; // XXX: gcc [ppc] 1624 struct ifnet *ifp = NULL; // XXX: gcc [ppc] 1625 int i, error; 1626 1627 if (sopt->sopt_size == sizeof(struct ip_mreq)) 1628 error = ip_get_membership(sopt, &ifp, &ia, false); 1629 else 1630 #ifdef INET6 1631 error = ip6_get_membership(sopt, &ifp, &ia, sizeof(ia)); 1632 #else 1633 return EINVAL; 1634 #endif 1635 1636 if (error) 1637 return error; 1638 1639 /* 1640 * Find the membership in the membership array. 1641 */ 1642 for (i = 0; i < imo->imo_num_memberships; ++i) { 1643 if ((ifp == NULL || 1644 imo->imo_membership[i]->inm_ifp == ifp) && 1645 in_hosteq(imo->imo_membership[i]->inm_addr, ia)) 1646 break; 1647 } 1648 if (i == imo->imo_num_memberships) 1649 return EADDRNOTAVAIL; 1650 1651 /* 1652 * Give up the multicast address record to which the 1653 * membership points. 1654 */ 1655 in_delmulti(imo->imo_membership[i]); 1656 1657 /* 1658 * Remove the gap in the membership array. 1659 */ 1660 for (++i; i < imo->imo_num_memberships; ++i) 1661 imo->imo_membership[i-1] = imo->imo_membership[i]; 1662 --imo->imo_num_memberships; 1663 return 0; 1664 } 1665 1666 /* 1667 * Set the IP multicast options in response to user setsockopt(). 1668 */ 1669 int 1670 ip_setmoptions(struct ip_moptions **pimo, const struct sockopt *sopt) 1671 { 1672 struct ip_moptions *imo = *pimo; 1673 struct in_addr addr; 1674 struct ifnet *ifp; 1675 int ifindex, error = 0; 1676 1677 if (!imo) { 1678 /* 1679 * No multicast option buffer attached to the pcb; 1680 * allocate one and initialize to default values. 
1681 */ 1682 imo = kmem_intr_alloc(sizeof(*imo), KM_NOSLEEP); 1683 if (imo == NULL) 1684 return ENOBUFS; 1685 1686 imo->imo_multicast_if_index = 0; 1687 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1688 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1689 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1690 imo->imo_num_memberships = 0; 1691 *pimo = imo; 1692 } 1693 1694 switch (sopt->sopt_name) { 1695 case IP_MULTICAST_IF: 1696 /* 1697 * Select the interface for outgoing multicast packets. 1698 */ 1699 error = sockopt_get(sopt, &addr, sizeof(addr)); 1700 if (error) 1701 break; 1702 1703 /* 1704 * INADDR_ANY is used to remove a previous selection. 1705 * When no interface is selected, a default one is 1706 * chosen every time a multicast packet is sent. 1707 */ 1708 if (in_nullhost(addr)) { 1709 imo->imo_multicast_if_index = 0; 1710 break; 1711 } 1712 /* 1713 * The selected interface is identified by its local 1714 * IP address. Find the interface and confirm that 1715 * it supports multicasting. 1716 */ 1717 ifp = ip_multicast_if(&addr, &ifindex); 1718 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1719 error = EADDRNOTAVAIL; 1720 break; 1721 } 1722 imo->imo_multicast_if_index = ifp->if_index; 1723 if (ifindex) 1724 imo->imo_multicast_addr = addr; 1725 else 1726 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1727 break; 1728 1729 case IP_MULTICAST_TTL: 1730 /* 1731 * Set the IP time-to-live for outgoing multicast packets. 1732 */ 1733 error = ip_getoptval(sopt, &imo->imo_multicast_ttl, MAXTTL); 1734 break; 1735 1736 case IP_MULTICAST_LOOP: 1737 /* 1738 * Set the loopback flag for outgoing multicast packets. 1739 * Must be zero or one. 
1740 */ 1741 error = ip_getoptval(sopt, &imo->imo_multicast_loop, 1); 1742 break; 1743 1744 case IP_ADD_MEMBERSHIP: /* IPV6_JOIN_GROUP */ 1745 error = ip_add_membership(imo, sopt); 1746 break; 1747 1748 case IP_DROP_MEMBERSHIP: /* IPV6_LEAVE_GROUP */ 1749 error = ip_drop_membership(imo, sopt); 1750 break; 1751 1752 default: 1753 error = EOPNOTSUPP; 1754 break; 1755 } 1756 1757 /* 1758 * If all options have default values, no need to keep the mbuf. 1759 */ 1760 if (imo->imo_multicast_if_index == 0 && 1761 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1762 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1763 imo->imo_num_memberships == 0) { 1764 kmem_free(imo, sizeof(*imo)); 1765 *pimo = NULL; 1766 } 1767 1768 return error; 1769 } 1770 1771 /* 1772 * Return the IP multicast options in response to user getsockopt(). 1773 */ 1774 int 1775 ip_getmoptions(struct ip_moptions *imo, struct sockopt *sopt) 1776 { 1777 struct in_addr addr; 1778 uint8_t optval; 1779 int error = 0; 1780 1781 switch (sopt->sopt_name) { 1782 case IP_MULTICAST_IF: 1783 if (imo == NULL || imo->imo_multicast_if_index == 0) 1784 addr = zeroin_addr; 1785 else if (imo->imo_multicast_addr.s_addr) { 1786 /* return the value user has set */ 1787 addr = imo->imo_multicast_addr; 1788 } else { 1789 struct ifnet *ifp; 1790 struct in_ifaddr *ia = NULL; 1791 int s = pserialize_read_enter(); 1792 1793 ifp = if_byindex(imo->imo_multicast_if_index); 1794 if (ifp != NULL) { 1795 ia = in_get_ia_from_ifp(ifp); 1796 } 1797 addr = ia ? ia->ia_addr.sin_addr : zeroin_addr; 1798 pserialize_read_exit(s); 1799 } 1800 error = sockopt_set(sopt, &addr, sizeof(addr)); 1801 break; 1802 1803 case IP_MULTICAST_TTL: 1804 optval = imo ? imo->imo_multicast_ttl 1805 : IP_DEFAULT_MULTICAST_TTL; 1806 1807 error = sockopt_set(sopt, &optval, sizeof(optval)); 1808 break; 1809 1810 case IP_MULTICAST_LOOP: 1811 optval = imo ? 
imo->imo_multicast_loop 1812 : IP_DEFAULT_MULTICAST_LOOP; 1813 1814 error = sockopt_set(sopt, &optval, sizeof(optval)); 1815 break; 1816 1817 default: 1818 error = EOPNOTSUPP; 1819 } 1820 1821 return error; 1822 } 1823 1824 /* 1825 * Discard the IP multicast options. 1826 */ 1827 void 1828 ip_freemoptions(struct ip_moptions *imo) 1829 { 1830 int i; 1831 1832 if (imo != NULL) { 1833 for (i = 0; i < imo->imo_num_memberships; ++i) 1834 in_delmulti(imo->imo_membership[i]); 1835 kmem_free(imo, sizeof(*imo)); 1836 } 1837 } 1838 1839 /* 1840 * Routine called from ip_output() to loop back a copy of an IP multicast 1841 * packet to the input queue of a specified interface. Note that this 1842 * calls the output routine of the loopback "driver", but with an interface 1843 * pointer that might NOT be lo0ifp -- easier than replicating that code here. 1844 */ 1845 static void 1846 ip_mloopback(struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in *dst) 1847 { 1848 struct ip *ip; 1849 struct mbuf *copym; 1850 1851 copym = m_copypacket(m, M_DONTWAIT); 1852 if (copym != NULL && 1853 (copym->m_flags & M_EXT || copym->m_len < sizeof(struct ip))) 1854 copym = m_pullup(copym, sizeof(struct ip)); 1855 if (copym == NULL) 1856 return; 1857 /* 1858 * We don't bother to fragment if the IP length is greater 1859 * than the interface's MTU. Can this possibly matter? 1860 */ 1861 ip = mtod(copym, struct ip *); 1862 1863 if (copym->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { 1864 in_delayed_cksum(copym); 1865 copym->m_pkthdr.csum_flags &= 1866 ~(M_CSUM_TCPv4|M_CSUM_UDPv4); 1867 } 1868 1869 ip->ip_sum = 0; 1870 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1871 #ifndef NET_MPSAFE 1872 KERNEL_LOCK(1, NULL); 1873 #endif 1874 (void)looutput(ifp, copym, sintocsa(dst), NULL); 1875 #ifndef NET_MPSAFE 1876 KERNEL_UNLOCK_ONE(NULL); 1877 #endif 1878 } 1879 1880 /* 1881 * Ensure sending address is valid. 
1882 * Returns 0 on success, -1 if an error should be sent back or 1 1883 * if the packet could be dropped without error (protocol dependent). 1884 */ 1885 static int 1886 ip_ifaddrvalid(const struct in_ifaddr *ia) 1887 { 1888 1889 if (ia == NULL) 1890 return -1; 1891 1892 if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY) 1893 return 0; 1894 1895 if (ia->ia4_flags & IN_IFF_DUPLICATED) 1896 return -1; 1897 else if (ia->ia4_flags & (IN_IFF_TENTATIVE | IN_IFF_DETACHED)) 1898 return 1; 1899 1900 return 0; 1901 } 1902