1 /* $OpenBSD: ip_output.c,v 1.327 2016/09/04 17:18:56 mpi Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_enc.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 #endif /* IPSEC */ 74 75 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 76 static __inline u_int16_t __attribute__((__unused__)) 77 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 78 void in_delayed_cksum(struct mbuf *); 79 80 #ifdef IPSEC 81 struct tdb * 82 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 83 int ipsecflowinfo); 84 int 85 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct ifnet *ifp, 86 struct route *ro); 87 #endif /* IPSEC */ 88 89 /* 90 * IP output. The packet in mbuf chain m contains a skeletal IP 91 * header (with len, off, ttl, proto, tos, src, dst). 92 * The mbuf chain containing the packet will be freed. 93 * The mbuf opt, if present, will not be freed. 94 */ 95 int 96 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 97 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 98 { 99 struct ip *ip; 100 struct ifnet *ifp = NULL; 101 struct mbuf *m = m0; 102 int hlen = sizeof (struct ip); 103 int len, error = 0; 104 struct route iproute; 105 struct sockaddr_in *dst; 106 struct tdb *tdb = NULL; 107 u_long mtu; 108 #if defined(MROUTING) 109 int rv; 110 #endif 111 112 #ifdef IPSEC 113 if (inp && (inp->inp_flags & INP_IPV6) != 0) 114 panic("ip_output: IPv6 pcb is passed"); 115 #endif /* IPSEC */ 116 117 #ifdef DIAGNOSTIC 118 if ((m->m_flags & M_PKTHDR) == 0) 119 panic("ip_output no HDR"); 120 #endif 121 if (opt) { 122 m = ip_insertoptions(m, opt, &len); 123 hlen = len; 124 } 125 126 ip = mtod(m, struct ip *); 127 128 /* 129 * Fill in IP header. 130 */ 131 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 132 ip->ip_v = IPVERSION; 133 ip->ip_off &= htons(IP_DF); 134 ip->ip_id = htons(ip_randomid()); 135 ip->ip_hl = hlen >> 2; 136 ipstat.ips_localout++; 137 } else { 138 hlen = ip->ip_hl << 2; 139 } 140 141 /* 142 * We should not send traffic to 0/8 say both Stevens and RFCs 143 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 144 */ 145 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 146 error = ENETUNREACH; 147 goto bad; 148 } 149 150 #if NPF > 0 151 reroute: 152 #endif 153 154 /* 155 * Do a route lookup now in case we need the source address to 156 * do an SPD lookup in IPsec; for most packets, the source address 157 * is set at a higher level protocol. ICMPs and other packets 158 * though (e.g., traceroute) have a source address of zeroes. 159 */ 160 if (ro == NULL) { 161 ro = &iproute; 162 memset(ro, 0, sizeof(*ro)); 163 } 164 165 dst = satosin(&ro->ro_dst); 166 167 /* 168 * If there is a cached route, check that it is to the same 169 * destination and is still up. If not, free it and try again. 170 */ 171 if (!rtisvalid(ro->ro_rt) || 172 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 173 ro->ro_tableid != m->m_pkthdr.ph_rtableid) { 174 rtfree(ro->ro_rt); 175 ro->ro_rt = NULL; 176 } 177 178 if (ro->ro_rt == NULL) { 179 dst->sin_family = AF_INET; 180 dst->sin_len = sizeof(*dst); 181 dst->sin_addr = ip->ip_dst; 182 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 183 } 184 185 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 186 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 187 imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) { 188 189 mtu = ifp->if_mtu; 190 if (ip->ip_src.s_addr == INADDR_ANY) { 191 struct in_ifaddr *ia; 192 193 KERNEL_LOCK(); 194 IFP_TO_IA(ifp, ia); 195 if (ia != NULL) 196 ip->ip_src = ia->ia_addr.sin_addr; 197 KERNEL_UNLOCK(); 198 } 199 } else { 200 struct in_ifaddr *ia; 201 202 if (ro->ro_rt == NULL) 203 ro->ro_rt = rtalloc_mpath(&ro->ro_dst, 204 &ip->ip_src.s_addr, ro->ro_tableid); 205 206 if (ro->ro_rt == NULL) { 207 ipstat.ips_noroute++; 208 error = EHOSTUNREACH; 209 goto bad; 210 } 211 212 ia = ifatoia(ro->ro_rt->rt_ifa); 213 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) 214 ifp = if_get(lo0ifidx); 215 else 216 ifp = if_get(ro->ro_rt->rt_ifidx); 217 if (ifp == NULL) { 218 error = EHOSTUNREACH; 219 goto bad; 220 } 221 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 222 mtu = ifp->if_mtu; 223 224 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 225 dst = satosin(ro->ro_rt->rt_gateway); 226 227 /* Set the source IP address */ 228 if (ip->ip_src.s_addr == INADDR_ANY && ia) 229 ip->ip_src = ia->ia_addr.sin_addr; 230 } 231 232 #ifdef IPSEC 233 if (ipsec_in_use || inp != NULL) { 234 KERNEL_LOCK(); 235 /* Do we have any pending SAs to apply ? */ 236 tdb = ip_output_ipsec_lookup(m, hlen, &error, inp, 237 ipsecflowinfo); 238 KERNEL_UNLOCK(); 239 if (error != 0) { 240 /* Should silently drop packet */ 241 if (error == -EINVAL) 242 error = 0; 243 m_freem(m); 244 goto done; 245 } 246 if (tdb != NULL) { 247 /* 248 * If it needs TCP/UDP hardware-checksumming, do the 249 * computation now. 250 */ 251 in_proto_cksum_out(m, NULL); 252 253 /* If it's not a multicast packet, try to fast-path */ 254 if (!IN_MULTICAST(ip->ip_dst.s_addr)) { 255 goto sendit; 256 } 257 } 258 } 259 #endif /* IPSEC */ 260 261 if (IN_MULTICAST(ip->ip_dst.s_addr) || 262 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 263 264 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 265 M_BCAST : M_MCAST; 266 267 /* 268 * IP destination address is multicast. Make sure "dst" 269 * still points to the address in "ro". (It may have been 270 * changed to point to a gateway address, above.) 271 */ 272 dst = satosin(&ro->ro_dst); 273 274 /* 275 * See if the caller provided any multicast options 276 */ 277 if (imo != NULL) 278 ip->ip_ttl = imo->imo_ttl; 279 else 280 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 281 282 /* 283 * if we don't know the outgoing ifp yet, we can't generate 284 * output 285 */ 286 if (!ifp) { 287 ipstat.ips_noroute++; 288 error = EHOSTUNREACH; 289 goto bad; 290 } 291 292 /* 293 * Confirm that the outgoing interface supports multicast, 294 * but only if the packet actually is going out on that 295 * interface (i.e., no IPsec is applied). 296 */ 297 if ((((m->m_flags & M_MCAST) && 298 (ifp->if_flags & IFF_MULTICAST) == 0) || 299 ((m->m_flags & M_BCAST) && 300 (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) { 301 ipstat.ips_noroute++; 302 error = ENETUNREACH; 303 goto bad; 304 } 305 306 /* 307 * If source address not specified yet, use address 308 * of outgoing interface. 309 */ 310 if (ip->ip_src.s_addr == INADDR_ANY) { 311 struct in_ifaddr *ia; 312 313 KERNEL_LOCK(); 314 IFP_TO_IA(ifp, ia); 315 if (ia != NULL) 316 ip->ip_src = ia->ia_addr.sin_addr; 317 KERNEL_UNLOCK(); 318 } 319 320 if ((imo == NULL || imo->imo_loop) && 321 in_hasmulti(&ip->ip_dst, ifp)) { 322 /* 323 * If we belong to the destination multicast group 324 * on the outgoing interface, and the caller did not 325 * forbid loopback, loop back a copy. 326 * Can't defer TCP/UDP checksumming, do the 327 * computation now. 328 */ 329 in_proto_cksum_out(m, NULL); 330 ip_mloopback(ifp, m, dst); 331 } 332 #ifdef MROUTING 333 else { 334 /* 335 * If we are acting as a multicast router, perform 336 * multicast forwarding as if the packet had just 337 * arrived on the interface to which we are about 338 * to send. The multicast forwarding function 339 * recursively calls this function, using the 340 * IP_FORWARDING flag to prevent infinite recursion. 341 * 342 * Multicasts that are looped back by ip_mloopback(), 343 * above, will be forwarded by the ip_input() routine, 344 * if necessary. 345 */ 346 if (ipmforwarding && ip_mrouter && 347 (flags & IP_FORWARDING) == 0) { 348 KERNEL_LOCK(); 349 rv = ip_mforward(m, ifp); 350 KERNEL_UNLOCK(); 351 if (rv != 0) { 352 m_freem(m); 353 goto done; 354 } 355 } 356 } 357 #endif 358 /* 359 * Multicasts with a time-to-live of zero may be looped- 360 * back, above, but must not be transmitted on a network. 361 * Also, multicasts addressed to the loopback interface 362 * are not sent -- the above call to ip_mloopback() will 363 * loop back a copy if this host actually belongs to the 364 * destination group on the loopback interface. 365 */ 366 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 367 m_freem(m); 368 goto done; 369 } 370 371 goto sendit; 372 } 373 374 /* 375 * Look for broadcast address and verify user is allowed to send 376 * such a packet; if the packet is going in an IPsec tunnel, skip 377 * this check. 378 */ 379 if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) || 380 (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) { 381 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 382 error = EADDRNOTAVAIL; 383 goto bad; 384 } 385 if ((flags & IP_ALLOWBROADCAST) == 0) { 386 error = EACCES; 387 goto bad; 388 } 389 390 /* Don't allow broadcast messages to be fragmented */ 391 if (ntohs(ip->ip_len) > ifp->if_mtu) { 392 error = EMSGSIZE; 393 goto bad; 394 } 395 m->m_flags |= M_BCAST; 396 } else 397 m->m_flags &= ~M_BCAST; 398 399 sendit: 400 /* 401 * If we're doing Path MTU discovery, we need to set DF unless 402 * the route's MTU is locked. 403 */ 404 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 405 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 406 ip->ip_off |= htons(IP_DF); 407 408 #ifdef IPSEC 409 /* 410 * Check if the packet needs encapsulation. 411 */ 412 if (tdb != NULL) { 413 KERNEL_LOCK(); 414 /* Callee frees mbuf */ 415 error = ip_output_ipsec_send(tdb, m, ifp, ro); 416 KERNEL_UNLOCK(); 417 goto done; 418 } 419 #endif /* IPSEC */ 420 421 /* 422 * Packet filter 423 */ 424 #if NPF > 0 425 if (pf_test(AF_INET, PF_OUT, ifp, &m) != PF_PASS) { 426 error = EACCES; 427 m_freem(m); 428 goto done; 429 } 430 if (m == NULL) 431 goto done; 432 ip = mtod(m, struct ip *); 433 hlen = ip->ip_hl << 2; 434 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 435 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 436 /* already rerun the route lookup, go on */ 437 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 438 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 439 /* tag as generated to skip over pf_test on rerun */ 440 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 441 ro = NULL; 442 if_put(ifp); /* drop reference since target changed */ 443 ifp = NULL; 444 goto reroute; 445 } 446 #endif 447 in_proto_cksum_out(m, ifp); 448 449 #ifdef IPSEC 450 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 451 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 452 error = EHOSTUNREACH; 453 m_freem(m); 454 goto done; 455 } 456 #endif 457 458 /* 459 * If small enough for interface, can just send directly. 460 */ 461 if (ntohs(ip->ip_len) <= mtu) { 462 ip->ip_sum = 0; 463 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 464 (ifp->if_bridgeport == NULL)) 465 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 466 else { 467 ipstat.ips_outswcsum++; 468 ip->ip_sum = in_cksum(m, hlen); 469 } 470 471 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 472 goto done; 473 } 474 475 /* 476 * Too large for interface; fragment if possible. 477 * Must be able to put at least 8 bytes per fragment. 478 */ 479 if (ip->ip_off & htons(IP_DF)) { 480 #ifdef IPSEC 481 if (ip_mtudisc) 482 ipsec_adjust_mtu(m, ifp->if_mtu); 483 #endif 484 error = EMSGSIZE; 485 /* 486 * This case can happen if the user changed the MTU 487 * of an interface after enabling IP on it. Because 488 * most netifs don't keep track of routes pointing to 489 * them, there is no way for one to update all its 490 * routes when the MTU is changed. 491 */ 492 if (rtisvalid(ro->ro_rt) && 493 ISSET(ro->ro_rt->rt_flags, RTF_HOST) && 494 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 495 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 496 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 497 } 498 ipstat.ips_cantfrag++; 499 goto bad; 500 } 501 502 error = ip_fragment(m, ifp, mtu); 503 if (error) { 504 m = m0 = NULL; 505 goto bad; 506 } 507 508 for (; m; m = m0) { 509 m0 = m->m_nextpkt; 510 m->m_nextpkt = 0; 511 if (error == 0) 512 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 513 else 514 m_freem(m); 515 } 516 517 if (error == 0) 518 ipstat.ips_fragmented++; 519 520 done: 521 if (ro == &iproute && ro->ro_rt) 522 rtfree(ro->ro_rt); 523 if_put(ifp); 524 return (error); 525 bad: 526 m_freem(m0); 527 goto done; 528 } 529 530 #ifdef IPSEC 531 struct tdb * 532 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 533 int ipsecflowinfo) 534 { 535 struct m_tag *mtag; 536 struct tdb_ident *tdbi; 537 struct tdb *tdb; 538 539 /* Do we have any pending SAs to apply ? */ 540 tdb = ipsp_spd_lookup(m, AF_INET, hlen, error, IPSP_DIRECTION_OUT, 541 NULL, inp, ipsecflowinfo); 542 if (tdb == NULL) 543 return NULL; 544 /* Loop detection */ 545 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 546 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 547 continue; 548 tdbi = (struct tdb_ident *)(mtag + 1); 549 if (tdbi->spi == tdb->tdb_spi && 550 tdbi->proto == tdb->tdb_sproto && 551 tdbi->rdomain == tdb->tdb_rdomain && 552 !memcmp(&tdbi->dst, &tdb->tdb_dst, 553 sizeof(union sockaddr_union))) { 554 /* no IPsec needed */ 555 return NULL; 556 } 557 } 558 return tdb; 559 } 560 561 int 562 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct ifnet *ifp, 563 struct route *ro) 564 { 565 #if NPF > 0 566 struct ifnet *encif; 567 #endif 568 struct ip *ip; 569 570 #if NPF > 0 571 /* 572 * Packet filter 573 */ 574 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 575 pf_test(AF_INET, PF_OUT, encif, &m) != PF_PASS) { 576 m_freem(m); 577 return EACCES; 578 } 579 if (m == NULL) 580 return 0; 581 /* 582 * PF_TAG_REROUTE handling or not... 583 * Packet is entering IPsec so the routing is 584 * already overruled by the IPsec policy. 585 * Until now the change was not reconsidered. 586 * What's the behaviour? 587 */ 588 in_proto_cksum_out(m, encif); 589 #endif 590 591 /* Check if we are allowed to fragment */ 592 ip = mtod(m, struct ip *); 593 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 594 ntohs(ip->ip_len) > tdb->tdb_mtu && 595 tdb->tdb_mtutimeout > time_second) { 596 struct rtentry *rt = NULL; 597 int rt_mtucloned = 0; 598 int transportmode = 0; 599 600 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 601 (tdb->tdb_dst.sin.sin_addr.s_addr == ip->ip_dst.s_addr); 602 603 /* Find a host route to store the mtu in */ 604 if (ro != NULL) 605 rt = ro->ro_rt; 606 /* but don't add a PMTU route for transport mode SAs */ 607 if (transportmode) 608 rt = NULL; 609 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 610 rt = icmp_mtudisc_clone(ip->ip_dst, 611 m->m_pkthdr.ph_rtableid); 612 rt_mtucloned = 1; 613 } 614 DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__, 615 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned)); 616 if (rt != NULL) { 617 rt->rt_rmx.rmx_mtu = tdb->tdb_mtu; 618 if (ro && ro->ro_rt != NULL) { 619 rtfree(ro->ro_rt); 620 ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, 621 m->m_pkthdr.ph_rtableid); 622 } 623 if (rt_mtucloned) 624 rtfree(rt); 625 } 626 ipsec_adjust_mtu(m, tdb->tdb_mtu); 627 m_freem(m); 628 return EMSGSIZE; 629 } 630 631 /* 632 * Clear these -- they'll be set in the recursive invocation 633 * as needed. 634 */ 635 m->m_flags &= ~(M_MCAST | M_BCAST); 636 637 /* Callee frees mbuf */ 638 return ipsp_process_packet(m, tdb, AF_INET, 0); 639 } 640 #endif /* IPSEC */ 641 642 int 643 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 644 { 645 struct ip *ip, *mhip; 646 struct mbuf *m0; 647 int len, hlen, off; 648 int mhlen, firstlen; 649 struct mbuf **mnext; 650 int fragments = 0; 651 int error = 0; 652 653 ip = mtod(m, struct ip *); 654 hlen = ip->ip_hl << 2; 655 656 len = (mtu - hlen) &~ 7; 657 if (len < 8) { 658 m_freem(m); 659 return (EMSGSIZE); 660 } 661 662 /* 663 * If we are doing fragmentation, we can't defer TCP/UDP 664 * checksumming; compute the checksum and clear the flag. 665 */ 666 in_proto_cksum_out(m, NULL); 667 firstlen = len; 668 mnext = &m->m_nextpkt; 669 670 /* 671 * Loop through length of segment after first fragment, 672 * make new header and copy data of each part and link onto chain. 673 */ 674 m0 = m; 675 mhlen = sizeof (struct ip); 676 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 677 MGETHDR(m, M_DONTWAIT, MT_HEADER); 678 if (m == NULL) { 679 ipstat.ips_odropped++; 680 error = ENOBUFS; 681 goto sendorfree; 682 } 683 *mnext = m; 684 mnext = &m->m_nextpkt; 685 m->m_data += max_linkhdr; 686 mhip = mtod(m, struct ip *); 687 *mhip = *ip; 688 /* we must inherit MCAST/BCAST flags, routing table and prio */ 689 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 690 m->m_pkthdr.ph_rtableid = m0->m_pkthdr.ph_rtableid; 691 m->m_pkthdr.pf.prio = m0->m_pkthdr.pf.prio; 692 if (hlen > sizeof (struct ip)) { 693 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 694 mhip->ip_hl = mhlen >> 2; 695 } 696 m->m_len = mhlen; 697 mhip->ip_off = ((off - hlen) >> 3) + 698 (ntohs(ip->ip_off) & ~IP_MF); 699 if (ip->ip_off & htons(IP_MF)) 700 mhip->ip_off |= IP_MF; 701 if (off + len >= ntohs(ip->ip_len)) 702 len = ntohs(ip->ip_len) - off; 703 else 704 mhip->ip_off |= IP_MF; 705 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 706 m->m_next = m_copym(m0, off, len, M_NOWAIT); 707 if (m->m_next == 0) { 708 ipstat.ips_odropped++; 709 error = ENOBUFS; 710 goto sendorfree; 711 } 712 m->m_pkthdr.len = mhlen + len; 713 m->m_pkthdr.ph_ifidx = 0; 714 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 715 mhip->ip_sum = 0; 716 if ((ifp != NULL) && 717 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 718 (ifp->if_bridgeport == NULL)) 719 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 720 else { 721 ipstat.ips_outswcsum++; 722 mhip->ip_sum = in_cksum(m, mhlen); 723 } 724 ipstat.ips_ofragments++; 725 fragments++; 726 } 727 /* 728 * Update first fragment by trimming what's been copied out 729 * and updating header, then send each fragment (in order). 730 */ 731 m = m0; 732 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 733 m->m_pkthdr.len = hlen + firstlen; 734 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 735 ip->ip_off |= htons(IP_MF); 736 ip->ip_sum = 0; 737 if ((ifp != NULL) && 738 (ifp->if_capabilities & IFCAP_CSUM_IPv4) && 739 (ifp->if_bridgeport == NULL)) 740 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 741 else { 742 ipstat.ips_outswcsum++; 743 ip->ip_sum = in_cksum(m, hlen); 744 } 745 sendorfree: 746 if (error) { 747 for (m = m0; m; m = m0) { 748 m0 = m->m_nextpkt; 749 m->m_nextpkt = NULL; 750 m_freem(m); 751 } 752 } 753 754 return (error); 755 } 756 757 /* 758 * Insert IP options into preformed packet. 759 * Adjust IP destination as required for IP source routing, 760 * as indicated by a non-zero in_addr at the start of the options. 761 */ 762 struct mbuf * 763 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 764 { 765 struct ipoption *p = mtod(opt, struct ipoption *); 766 struct mbuf *n; 767 struct ip *ip = mtod(m, struct ip *); 768 unsigned int optlen; 769 770 optlen = opt->m_len - sizeof(p->ipopt_dst); 771 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 772 return (m); /* XXX should fail */ 773 if (p->ipopt_dst.s_addr) 774 ip->ip_dst = p->ipopt_dst; 775 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 776 MGETHDR(n, M_DONTWAIT, MT_HEADER); 777 if (n == NULL) 778 return (m); 779 M_MOVE_HDR(n, m); 780 n->m_pkthdr.len += optlen; 781 m->m_len -= sizeof(struct ip); 782 m->m_data += sizeof(struct ip); 783 n->m_next = m; 784 m = n; 785 m->m_len = optlen + sizeof(struct ip); 786 m->m_data += max_linkhdr; 787 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip)); 788 } else { 789 m->m_data -= optlen; 790 m->m_len += optlen; 791 m->m_pkthdr.len += optlen; 792 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 793 } 794 ip = mtod(m, struct ip *); 795 memcpy(ip + 1, p->ipopt_list, optlen); 796 *phlen = sizeof(struct ip) + optlen; 797 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 798 return (m); 799 } 800 801 /* 802 * Copy options from ip to jp, 803 * omitting those not copied during fragmentation. 804 */ 805 int 806 ip_optcopy(struct ip *ip, struct ip *jp) 807 { 808 u_char *cp, *dp; 809 int opt, optlen, cnt; 810 811 cp = (u_char *)(ip + 1); 812 dp = (u_char *)(jp + 1); 813 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 814 for (; cnt > 0; cnt -= optlen, cp += optlen) { 815 opt = cp[0]; 816 if (opt == IPOPT_EOL) 817 break; 818 if (opt == IPOPT_NOP) { 819 /* Preserve for IP mcast tunnel's LSRR alignment. */ 820 *dp++ = IPOPT_NOP; 821 optlen = 1; 822 continue; 823 } 824 #ifdef DIAGNOSTIC 825 if (cnt < IPOPT_OLEN + sizeof(*cp)) 826 panic("malformed IPv4 option passed to ip_optcopy"); 827 #endif 828 optlen = cp[IPOPT_OLEN]; 829 #ifdef DIAGNOSTIC 830 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 831 panic("malformed IPv4 option passed to ip_optcopy"); 832 #endif 833 /* bogus lengths should have been caught by ip_dooptions */ 834 if (optlen > cnt) 835 optlen = cnt; 836 if (IPOPT_COPIED(opt)) { 837 memcpy(dp, cp, optlen); 838 dp += optlen; 839 } 840 } 841 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 842 *dp++ = IPOPT_EOL; 843 return (optlen); 844 } 845 846 /* 847 * IP socket option processing. 848 */ 849 int 850 ip_ctloutput(int op, struct socket *so, int level, int optname, 851 struct mbuf **mp) 852 { 853 struct inpcb *inp = sotoinpcb(so); 854 struct mbuf *m = *mp; 855 int optval = 0; 856 struct proc *p = curproc; /* XXX */ 857 int error = 0; 858 u_int rtid = 0; 859 860 if (level != IPPROTO_IP) { 861 error = EINVAL; 862 if (op == PRCO_SETOPT) 863 (void) m_free(*mp); 864 } else switch (op) { 865 case PRCO_SETOPT: 866 switch (optname) { 867 case IP_OPTIONS: 868 return (ip_pcbopts(&inp->inp_options, m)); 869 870 case IP_TOS: 871 case IP_TTL: 872 case IP_MINTTL: 873 case IP_RECVOPTS: 874 case IP_RECVRETOPTS: 875 case IP_RECVDSTADDR: 876 case IP_RECVIF: 877 case IP_RECVTTL: 878 case IP_RECVDSTPORT: 879 case IP_RECVRTABLE: 880 case IP_IPSECFLOWINFO: 881 if (m == NULL || m->m_len != sizeof(int)) 882 error = EINVAL; 883 else { 884 optval = *mtod(m, int *); 885 switch (optname) { 886 887 case IP_TOS: 888 inp->inp_ip.ip_tos = optval; 889 break; 890 891 case IP_TTL: 892 if (optval > 0 && optval <= MAXTTL) 893 inp->inp_ip.ip_ttl = optval; 894 else if (optval == -1) 895 inp->inp_ip.ip_ttl = ip_defttl; 896 else 897 error = EINVAL; 898 break; 899 900 case IP_MINTTL: 901 if (optval >= 0 && optval <= MAXTTL) 902 inp->inp_ip_minttl = optval; 903 else 904 error = EINVAL; 905 break; 906 #define OPTSET(bit) \ 907 if (optval) \ 908 inp->inp_flags |= bit; \ 909 else \ 910 inp->inp_flags &= ~bit; 911 912 case IP_RECVOPTS: 913 OPTSET(INP_RECVOPTS); 914 break; 915 916 case IP_RECVRETOPTS: 917 OPTSET(INP_RECVRETOPTS); 918 break; 919 920 case IP_RECVDSTADDR: 921 OPTSET(INP_RECVDSTADDR); 922 break; 923 case IP_RECVIF: 924 OPTSET(INP_RECVIF); 925 break; 926 case IP_RECVTTL: 927 OPTSET(INP_RECVTTL); 928 break; 929 case IP_RECVDSTPORT: 930 OPTSET(INP_RECVDSTPORT); 931 break; 932 case IP_RECVRTABLE: 933 OPTSET(INP_RECVRTABLE); 934 break; 935 case IP_IPSECFLOWINFO: 936 OPTSET(INP_IPSECFLOWINFO); 937 break; 938 } 939 } 940 break; 941 #undef OPTSET 942 943 case IP_MULTICAST_IF: 944 case IP_MULTICAST_TTL: 945 case IP_MULTICAST_LOOP: 946 case IP_ADD_MEMBERSHIP: 947 case IP_DROP_MEMBERSHIP: 948 error = ip_setmoptions(optname, &inp->inp_moptions, m, 949 inp->inp_rtableid); 950 break; 951 952 case IP_PORTRANGE: 953 if (m == NULL || m->m_len != sizeof(int)) 954 error = EINVAL; 955 else { 956 optval = *mtod(m, int *); 957 958 switch (optval) { 959 960 case IP_PORTRANGE_DEFAULT: 961 inp->inp_flags &= ~(INP_LOWPORT); 962 inp->inp_flags &= ~(INP_HIGHPORT); 963 break; 964 965 case IP_PORTRANGE_HIGH: 966 inp->inp_flags &= ~(INP_LOWPORT); 967 inp->inp_flags |= INP_HIGHPORT; 968 break; 969 970 case IP_PORTRANGE_LOW: 971 inp->inp_flags &= ~(INP_HIGHPORT); 972 inp->inp_flags |= INP_LOWPORT; 973 break; 974 975 default: 976 977 error = EINVAL; 978 break; 979 } 980 } 981 break; 982 case IP_AUTH_LEVEL: 983 case IP_ESP_TRANS_LEVEL: 984 case IP_ESP_NETWORK_LEVEL: 985 case IP_IPCOMP_LEVEL: 986 #ifndef IPSEC 987 error = EOPNOTSUPP; 988 #else 989 if (m == NULL || m->m_len != sizeof(int)) { 990 error = EINVAL; 991 break; 992 } 993 optval = *mtod(m, int *); 994 995 if (optval < IPSEC_LEVEL_BYPASS || 996 optval > IPSEC_LEVEL_UNIQUE) { 997 error = EINVAL; 998 break; 999 } 1000 1001 switch (optname) { 1002 case IP_AUTH_LEVEL: 1003 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1004 suser(p, 0)) { 1005 error = EACCES; 1006 break; 1007 } 1008 inp->inp_seclevel[SL_AUTH] = optval; 1009 break; 1010 1011 case IP_ESP_TRANS_LEVEL: 1012 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1013 suser(p, 0)) { 1014 error = EACCES; 1015 break; 1016 } 1017 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1018 break; 1019 1020 case IP_ESP_NETWORK_LEVEL: 1021 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1022 suser(p, 0)) { 1023 error = EACCES; 1024 break; 1025 } 1026 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1027 break; 1028 case IP_IPCOMP_LEVEL: 1029 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1030 suser(p, 0)) { 1031 error = EACCES; 1032 break; 1033 } 1034 inp->inp_seclevel[SL_IPCOMP] = optval; 1035 break; 1036 } 1037 #endif 1038 break; 1039 1040 case IP_IPSEC_LOCAL_ID: 1041 case IP_IPSEC_REMOTE_ID: 1042 error = EOPNOTSUPP; 1043 break; 1044 case SO_RTABLE: 1045 if (m == NULL || m->m_len < sizeof(u_int)) { 1046 error = EINVAL; 1047 break; 1048 } 1049 rtid = *mtod(m, u_int *); 1050 if (inp->inp_rtableid == rtid) 1051 break; 1052 /* needs privileges to switch when already set */ 1053 if (p->p_p->ps_rtableid != rtid && 1054 p->p_p->ps_rtableid != 0 && 1055 (error = suser(p, 0)) != 0) 1056 break; 1057 /* table must exist */ 1058 if (!rtable_exists(rtid)) { 1059 error = EINVAL; 1060 break; 1061 } 1062 if (inp->inp_lport) { 1063 error = EBUSY; 1064 break; 1065 } 1066 inp->inp_rtableid = rtid; 1067 in_pcbrehash(inp); 1068 break; 1069 case IP_PIPEX: 1070 if (m != NULL && m->m_len == sizeof(int)) 1071 inp->inp_pipex = *mtod(m, int *); 1072 else 1073 error = EINVAL; 1074 break; 1075 1076 default: 1077 error = ENOPROTOOPT; 1078 break; 1079 } 1080 if (m) 1081 (void)m_free(m); 1082 break; 1083 1084 case PRCO_GETOPT: 1085 switch (optname) { 1086 case IP_OPTIONS: 1087 case IP_RETOPTS: 1088 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1089 if (inp->inp_options) { 1090 m->m_len = inp->inp_options->m_len; 1091 memcpy(mtod(m, caddr_t), 1092 mtod(inp->inp_options, caddr_t), m->m_len); 1093 } else 1094 m->m_len = 0; 1095 break; 1096 1097 case IP_TOS: 1098 case IP_TTL: 1099 case IP_MINTTL: 1100 case IP_RECVOPTS: 1101 case IP_RECVRETOPTS: 1102 case IP_RECVDSTADDR: 1103 case IP_RECVIF: 1104 case IP_RECVTTL: 1105 case IP_RECVDSTPORT: 1106 case IP_RECVRTABLE: 1107 case IP_IPSECFLOWINFO: 1108 case IP_IPDEFTTL: 1109 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1110 m->m_len = sizeof(int); 1111 switch (optname) { 1112 1113 case IP_TOS: 1114 optval = inp->inp_ip.ip_tos; 1115 break; 1116 1117 case IP_TTL: 1118 optval = inp->inp_ip.ip_ttl; 1119 break; 1120 1121 case IP_MINTTL: 1122 optval = inp->inp_ip_minttl; 1123 break; 1124 1125 case IP_IPDEFTTL: 1126 optval = ip_defttl; 1127 break; 1128 1129 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1130 1131 case IP_RECVOPTS: 1132 optval = OPTBIT(INP_RECVOPTS); 1133 break; 1134 1135 case IP_RECVRETOPTS: 1136 optval = OPTBIT(INP_RECVRETOPTS); 1137 break; 1138 1139 case IP_RECVDSTADDR: 1140 optval = OPTBIT(INP_RECVDSTADDR); 1141 break; 1142 case IP_RECVIF: 1143 optval = OPTBIT(INP_RECVIF); 1144 break; 1145 case IP_RECVTTL: 1146 optval = OPTBIT(INP_RECVTTL); 1147 break; 1148 case IP_RECVDSTPORT: 1149 optval = OPTBIT(INP_RECVDSTPORT); 1150 break; 1151 case IP_RECVRTABLE: 1152 optval = OPTBIT(INP_RECVRTABLE); 1153 break; 1154 case IP_IPSECFLOWINFO: 1155 optval = OPTBIT(INP_IPSECFLOWINFO); 1156 break; 1157 } 1158 *mtod(m, int *) = optval; 1159 break; 1160 1161 case IP_MULTICAST_IF: 1162 case IP_MULTICAST_TTL: 1163 case IP_MULTICAST_LOOP: 1164 case IP_ADD_MEMBERSHIP: 1165 case IP_DROP_MEMBERSHIP: 1166 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1167 break; 1168 1169 case IP_PORTRANGE: 1170 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1171 m->m_len = sizeof(int); 1172 1173 if (inp->inp_flags & INP_HIGHPORT) 1174 optval = IP_PORTRANGE_HIGH; 1175 else if (inp->inp_flags & INP_LOWPORT) 1176 optval = IP_PORTRANGE_LOW; 1177 else 1178 optval = 0; 1179 1180 *mtod(m, int *) = optval; 1181 break; 1182 1183 case IP_AUTH_LEVEL: 1184 case IP_ESP_TRANS_LEVEL: 1185 case IP_ESP_NETWORK_LEVEL: 1186 case IP_IPCOMP_LEVEL: 1187 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1188 #ifndef IPSEC 1189 m->m_len = sizeof(int); 1190 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1191 #else 1192 m->m_len = sizeof(int); 1193 switch (optname) { 1194 case IP_AUTH_LEVEL: 1195 optval = inp->inp_seclevel[SL_AUTH]; 1196 break; 1197 1198 case IP_ESP_TRANS_LEVEL: 1199 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1200 break; 1201 1202 case IP_ESP_NETWORK_LEVEL: 1203 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1204 break; 1205 case IP_IPCOMP_LEVEL: 1206 optval = inp->inp_seclevel[SL_IPCOMP]; 1207 break; 1208 } 1209 *mtod(m, int *) = optval; 1210 #endif 1211 break; 1212 case IP_IPSEC_LOCAL_ID: 1213 case IP_IPSEC_REMOTE_ID: 1214 error = EOPNOTSUPP; 1215 break; 1216 case SO_RTABLE: 1217 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1218 m->m_len = sizeof(u_int); 1219 *mtod(m, u_int *) = inp->inp_rtableid; 1220 break; 1221 case IP_PIPEX: 1222 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1223 m->m_len = sizeof(int); 1224 *mtod(m, int *) = inp->inp_pipex; 1225 break; 1226 default: 1227 error = ENOPROTOOPT; 1228 break; 1229 } 1230 break; 1231 } 1232 return (error); 1233 } 1234 1235 /* 1236 * Set up IP options in pcb for insertion in output packets. 1237 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1238 * with destination address if source routed. 1239 */ 1240 int 1241 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m) 1242 { 1243 int cnt, optlen; 1244 u_char *cp; 1245 u_char opt; 1246 1247 /* turn off any old options */ 1248 if (*pcbopt) 1249 (void)m_free(*pcbopt); 1250 *pcbopt = 0; 1251 if (m == NULL || m->m_len == 0) { 1252 /* 1253 * Only turning off any previous options. 1254 */ 1255 if (m) 1256 (void)m_free(m); 1257 return (0); 1258 } 1259 1260 if (m->m_len % sizeof(int32_t)) 1261 goto bad; 1262 1263 /* 1264 * IP first-hop destination address will be stored before 1265 * actual options; move other options back 1266 * and clear it when none present. 1267 */ 1268 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1269 goto bad; 1270 cnt = m->m_len; 1271 m->m_len += sizeof(struct in_addr); 1272 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1273 memmove((caddr_t)cp, mtod(m, caddr_t), (unsigned)cnt); 1274 memset(mtod(m, caddr_t), 0, sizeof(struct in_addr)); 1275 1276 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1277 opt = cp[IPOPT_OPTVAL]; 1278 if (opt == IPOPT_EOL) 1279 break; 1280 if (opt == IPOPT_NOP) 1281 optlen = 1; 1282 else { 1283 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1284 goto bad; 1285 optlen = cp[IPOPT_OLEN]; 1286 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1287 goto bad; 1288 } 1289 switch (opt) { 1290 1291 default: 1292 break; 1293 1294 case IPOPT_LSRR: 1295 case IPOPT_SSRR: 1296 /* 1297 * user process specifies route as: 1298 * ->A->B->C->D 1299 * D must be our final destination (but we can't 1300 * check that since we may not have connected yet). 1301 * A is first hop destination, which doesn't appear in 1302 * actual IP option, but is stored before the options. 1303 */ 1304 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1305 goto bad; 1306 m->m_len -= sizeof(struct in_addr); 1307 cnt -= sizeof(struct in_addr); 1308 optlen -= sizeof(struct in_addr); 1309 cp[IPOPT_OLEN] = optlen; 1310 /* 1311 * Move first hop before start of options. 1312 */ 1313 memcpy(mtod(m, caddr_t), &cp[IPOPT_OFFSET+1], 1314 sizeof(struct in_addr)); 1315 /* 1316 * Then copy rest of options back 1317 * to close up the deleted entry. 1318 */ 1319 memmove((caddr_t)&cp[IPOPT_OFFSET+1], 1320 (caddr_t)(&cp[IPOPT_OFFSET+1] + 1321 sizeof(struct in_addr)), 1322 (unsigned)cnt - (IPOPT_OFFSET+1)); 1323 break; 1324 } 1325 } 1326 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1327 goto bad; 1328 *pcbopt = m; 1329 return (0); 1330 1331 bad: 1332 (void)m_free(m); 1333 return (EINVAL); 1334 } 1335 1336 /* 1337 * Set the IP multicast options in response to user setsockopt(). 1338 */ 1339 int 1340 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m, 1341 u_int rtableid) 1342 { 1343 struct in_addr addr; 1344 struct in_ifaddr *ia; 1345 struct ip_mreq *mreq; 1346 struct ifnet *ifp = NULL; 1347 struct ip_moptions *imo = *imop; 1348 struct in_multi **immp; 1349 struct rtentry *rt; 1350 struct sockaddr_in sin; 1351 int i, error = 0; 1352 u_char loop; 1353 1354 if (imo == NULL) { 1355 /* 1356 * No multicast option buffer attached to the pcb; 1357 * allocate one and initialize to default values. 1358 */ 1359 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO); 1360 immp = (struct in_multi **)malloc( 1361 (sizeof(*immp) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 1362 M_WAITOK|M_ZERO); 1363 *imop = imo; 1364 imo->imo_ifidx = 0; 1365 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 1366 imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP; 1367 imo->imo_num_memberships = 0; 1368 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1369 imo->imo_membership = immp; 1370 } 1371 1372 switch (optname) { 1373 1374 case IP_MULTICAST_IF: 1375 /* 1376 * Select the interface for outgoing multicast packets. 1377 */ 1378 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1379 error = EINVAL; 1380 break; 1381 } 1382 addr = *(mtod(m, struct in_addr *)); 1383 /* 1384 * INADDR_ANY is used to remove a previous selection. 1385 * When no interface is selected, a default one is 1386 * chosen every time a multicast packet is sent. 1387 */ 1388 if (addr.s_addr == INADDR_ANY) { 1389 imo->imo_ifidx = 0; 1390 break; 1391 } 1392 /* 1393 * The selected interface is identified by its local 1394 * IP address. Find the interface and confirm that 1395 * it supports multicasting. 1396 */ 1397 memset(&sin, 0, sizeof(sin)); 1398 sin.sin_len = sizeof(sin); 1399 sin.sin_family = AF_INET; 1400 sin.sin_addr = addr; 1401 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1402 if (ia == NULL || 1403 (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) { 1404 error = EADDRNOTAVAIL; 1405 break; 1406 } 1407 imo->imo_ifidx = ia->ia_ifp->if_index; 1408 break; 1409 1410 case IP_MULTICAST_TTL: 1411 /* 1412 * Set the IP time-to-live for outgoing multicast packets. 1413 */ 1414 if (m == NULL || m->m_len != 1) { 1415 error = EINVAL; 1416 break; 1417 } 1418 imo->imo_ttl = *(mtod(m, u_char *)); 1419 break; 1420 1421 case IP_MULTICAST_LOOP: 1422 /* 1423 * Set the loopback flag for outgoing multicast packets. 1424 * Must be zero or one. 1425 */ 1426 if (m == NULL || m->m_len != 1 || 1427 (loop = *(mtod(m, u_char *))) > 1) { 1428 error = EINVAL; 1429 break; 1430 } 1431 imo->imo_loop = loop; 1432 break; 1433 1434 case IP_ADD_MEMBERSHIP: 1435 /* 1436 * Add a multicast group membership. 1437 * Group must be a valid IP multicast address. 1438 */ 1439 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1440 error = EINVAL; 1441 break; 1442 } 1443 mreq = mtod(m, struct ip_mreq *); 1444 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1445 error = EINVAL; 1446 break; 1447 } 1448 /* 1449 * If no interface address was provided, use the interface of 1450 * the route to the given multicast address. 1451 */ 1452 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1453 memset(&sin, 0, sizeof(sin)); 1454 sin.sin_len = sizeof(sin); 1455 sin.sin_family = AF_INET; 1456 sin.sin_addr = mreq->imr_multiaddr; 1457 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 1458 if (!rtisvalid(rt)) { 1459 rtfree(rt); 1460 error = EADDRNOTAVAIL; 1461 break; 1462 } 1463 } else { 1464 memset(&sin, 0, sizeof(sin)); 1465 sin.sin_len = sizeof(sin); 1466 sin.sin_family = AF_INET; 1467 sin.sin_addr = mreq->imr_interface; 1468 rt = rtalloc(sintosa(&sin), 0, rtableid); 1469 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 1470 rtfree(rt); 1471 error = EADDRNOTAVAIL; 1472 break; 1473 } 1474 } 1475 ifp = if_get(rt->rt_ifidx); 1476 rtfree(rt); 1477 1478 /* 1479 * See if we found an interface, and confirm that it 1480 * supports multicast. 1481 */ 1482 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1483 error = EADDRNOTAVAIL; 1484 if_put(ifp); 1485 break; 1486 } 1487 /* 1488 * See if the membership already exists or if all the 1489 * membership slots are full. 1490 */ 1491 for (i = 0; i < imo->imo_num_memberships; ++i) { 1492 if (imo->imo_membership[i]->inm_ifidx 1493 == ifp->if_index && 1494 imo->imo_membership[i]->inm_addr.s_addr 1495 == mreq->imr_multiaddr.s_addr) 1496 break; 1497 } 1498 if (i < imo->imo_num_memberships) { 1499 error = EADDRINUSE; 1500 if_put(ifp); 1501 break; 1502 } 1503 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1504 struct in_multi **nmships, **omships; 1505 size_t newmax; 1506 /* 1507 * Resize the vector to next power-of-two minus 1. If the 1508 * size would exceed the maximum then we know we've really 1509 * run out of entries. Otherwise, we reallocate the vector. 1510 */ 1511 nmships = NULL; 1512 omships = imo->imo_membership; 1513 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1514 if (newmax <= IP_MAX_MEMBERSHIPS) { 1515 nmships = (struct in_multi **)malloc( 1516 sizeof(*nmships) * newmax, M_IPMOPTS, 1517 M_NOWAIT|M_ZERO); 1518 if (nmships != NULL) { 1519 memcpy(nmships, omships, 1520 sizeof(*omships) * 1521 imo->imo_max_memberships); 1522 free(omships, M_IPMOPTS, 1523 sizeof(*omships) * 1524 imo->imo_max_memberships); 1525 imo->imo_membership = nmships; 1526 imo->imo_max_memberships = newmax; 1527 } 1528 } 1529 if (nmships == NULL) { 1530 error = ENOBUFS; 1531 if_put(ifp); 1532 break; 1533 } 1534 } 1535 /* 1536 * Everything looks good; add a new record to the multicast 1537 * address list for the given interface. 1538 */ 1539 if ((imo->imo_membership[i] = 1540 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1541 error = ENOBUFS; 1542 if_put(ifp); 1543 break; 1544 } 1545 ++imo->imo_num_memberships; 1546 if_put(ifp); 1547 break; 1548 1549 case IP_DROP_MEMBERSHIP: 1550 /* 1551 * Drop a multicast group membership. 1552 * Group must be a valid IP multicast address. 1553 */ 1554 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1555 error = EINVAL; 1556 break; 1557 } 1558 mreq = mtod(m, struct ip_mreq *); 1559 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1560 error = EINVAL; 1561 break; 1562 } 1563 /* 1564 * If an interface address was specified, get a pointer 1565 * to its ifnet structure. 1566 */ 1567 if (mreq->imr_interface.s_addr == INADDR_ANY) 1568 ifp = NULL; 1569 else { 1570 memset(&sin, 0, sizeof(sin)); 1571 sin.sin_len = sizeof(sin); 1572 sin.sin_family = AF_INET; 1573 sin.sin_addr = mreq->imr_interface; 1574 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1575 if (ia == NULL) { 1576 error = EADDRNOTAVAIL; 1577 break; 1578 } 1579 ifp = ia->ia_ifp; 1580 } 1581 /* 1582 * Find the membership in the membership array. 1583 */ 1584 for (i = 0; i < imo->imo_num_memberships; ++i) { 1585 if ((ifp == NULL || 1586 imo->imo_membership[i]->inm_ifidx == 1587 ifp->if_index) && 1588 imo->imo_membership[i]->inm_addr.s_addr == 1589 mreq->imr_multiaddr.s_addr) 1590 break; 1591 } 1592 if (i == imo->imo_num_memberships) { 1593 error = EADDRNOTAVAIL; 1594 break; 1595 } 1596 /* 1597 * Give up the multicast address record to which the 1598 * membership points. 1599 */ 1600 in_delmulti(imo->imo_membership[i]); 1601 /* 1602 * Remove the gap in the membership array. 1603 */ 1604 for (++i; i < imo->imo_num_memberships; ++i) 1605 imo->imo_membership[i-1] = imo->imo_membership[i]; 1606 --imo->imo_num_memberships; 1607 break; 1608 1609 default: 1610 error = EOPNOTSUPP; 1611 break; 1612 } 1613 1614 /* 1615 * If all options have default values, no need to keep the data. 1616 */ 1617 if (imo->imo_ifidx == 0 && 1618 imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL && 1619 imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP && 1620 imo->imo_num_memberships == 0) { 1621 free(imo->imo_membership , M_IPMOPTS, 0); 1622 free(*imop, M_IPMOPTS, sizeof(**imop)); 1623 *imop = NULL; 1624 } 1625 1626 return (error); 1627 } 1628 1629 /* 1630 * Return the IP multicast options in response to user getsockopt(). 1631 */ 1632 int 1633 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf **mp) 1634 { 1635 u_char *ttl; 1636 u_char *loop; 1637 struct in_addr *addr; 1638 struct in_ifaddr *ia; 1639 struct ifnet *ifp; 1640 1641 *mp = m_get(M_WAIT, MT_SOOPTS); 1642 1643 switch (optname) { 1644 1645 case IP_MULTICAST_IF: 1646 addr = mtod(*mp, struct in_addr *); 1647 (*mp)->m_len = sizeof(struct in_addr); 1648 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL) 1649 addr->s_addr = INADDR_ANY; 1650 else { 1651 IFP_TO_IA(ifp, ia); 1652 if_put(ifp); 1653 addr->s_addr = (ia == NULL) ? INADDR_ANY 1654 : ia->ia_addr.sin_addr.s_addr; 1655 } 1656 return (0); 1657 1658 case IP_MULTICAST_TTL: 1659 ttl = mtod(*mp, u_char *); 1660 (*mp)->m_len = 1; 1661 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1662 : imo->imo_ttl; 1663 return (0); 1664 1665 case IP_MULTICAST_LOOP: 1666 loop = mtod(*mp, u_char *); 1667 (*mp)->m_len = 1; 1668 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1669 : imo->imo_loop; 1670 return (0); 1671 1672 default: 1673 return (EOPNOTSUPP); 1674 } 1675 } 1676 1677 /* 1678 * Discard the IP multicast options. 1679 */ 1680 void 1681 ip_freemoptions(struct ip_moptions *imo) 1682 { 1683 int i; 1684 1685 if (imo != NULL) { 1686 for (i = 0; i < imo->imo_num_memberships; ++i) 1687 in_delmulti(imo->imo_membership[i]); 1688 free(imo->imo_membership, M_IPMOPTS, 0); 1689 free(imo, M_IPMOPTS, sizeof(*imo)); 1690 } 1691 } 1692 1693 /* 1694 * Routine called from ip_output() to loop back a copy of an IP multicast 1695 * packet to the input queue of a specified interface. 1696 */ 1697 void 1698 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1699 { 1700 struct ip *ip; 1701 struct mbuf *copym; 1702 1703 copym = m_dup_pkt(m, max_linkhdr, M_DONTWAIT); 1704 if (copym != NULL) { 1705 /* 1706 * We don't bother to fragment if the IP length is greater 1707 * than the interface's MTU. Can this possibly matter? 1708 */ 1709 ip = mtod(copym, struct ip *); 1710 ip->ip_sum = 0; 1711 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1712 if_input_local(ifp, copym, dst->sin_family); 1713 } 1714 } 1715 1716 /* 1717 * Compute significant parts of the IPv4 checksum pseudo-header 1718 * for use in a delayed TCP/UDP checksum calculation. 1719 */ 1720 static __inline u_int16_t __attribute__((__unused__)) 1721 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 1722 { 1723 u_int32_t sum; 1724 1725 sum = lenproto + 1726 (u_int16_t)(src >> 16) + 1727 (u_int16_t)(src /*& 0xffff*/) + 1728 (u_int16_t)(dst >> 16) + 1729 (u_int16_t)(dst /*& 0xffff*/); 1730 1731 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 1732 1733 if (sum > 0xffff) 1734 sum -= 0xffff; 1735 1736 return (sum); 1737 } 1738 1739 /* 1740 * Process a delayed payload checksum calculation. 1741 */ 1742 void 1743 in_delayed_cksum(struct mbuf *m) 1744 { 1745 struct ip *ip; 1746 u_int16_t csum, offset; 1747 1748 ip = mtod(m, struct ip *); 1749 offset = ip->ip_hl << 2; 1750 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1751 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1752 csum = 0xffff; 1753 1754 switch (ip->ip_p) { 1755 case IPPROTO_TCP: 1756 offset += offsetof(struct tcphdr, th_sum); 1757 break; 1758 1759 case IPPROTO_UDP: 1760 offset += offsetof(struct udphdr, uh_sum); 1761 break; 1762 1763 case IPPROTO_ICMP: 1764 offset += offsetof(struct icmp, icmp_cksum); 1765 break; 1766 1767 default: 1768 return; 1769 } 1770 1771 if ((offset + sizeof(u_int16_t)) > m->m_len) 1772 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1773 else 1774 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1775 } 1776 1777 void 1778 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 1779 { 1780 struct ip *ip = mtod(m, struct ip *); 1781 1782 /* some hw and in_delayed_cksum need the pseudo header cksum */ 1783 if (m->m_pkthdr.csum_flags & 1784 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 1785 u_int16_t csum = 0, offset; 1786 1787 offset = ip->ip_hl << 2; 1788 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 1789 csum = in_cksum_phdr(ip->ip_src.s_addr, 1790 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 1791 offset + ip->ip_p)); 1792 if (ip->ip_p == IPPROTO_TCP) 1793 offset += offsetof(struct tcphdr, th_sum); 1794 else if (ip->ip_p == IPPROTO_UDP) 1795 offset += offsetof(struct udphdr, uh_sum); 1796 else if (ip->ip_p == IPPROTO_ICMP) 1797 offset += offsetof(struct icmp, icmp_cksum); 1798 if ((offset + sizeof(u_int16_t)) > m->m_len) 1799 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1800 else 1801 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1802 } 1803 1804 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 1805 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 1806 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1807 tcpstat.tcps_outswcsum++; 1808 in_delayed_cksum(m); 1809 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 1810 } 1811 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 1812 if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 1813 ip->ip_hl != 5 || ifp->if_bridgeport != NULL) { 1814 udpstat.udps_outswcsum++; 1815 in_delayed_cksum(m); 1816 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 1817 } 1818 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 1819 in_delayed_cksum(m); 1820 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 1821 } 1822 } 1823