1 /* $OpenBSD: ip_output.c,v 1.364 2021/02/06 13:15:37 bluhm Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_enc.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 #endif /* IPSEC */ 74 75 int ip_pcbopts(struct mbuf **, struct mbuf *); 76 int ip_multicast_if(struct ip_mreqn *, u_int, unsigned int *); 77 int ip_setmoptions(int, struct ip_moptions **, struct mbuf *, u_int); 78 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 79 static __inline u_int16_t __attribute__((__unused__)) 80 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 81 void in_delayed_cksum(struct mbuf *); 82 int in_ifcap_cksum(struct mbuf *, struct ifnet *, int); 83 84 #ifdef IPSEC 85 struct tdb * 86 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 87 int ipsecflowinfo); 88 int 89 ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int); 90 #endif /* IPSEC */ 91 92 /* 93 * IP output. The packet in mbuf chain m contains a skeletal IP 94 * header (with len, off, ttl, proto, tos, src, dst). 95 * The mbuf chain containing the packet will be freed. 96 * The mbuf opt, if present, will not be freed. 
97 */ 98 int 99 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, 100 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 101 { 102 struct ip *ip; 103 struct ifnet *ifp = NULL; 104 struct mbuf *m = m0; 105 int hlen = sizeof (struct ip); 106 int error = 0; 107 struct route iproute; 108 struct sockaddr_in *dst; 109 struct tdb *tdb = NULL; 110 u_long mtu; 111 #if defined(MROUTING) 112 int rv; 113 #endif 114 115 NET_ASSERT_LOCKED(); 116 117 #ifdef IPSEC 118 if (inp && (inp->inp_flags & INP_IPV6) != 0) 119 panic("ip_output: IPv6 pcb is passed"); 120 #endif /* IPSEC */ 121 122 #ifdef DIAGNOSTIC 123 if ((m->m_flags & M_PKTHDR) == 0) 124 panic("ip_output no HDR"); 125 #endif 126 if (opt) 127 m = ip_insertoptions(m, opt, &hlen); 128 129 ip = mtod(m, struct ip *); 130 131 /* 132 * Fill in IP header. 133 */ 134 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 135 ip->ip_v = IPVERSION; 136 ip->ip_off &= htons(IP_DF); 137 ip->ip_id = htons(ip_randomid()); 138 ip->ip_hl = hlen >> 2; 139 ipstat_inc(ips_localout); 140 } else { 141 hlen = ip->ip_hl << 2; 142 } 143 144 /* 145 * We should not send traffic to 0/8 say both Stevens and RFCs 146 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 147 */ 148 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 149 error = ENETUNREACH; 150 goto bad; 151 } 152 153 #if NPF > 0 154 reroute: 155 #endif 156 157 /* 158 * Do a route lookup now in case we need the source address to 159 * do an SPD lookup in IPsec; for most packets, the source address 160 * is set at a higher level protocol. ICMPs and other packets 161 * though (e.g., traceroute) have a source address of zeroes. 162 */ 163 if (ro == NULL) { 164 ro = &iproute; 165 memset(ro, 0, sizeof(*ro)); 166 } 167 168 dst = satosin(&ro->ro_dst); 169 170 /* 171 * If there is a cached route, check that it is to the same 172 * destination and is still up. If not, free it and try again. 
173 */ 174 if (!rtisvalid(ro->ro_rt) || 175 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 176 ro->ro_tableid != m->m_pkthdr.ph_rtableid) { 177 rtfree(ro->ro_rt); 178 ro->ro_rt = NULL; 179 } 180 181 if (ro->ro_rt == NULL) { 182 dst->sin_family = AF_INET; 183 dst->sin_len = sizeof(*dst); 184 dst->sin_addr = ip->ip_dst; 185 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 186 } 187 188 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 189 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 190 imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) { 191 192 mtu = ifp->if_mtu; 193 if (ip->ip_src.s_addr == INADDR_ANY) { 194 struct in_ifaddr *ia; 195 196 IFP_TO_IA(ifp, ia); 197 if (ia != NULL) 198 ip->ip_src = ia->ia_addr.sin_addr; 199 } 200 } else { 201 struct in_ifaddr *ia; 202 203 if (ro->ro_rt == NULL) 204 ro->ro_rt = rtalloc_mpath(&ro->ro_dst, 205 &ip->ip_src.s_addr, ro->ro_tableid); 206 207 if (ro->ro_rt == NULL) { 208 ipstat_inc(ips_noroute); 209 error = EHOSTUNREACH; 210 goto bad; 211 } 212 213 ia = ifatoia(ro->ro_rt->rt_ifa); 214 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) 215 ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid)); 216 else 217 ifp = if_get(ro->ro_rt->rt_ifidx); 218 /* 219 * We aren't using rtisvalid() here because the UP/DOWN state 220 * machine is broken with some Ethernet drivers like em(4). 221 * As a result we might try to use an invalid cached route 222 * entry while an interface is being detached. 223 */ 224 if (ifp == NULL) { 225 ipstat_inc(ips_noroute); 226 error = EHOSTUNREACH; 227 goto bad; 228 } 229 if ((mtu = ro->ro_rt->rt_mtu) == 0) 230 mtu = ifp->if_mtu; 231 232 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 233 dst = satosin(ro->ro_rt->rt_gateway); 234 235 /* Set the source IP address */ 236 if (ip->ip_src.s_addr == INADDR_ANY && ia) 237 ip->ip_src = ia->ia_addr.sin_addr; 238 } 239 240 #ifdef IPSEC 241 if (ipsec_in_use || inp != NULL) { 242 /* Do we have any pending SAs to apply ? 
*/ 243 tdb = ip_output_ipsec_lookup(m, hlen, &error, inp, 244 ipsecflowinfo); 245 if (error != 0) { 246 /* Should silently drop packet */ 247 if (error == -EINVAL) 248 error = 0; 249 m_freem(m); 250 goto done; 251 } 252 if (tdb != NULL) { 253 /* 254 * If it needs TCP/UDP hardware-checksumming, do the 255 * computation now. 256 */ 257 in_proto_cksum_out(m, NULL); 258 } 259 } 260 #endif /* IPSEC */ 261 262 if (IN_MULTICAST(ip->ip_dst.s_addr) || 263 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 264 265 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 266 M_BCAST : M_MCAST; 267 268 /* 269 * IP destination address is multicast. Make sure "dst" 270 * still points to the address in "ro". (It may have been 271 * changed to point to a gateway address, above.) 272 */ 273 dst = satosin(&ro->ro_dst); 274 275 /* 276 * See if the caller provided any multicast options 277 */ 278 if (imo != NULL) 279 ip->ip_ttl = imo->imo_ttl; 280 else 281 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 282 283 /* 284 * if we don't know the outgoing ifp yet, we can't generate 285 * output 286 */ 287 if (!ifp) { 288 ipstat_inc(ips_noroute); 289 error = EHOSTUNREACH; 290 goto bad; 291 } 292 293 /* 294 * Confirm that the outgoing interface supports multicast, 295 * but only if the packet actually is going out on that 296 * interface (i.e., no IPsec is applied). 297 */ 298 if ((((m->m_flags & M_MCAST) && 299 (ifp->if_flags & IFF_MULTICAST) == 0) || 300 ((m->m_flags & M_BCAST) && 301 (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) { 302 ipstat_inc(ips_noroute); 303 error = ENETUNREACH; 304 goto bad; 305 } 306 307 /* 308 * If source address not specified yet, use address 309 * of outgoing interface. 
310 */ 311 if (ip->ip_src.s_addr == INADDR_ANY) { 312 struct in_ifaddr *ia; 313 314 IFP_TO_IA(ifp, ia); 315 if (ia != NULL) 316 ip->ip_src = ia->ia_addr.sin_addr; 317 } 318 319 if ((imo == NULL || imo->imo_loop) && 320 in_hasmulti(&ip->ip_dst, ifp)) { 321 /* 322 * If we belong to the destination multicast group 323 * on the outgoing interface, and the caller did not 324 * forbid loopback, loop back a copy. 325 * Can't defer TCP/UDP checksumming, do the 326 * computation now. 327 */ 328 in_proto_cksum_out(m, NULL); 329 ip_mloopback(ifp, m, dst); 330 } 331 #ifdef MROUTING 332 else { 333 /* 334 * If we are acting as a multicast router, perform 335 * multicast forwarding as if the packet had just 336 * arrived on the interface to which we are about 337 * to send. The multicast forwarding function 338 * recursively calls this function, using the 339 * IP_FORWARDING flag to prevent infinite recursion. 340 * 341 * Multicasts that are looped back by ip_mloopback(), 342 * above, will be forwarded by the ip_input() routine, 343 * if necessary. 344 */ 345 if (ipmforwarding && ip_mrouter[ifp->if_rdomain] && 346 (flags & IP_FORWARDING) == 0) { 347 KERNEL_LOCK(); 348 rv = ip_mforward(m, ifp); 349 KERNEL_UNLOCK(); 350 if (rv != 0) { 351 m_freem(m); 352 goto done; 353 } 354 } 355 } 356 #endif 357 /* 358 * Multicasts with a time-to-live of zero may be looped- 359 * back, above, but must not be transmitted on a network. 360 * Also, multicasts addressed to the loopback interface 361 * are not sent -- the above call to ip_mloopback() will 362 * loop back a copy if this host actually belongs to the 363 * destination group on the loopback interface. 364 */ 365 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 366 m_freem(m); 367 goto done; 368 } 369 370 goto sendit; 371 } 372 373 /* 374 * Look for broadcast address and verify user is allowed to send 375 * such a packet; if the packet is going in an IPsec tunnel, skip 376 * this check. 
377 */ 378 if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) || 379 (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) { 380 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 381 error = EADDRNOTAVAIL; 382 goto bad; 383 } 384 if ((flags & IP_ALLOWBROADCAST) == 0) { 385 error = EACCES; 386 goto bad; 387 } 388 389 /* Don't allow broadcast messages to be fragmented */ 390 if (ntohs(ip->ip_len) > ifp->if_mtu) { 391 error = EMSGSIZE; 392 goto bad; 393 } 394 m->m_flags |= M_BCAST; 395 } else 396 m->m_flags &= ~M_BCAST; 397 398 sendit: 399 /* 400 * If we're doing Path MTU discovery, we need to set DF unless 401 * the route's MTU is locked. 402 */ 403 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 404 (ro->ro_rt->rt_locks & RTV_MTU) == 0) 405 ip->ip_off |= htons(IP_DF); 406 407 #ifdef IPSEC 408 /* 409 * Check if the packet needs encapsulation. 410 */ 411 if (tdb != NULL) { 412 /* Callee frees mbuf */ 413 error = ip_output_ipsec_send(tdb, m, ro, 414 (flags & IP_FORWARDING) ? 1 : 0); 415 goto done; 416 } 417 #endif /* IPSEC */ 418 419 /* 420 * Packet filter 421 */ 422 #if NPF > 0 423 if (pf_test(AF_INET, (flags & IP_FORWARDING) ? 
PF_FWD : PF_OUT, 424 ifp, &m) != PF_PASS) { 425 error = EACCES; 426 m_freem(m); 427 goto done; 428 } 429 if (m == NULL) 430 goto done; 431 ip = mtod(m, struct ip *); 432 hlen = ip->ip_hl << 2; 433 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 434 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 435 /* already rerun the route lookup, go on */ 436 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 437 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 438 /* tag as generated to skip over pf_test on rerun */ 439 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 440 ro = NULL; 441 if_put(ifp); /* drop reference since target changed */ 442 ifp = NULL; 443 goto reroute; 444 } 445 #endif 446 in_proto_cksum_out(m, ifp); 447 448 #ifdef IPSEC 449 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 450 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 451 error = EHOSTUNREACH; 452 m_freem(m); 453 goto done; 454 } 455 #endif 456 457 /* 458 * If small enough for interface, can just send directly. 459 */ 460 if (ntohs(ip->ip_len) <= mtu) { 461 ip->ip_sum = 0; 462 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 463 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 464 else { 465 ipstat_inc(ips_outswcsum); 466 ip->ip_sum = in_cksum(m, hlen); 467 } 468 469 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 470 goto done; 471 } 472 473 /* 474 * Too large for interface; fragment if possible. 475 * Must be able to put at least 8 bytes per fragment. 476 */ 477 if (ip->ip_off & htons(IP_DF)) { 478 #ifdef IPSEC 479 if (ip_mtudisc) 480 ipsec_adjust_mtu(m, ifp->if_mtu); 481 #endif 482 error = EMSGSIZE; 483 /* 484 * This case can happen if the user changed the MTU 485 * of an interface after enabling IP on it. Because 486 * most netifs don't keep track of routes pointing to 487 * them, there is no way for one to update all its 488 * routes when the MTU is changed. 
489 */ 490 if (rtisvalid(ro->ro_rt) && 491 ISSET(ro->ro_rt->rt_flags, RTF_HOST) && 492 !(ro->ro_rt->rt_locks & RTV_MTU) && 493 (ro->ro_rt->rt_mtu > ifp->if_mtu)) { 494 ro->ro_rt->rt_mtu = ifp->if_mtu; 495 } 496 ipstat_inc(ips_cantfrag); 497 goto bad; 498 } 499 500 error = ip_fragment(m, ifp, mtu); 501 if (error) { 502 m = m0 = NULL; 503 goto bad; 504 } 505 506 for (; m; m = m0) { 507 m0 = m->m_nextpkt; 508 m->m_nextpkt = 0; 509 if (error == 0) 510 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 511 else 512 m_freem(m); 513 } 514 515 if (error == 0) 516 ipstat_inc(ips_fragmented); 517 518 done: 519 if (ro == &iproute && ro->ro_rt) 520 rtfree(ro->ro_rt); 521 if_put(ifp); 522 return (error); 523 bad: 524 m_freem(m0); 525 goto done; 526 } 527 528 #ifdef IPSEC 529 struct tdb * 530 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 531 int ipsecflowinfo) 532 { 533 struct m_tag *mtag; 534 struct tdb_ident *tdbi; 535 struct tdb *tdb; 536 537 /* Do we have any pending SAs to apply ? */ 538 tdb = ipsp_spd_lookup(m, AF_INET, hlen, error, IPSP_DIRECTION_OUT, 539 NULL, inp, ipsecflowinfo); 540 if (tdb == NULL) 541 return NULL; 542 /* Loop detection */ 543 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 544 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 545 continue; 546 tdbi = (struct tdb_ident *)(mtag + 1); 547 if (tdbi->spi == tdb->tdb_spi && 548 tdbi->proto == tdb->tdb_sproto && 549 tdbi->rdomain == tdb->tdb_rdomain && 550 !memcmp(&tdbi->dst, &tdb->tdb_dst, 551 sizeof(union sockaddr_union))) { 552 /* no IPsec needed */ 553 return NULL; 554 } 555 } 556 return tdb; 557 } 558 559 int 560 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int fwd) 561 { 562 #if NPF > 0 563 struct ifnet *encif; 564 #endif 565 struct ip *ip; 566 int error; 567 568 #if NPF > 0 569 /* 570 * Packet filter 571 */ 572 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 573 pf_test(AF_INET, fwd ? 
PF_FWD : PF_OUT, encif, &m) != PF_PASS) { 574 m_freem(m); 575 return EACCES; 576 } 577 if (m == NULL) 578 return 0; 579 /* 580 * PF_TAG_REROUTE handling or not... 581 * Packet is entering IPsec so the routing is 582 * already overruled by the IPsec policy. 583 * Until now the change was not reconsidered. 584 * What's the behaviour? 585 */ 586 in_proto_cksum_out(m, encif); 587 #endif 588 589 /* Check if we are allowed to fragment */ 590 ip = mtod(m, struct ip *); 591 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 592 ntohs(ip->ip_len) > tdb->tdb_mtu && 593 tdb->tdb_mtutimeout > gettime()) { 594 struct rtentry *rt = NULL; 595 int rt_mtucloned = 0; 596 int transportmode = 0; 597 598 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 599 (tdb->tdb_dst.sin.sin_addr.s_addr == ip->ip_dst.s_addr); 600 601 /* Find a host route to store the mtu in */ 602 if (ro != NULL) 603 rt = ro->ro_rt; 604 /* but don't add a PMTU route for transport mode SAs */ 605 if (transportmode) 606 rt = NULL; 607 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 608 rt = icmp_mtudisc_clone(ip->ip_dst, 609 m->m_pkthdr.ph_rtableid, 1); 610 rt_mtucloned = 1; 611 } 612 DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__, 613 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned)); 614 if (rt != NULL) { 615 rt->rt_mtu = tdb->tdb_mtu; 616 if (ro != NULL && ro->ro_rt != NULL) { 617 rtfree(ro->ro_rt); 618 ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, 619 m->m_pkthdr.ph_rtableid); 620 } 621 if (rt_mtucloned) 622 rtfree(rt); 623 } 624 ipsec_adjust_mtu(m, tdb->tdb_mtu); 625 m_freem(m); 626 return EMSGSIZE; 627 } 628 /* propagate IP_DF for v4-over-v6 */ 629 if (ip_mtudisc && ip->ip_off & htons(IP_DF)) 630 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 631 632 /* 633 * Clear these -- they'll be set in the recursive invocation 634 * as needed. 
635 */ 636 m->m_flags &= ~(M_MCAST | M_BCAST); 637 638 /* Callee frees mbuf */ 639 error = ipsp_process_packet(m, tdb, AF_INET, 0); 640 if (error) { 641 ipsecstat_inc(ipsec_odrops); 642 tdb->tdb_odrops++; 643 } 644 return error; 645 } 646 #endif /* IPSEC */ 647 648 int 649 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 650 { 651 struct ip *ip, *mhip; 652 struct mbuf *m0; 653 int len, hlen, off; 654 int mhlen, firstlen; 655 struct mbuf **mnext; 656 int fragments = 0; 657 int error = 0; 658 659 ip = mtod(m, struct ip *); 660 hlen = ip->ip_hl << 2; 661 662 len = (mtu - hlen) &~ 7; 663 if (len < 8) { 664 m_freem(m); 665 return (EMSGSIZE); 666 } 667 668 /* 669 * If we are doing fragmentation, we can't defer TCP/UDP 670 * checksumming; compute the checksum and clear the flag. 671 */ 672 in_proto_cksum_out(m, NULL); 673 firstlen = len; 674 mnext = &m->m_nextpkt; 675 676 /* 677 * Loop through length of segment after first fragment, 678 * make new header and copy data of each part and link onto chain. 
679 */ 680 m0 = m; 681 mhlen = sizeof (struct ip); 682 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 683 MGETHDR(m, M_DONTWAIT, MT_HEADER); 684 if (m == NULL) { 685 ipstat_inc(ips_odropped); 686 error = ENOBUFS; 687 goto sendorfree; 688 } 689 *mnext = m; 690 mnext = &m->m_nextpkt; 691 m->m_data += max_linkhdr; 692 mhip = mtod(m, struct ip *); 693 *mhip = *ip; 694 /* we must inherit MCAST/BCAST flags, routing table and prio */ 695 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 696 m->m_pkthdr.ph_rtableid = m0->m_pkthdr.ph_rtableid; 697 m->m_pkthdr.pf.prio = m0->m_pkthdr.pf.prio; 698 if (hlen > sizeof (struct ip)) { 699 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 700 mhip->ip_hl = mhlen >> 2; 701 } 702 m->m_len = mhlen; 703 mhip->ip_off = ((off - hlen) >> 3) + 704 (ntohs(ip->ip_off) & ~IP_MF); 705 if (ip->ip_off & htons(IP_MF)) 706 mhip->ip_off |= IP_MF; 707 if (off + len >= ntohs(ip->ip_len)) 708 len = ntohs(ip->ip_len) - off; 709 else 710 mhip->ip_off |= IP_MF; 711 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 712 m->m_next = m_copym(m0, off, len, M_NOWAIT); 713 if (m->m_next == 0) { 714 ipstat_inc(ips_odropped); 715 error = ENOBUFS; 716 goto sendorfree; 717 } 718 m->m_pkthdr.len = mhlen + len; 719 m->m_pkthdr.ph_ifidx = 0; 720 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 721 mhip->ip_sum = 0; 722 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 723 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 724 else { 725 ipstat_inc(ips_outswcsum); 726 mhip->ip_sum = in_cksum(m, mhlen); 727 } 728 ipstat_inc(ips_ofragments); 729 fragments++; 730 } 731 /* 732 * Update first fragment by trimming what's been copied out 733 * and updating header, then send each fragment (in order). 
734 */ 735 m = m0; 736 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 737 m->m_pkthdr.len = hlen + firstlen; 738 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 739 ip->ip_off |= htons(IP_MF); 740 ip->ip_sum = 0; 741 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 742 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 743 else { 744 ipstat_inc(ips_outswcsum); 745 ip->ip_sum = in_cksum(m, hlen); 746 } 747 sendorfree: 748 if (error) { 749 for (m = m0; m; m = m0) { 750 m0 = m->m_nextpkt; 751 m->m_nextpkt = NULL; 752 m_freem(m); 753 } 754 } 755 756 return (error); 757 } 758 759 /* 760 * Insert IP options into preformed packet. 761 * Adjust IP destination as required for IP source routing, 762 * as indicated by a non-zero in_addr at the start of the options. 763 */ 764 struct mbuf * 765 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 766 { 767 struct ipoption *p = mtod(opt, struct ipoption *); 768 struct mbuf *n; 769 struct ip *ip = mtod(m, struct ip *); 770 unsigned int optlen; 771 772 optlen = opt->m_len - sizeof(p->ipopt_dst); 773 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 774 return (m); /* XXX should fail */ 775 if (p->ipopt_dst.s_addr) 776 ip->ip_dst = p->ipopt_dst; 777 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 778 MGETHDR(n, M_DONTWAIT, MT_HEADER); 779 if (n == NULL) 780 return (m); 781 M_MOVE_HDR(n, m); 782 n->m_pkthdr.len += optlen; 783 m->m_len -= sizeof(struct ip); 784 m->m_data += sizeof(struct ip); 785 n->m_next = m; 786 m = n; 787 m->m_len = optlen + sizeof(struct ip); 788 m->m_data += max_linkhdr; 789 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip)); 790 } else { 791 m->m_data -= optlen; 792 m->m_len += optlen; 793 m->m_pkthdr.len += optlen; 794 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 795 } 796 ip = mtod(m, struct ip *); 797 memcpy(ip + 1, p->ipopt_list, optlen); 798 *phlen = sizeof(struct ip) + optlen; 799 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 800 return (m); 801 } 802 803 /* 804 * Copy options from 
ip to jp, 805 * omitting those not copied during fragmentation. 806 */ 807 int 808 ip_optcopy(struct ip *ip, struct ip *jp) 809 { 810 u_char *cp, *dp; 811 int opt, optlen, cnt; 812 813 cp = (u_char *)(ip + 1); 814 dp = (u_char *)(jp + 1); 815 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 816 for (; cnt > 0; cnt -= optlen, cp += optlen) { 817 opt = cp[0]; 818 if (opt == IPOPT_EOL) 819 break; 820 if (opt == IPOPT_NOP) { 821 /* Preserve for IP mcast tunnel's LSRR alignment. */ 822 *dp++ = IPOPT_NOP; 823 optlen = 1; 824 continue; 825 } 826 #ifdef DIAGNOSTIC 827 if (cnt < IPOPT_OLEN + sizeof(*cp)) 828 panic("malformed IPv4 option passed to ip_optcopy"); 829 #endif 830 optlen = cp[IPOPT_OLEN]; 831 #ifdef DIAGNOSTIC 832 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 833 panic("malformed IPv4 option passed to ip_optcopy"); 834 #endif 835 /* bogus lengths should have been caught by ip_dooptions */ 836 if (optlen > cnt) 837 optlen = cnt; 838 if (IPOPT_COPIED(opt)) { 839 memcpy(dp, cp, optlen); 840 dp += optlen; 841 } 842 } 843 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 844 *dp++ = IPOPT_EOL; 845 return (optlen); 846 } 847 848 /* 849 * IP socket option processing. 
850 */ 851 int 852 ip_ctloutput(int op, struct socket *so, int level, int optname, 853 struct mbuf *m) 854 { 855 struct inpcb *inp = sotoinpcb(so); 856 int optval = 0; 857 struct proc *p = curproc; /* XXX */ 858 int error = 0; 859 u_int rtid = 0; 860 861 if (level != IPPROTO_IP) 862 return (EINVAL); 863 864 switch (op) { 865 case PRCO_SETOPT: 866 switch (optname) { 867 case IP_OPTIONS: 868 return (ip_pcbopts(&inp->inp_options, m)); 869 870 case IP_TOS: 871 case IP_TTL: 872 case IP_MINTTL: 873 case IP_RECVOPTS: 874 case IP_RECVRETOPTS: 875 case IP_RECVDSTADDR: 876 case IP_RECVIF: 877 case IP_RECVTTL: 878 case IP_RECVDSTPORT: 879 case IP_RECVRTABLE: 880 case IP_IPSECFLOWINFO: 881 if (m == NULL || m->m_len != sizeof(int)) 882 error = EINVAL; 883 else { 884 optval = *mtod(m, int *); 885 switch (optname) { 886 887 case IP_TOS: 888 inp->inp_ip.ip_tos = optval; 889 break; 890 891 case IP_TTL: 892 if (optval > 0 && optval <= MAXTTL) 893 inp->inp_ip.ip_ttl = optval; 894 else if (optval == -1) 895 inp->inp_ip.ip_ttl = ip_defttl; 896 else 897 error = EINVAL; 898 break; 899 900 case IP_MINTTL: 901 if (optval >= 0 && optval <= MAXTTL) 902 inp->inp_ip_minttl = optval; 903 else 904 error = EINVAL; 905 break; 906 #define OPTSET(bit) \ 907 if (optval) \ 908 inp->inp_flags |= bit; \ 909 else \ 910 inp->inp_flags &= ~bit; 911 912 case IP_RECVOPTS: 913 OPTSET(INP_RECVOPTS); 914 break; 915 916 case IP_RECVRETOPTS: 917 OPTSET(INP_RECVRETOPTS); 918 break; 919 920 case IP_RECVDSTADDR: 921 OPTSET(INP_RECVDSTADDR); 922 break; 923 case IP_RECVIF: 924 OPTSET(INP_RECVIF); 925 break; 926 case IP_RECVTTL: 927 OPTSET(INP_RECVTTL); 928 break; 929 case IP_RECVDSTPORT: 930 OPTSET(INP_RECVDSTPORT); 931 break; 932 case IP_RECVRTABLE: 933 OPTSET(INP_RECVRTABLE); 934 break; 935 case IP_IPSECFLOWINFO: 936 OPTSET(INP_IPSECFLOWINFO); 937 break; 938 } 939 } 940 break; 941 #undef OPTSET 942 943 case IP_MULTICAST_IF: 944 case IP_MULTICAST_TTL: 945 case IP_MULTICAST_LOOP: 946 case IP_ADD_MEMBERSHIP: 947 case 
IP_DROP_MEMBERSHIP: 948 error = ip_setmoptions(optname, &inp->inp_moptions, m, 949 inp->inp_rtableid); 950 break; 951 952 case IP_PORTRANGE: 953 if (m == NULL || m->m_len != sizeof(int)) 954 error = EINVAL; 955 else { 956 optval = *mtod(m, int *); 957 958 switch (optval) { 959 960 case IP_PORTRANGE_DEFAULT: 961 inp->inp_flags &= ~(INP_LOWPORT); 962 inp->inp_flags &= ~(INP_HIGHPORT); 963 break; 964 965 case IP_PORTRANGE_HIGH: 966 inp->inp_flags &= ~(INP_LOWPORT); 967 inp->inp_flags |= INP_HIGHPORT; 968 break; 969 970 case IP_PORTRANGE_LOW: 971 inp->inp_flags &= ~(INP_HIGHPORT); 972 inp->inp_flags |= INP_LOWPORT; 973 break; 974 975 default: 976 977 error = EINVAL; 978 break; 979 } 980 } 981 break; 982 case IP_AUTH_LEVEL: 983 case IP_ESP_TRANS_LEVEL: 984 case IP_ESP_NETWORK_LEVEL: 985 case IP_IPCOMP_LEVEL: 986 #ifndef IPSEC 987 error = EOPNOTSUPP; 988 #else 989 if (m == NULL || m->m_len != sizeof(int)) { 990 error = EINVAL; 991 break; 992 } 993 optval = *mtod(m, int *); 994 995 if (optval < IPSEC_LEVEL_BYPASS || 996 optval > IPSEC_LEVEL_UNIQUE) { 997 error = EINVAL; 998 break; 999 } 1000 1001 switch (optname) { 1002 case IP_AUTH_LEVEL: 1003 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1004 suser(p)) { 1005 error = EACCES; 1006 break; 1007 } 1008 inp->inp_seclevel[SL_AUTH] = optval; 1009 break; 1010 1011 case IP_ESP_TRANS_LEVEL: 1012 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1013 suser(p)) { 1014 error = EACCES; 1015 break; 1016 } 1017 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1018 break; 1019 1020 case IP_ESP_NETWORK_LEVEL: 1021 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1022 suser(p)) { 1023 error = EACCES; 1024 break; 1025 } 1026 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1027 break; 1028 case IP_IPCOMP_LEVEL: 1029 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1030 suser(p)) { 1031 error = EACCES; 1032 break; 1033 } 1034 inp->inp_seclevel[SL_IPCOMP] = optval; 1035 break; 1036 } 1037 #endif 1038 break; 1039 1040 case IP_IPSEC_LOCAL_ID: 1041 case 
IP_IPSEC_REMOTE_ID: 1042 error = EOPNOTSUPP; 1043 break; 1044 case SO_RTABLE: 1045 if (m == NULL || m->m_len < sizeof(u_int)) { 1046 error = EINVAL; 1047 break; 1048 } 1049 rtid = *mtod(m, u_int *); 1050 if (inp->inp_rtableid == rtid) 1051 break; 1052 /* needs privileges to switch when already set */ 1053 if (p->p_p->ps_rtableid != rtid && 1054 p->p_p->ps_rtableid != 0 && 1055 (error = suser(p)) != 0) 1056 break; 1057 /* table must exist */ 1058 if (!rtable_exists(rtid)) { 1059 error = EINVAL; 1060 break; 1061 } 1062 if (inp->inp_lport) { 1063 error = EBUSY; 1064 break; 1065 } 1066 inp->inp_rtableid = rtid; 1067 in_pcbrehash(inp); 1068 break; 1069 case IP_PIPEX: 1070 if (m != NULL && m->m_len == sizeof(int)) 1071 inp->inp_pipex = *mtod(m, int *); 1072 else 1073 error = EINVAL; 1074 break; 1075 1076 default: 1077 error = ENOPROTOOPT; 1078 break; 1079 } 1080 break; 1081 1082 case PRCO_GETOPT: 1083 switch (optname) { 1084 case IP_OPTIONS: 1085 case IP_RETOPTS: 1086 if (inp->inp_options) { 1087 m->m_len = inp->inp_options->m_len; 1088 memcpy(mtod(m, caddr_t), 1089 mtod(inp->inp_options, caddr_t), m->m_len); 1090 } else 1091 m->m_len = 0; 1092 break; 1093 1094 case IP_TOS: 1095 case IP_TTL: 1096 case IP_MINTTL: 1097 case IP_RECVOPTS: 1098 case IP_RECVRETOPTS: 1099 case IP_RECVDSTADDR: 1100 case IP_RECVIF: 1101 case IP_RECVTTL: 1102 case IP_RECVDSTPORT: 1103 case IP_RECVRTABLE: 1104 case IP_IPSECFLOWINFO: 1105 case IP_IPDEFTTL: 1106 m->m_len = sizeof(int); 1107 switch (optname) { 1108 1109 case IP_TOS: 1110 optval = inp->inp_ip.ip_tos; 1111 break; 1112 1113 case IP_TTL: 1114 optval = inp->inp_ip.ip_ttl; 1115 break; 1116 1117 case IP_MINTTL: 1118 optval = inp->inp_ip_minttl; 1119 break; 1120 1121 case IP_IPDEFTTL: 1122 optval = ip_defttl; 1123 break; 1124 1125 #define OPTBIT(bit) (inp->inp_flags & bit ? 
1 : 0) 1126 1127 case IP_RECVOPTS: 1128 optval = OPTBIT(INP_RECVOPTS); 1129 break; 1130 1131 case IP_RECVRETOPTS: 1132 optval = OPTBIT(INP_RECVRETOPTS); 1133 break; 1134 1135 case IP_RECVDSTADDR: 1136 optval = OPTBIT(INP_RECVDSTADDR); 1137 break; 1138 case IP_RECVIF: 1139 optval = OPTBIT(INP_RECVIF); 1140 break; 1141 case IP_RECVTTL: 1142 optval = OPTBIT(INP_RECVTTL); 1143 break; 1144 case IP_RECVDSTPORT: 1145 optval = OPTBIT(INP_RECVDSTPORT); 1146 break; 1147 case IP_RECVRTABLE: 1148 optval = OPTBIT(INP_RECVRTABLE); 1149 break; 1150 case IP_IPSECFLOWINFO: 1151 optval = OPTBIT(INP_IPSECFLOWINFO); 1152 break; 1153 } 1154 *mtod(m, int *) = optval; 1155 break; 1156 1157 case IP_MULTICAST_IF: 1158 case IP_MULTICAST_TTL: 1159 case IP_MULTICAST_LOOP: 1160 case IP_ADD_MEMBERSHIP: 1161 case IP_DROP_MEMBERSHIP: 1162 error = ip_getmoptions(optname, inp->inp_moptions, m); 1163 break; 1164 1165 case IP_PORTRANGE: 1166 m->m_len = sizeof(int); 1167 1168 if (inp->inp_flags & INP_HIGHPORT) 1169 optval = IP_PORTRANGE_HIGH; 1170 else if (inp->inp_flags & INP_LOWPORT) 1171 optval = IP_PORTRANGE_LOW; 1172 else 1173 optval = 0; 1174 1175 *mtod(m, int *) = optval; 1176 break; 1177 1178 case IP_AUTH_LEVEL: 1179 case IP_ESP_TRANS_LEVEL: 1180 case IP_ESP_NETWORK_LEVEL: 1181 case IP_IPCOMP_LEVEL: 1182 #ifndef IPSEC 1183 m->m_len = sizeof(int); 1184 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1185 #else 1186 m->m_len = sizeof(int); 1187 switch (optname) { 1188 case IP_AUTH_LEVEL: 1189 optval = inp->inp_seclevel[SL_AUTH]; 1190 break; 1191 1192 case IP_ESP_TRANS_LEVEL: 1193 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1194 break; 1195 1196 case IP_ESP_NETWORK_LEVEL: 1197 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1198 break; 1199 case IP_IPCOMP_LEVEL: 1200 optval = inp->inp_seclevel[SL_IPCOMP]; 1201 break; 1202 } 1203 *mtod(m, int *) = optval; 1204 #endif 1205 break; 1206 case IP_IPSEC_LOCAL_ID: 1207 case IP_IPSEC_REMOTE_ID: 1208 error = EOPNOTSUPP; 1209 break; 1210 case SO_RTABLE: 1211 m->m_len = 
sizeof(u_int);
			*mtod(m, u_int *) = inp->inp_rtableid;
			break;
		case IP_PIPEX:
			m->m_len = sizeof(int);
			*mtod(m, int *) = inp->inp_pipex;
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
	return (error);
}

/*
 * Set up IP options in pcb for insertion in output packets.
 * Store in mbuf with pointer in pcbopt, adding pseudo-option
 * with destination address if source routed.
 *
 * pcbopt: in/out.  Whatever *pcbopt pointed to is released with
 *	   m_freem() and *pcbopt is reset to NULL before anything else
 *	   happens, so every error return leaves *pcbopt == NULL.
 *	   On success it points at a freshly allocated MT_SOOPTS mbuf.
 * m:	   user supplied option bytes.  Only read here; this function
 *	   never frees it.  NULL or m_len == 0 means "just clear".
 *
 * Returns 0 on success, EINVAL if the input length is not a multiple
 * of sizeof(int32_t), is larger than MAX_IPOPTLEN plus one in_addr,
 * an option is malformed, or the rebuilt list does not fit in a
 * struct ipoption; ENOBUFS if no mbuf could be allocated.
 */
int
ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m)
{
	struct mbuf *n;
	struct ipoption *p;
	int cnt, off, optlen;
	u_char *cp;
	u_char opt;

	/* turn off any old options */
	m_freem(*pcbopt);
	*pcbopt = NULL;
	if (m == NULL || m->m_len == 0) {
		/*
		 * Only turning off any previous options.
		 */
		return (0);
	}

	if (m->m_len % sizeof(int32_t) ||
	    m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
		return (EINVAL);

	/* Don't sleep because NET_LOCK() is held. */
	if ((n = m_get(M_NOWAIT, MT_SOOPTS)) == NULL)
		return (ENOBUFS);
	p = mtod(n, struct ipoption *);
	memset(p, 0, sizeof (*p));	/* 0 = IPOPT_EOL, needed for padding */
	/*
	 * Length starts at one in_addr; the option bytes copied below
	 * are added to it after the loop.  Presumably this accounts for
	 * the ipopt_dst slot in front of ipopt_list -- confirm against
	 * the struct ipoption definition.
	 */
	n->m_len = sizeof(struct in_addr);

	/*
	 * off: write position inside p->ipopt_list.
	 * cp/cnt: read cursor and bytes remaining in the user buffer.
	 */
	off = 0;
	cnt = m->m_len;
	cp = mtod(m, u_char *);

	while (cnt > 0) {
		opt = cp[IPOPT_OPTVAL];

		if (opt == IPOPT_NOP || opt == IPOPT_EOL) {
			/* single byte options carry no length field */
			optlen = 1;
		} else {
			/* need room for the length byte before reading it */
			if (cnt < IPOPT_OLEN + sizeof(*cp))
				goto bad;
			optlen = cp[IPOPT_OLEN];
			/* length must cover type+len and stay inside input */
			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
				goto bad;
		}
		switch (opt) {
		default:
			/* everything but source routes is copied verbatim */
			memcpy(p->ipopt_list + off, cp, optlen);
			break;

		case IPOPT_LSRR:
		case IPOPT_SSRR:
			/*
			 * user process specifies route as:
			 *	->A->B->C->D
			 * D must be our final destination (but we can't
			 * check that since we may not have connected yet).
			 * A is first hop destination, which doesn't appear in
			 * actual IP option, but is stored before the options.
			 */
			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
				goto bad;

			/*
			 * Optlen is smaller because first address is popped.
			 * Cnt and cp will be adjusted a bit later to reflect
			 * this.
			 */
			optlen -= sizeof(struct in_addr);
			p->ipopt_list[off + IPOPT_OPTVAL] = opt;
			p->ipopt_list[off + IPOPT_OLEN] = optlen;

			/*
			 * Move first hop before start of options.
			 *
			 * NOTE(review): the first hop is read starting at the
			 * IPOPT_OFFSET byte of the option; confirm this is
			 * the layout userland is expected to supply.
			 */
			memcpy(&p->ipopt_dst, cp + IPOPT_OFFSET,
			    sizeof(struct in_addr));
			/* skip the popped address in the input */
			cp += sizeof(struct in_addr);
			cnt -= sizeof(struct in_addr);
			/*
			 * Then copy rest of options
			 */
			memcpy(p->ipopt_list + off + IPOPT_OFFSET,
			    cp + IPOPT_OFFSET, optlen - IPOPT_OFFSET);
			break;
		}
		off += optlen;
		cp += optlen;
		cnt -= optlen;

		/* anything after an explicit end-of-list is ignored */
		if (opt == IPOPT_EOL)
			break;
	}
	/* pad options to next word, since p was zeroed just adjust off */
	off = (off + sizeof(int32_t) - 1) & ~(sizeof(int32_t) - 1);
	n->m_len += off;
	if (n->m_len > sizeof(*p)) {
		/*
		 * "bad" is also the target of the gotos above: every
		 * failure after the allocation releases n here.
		 */
 bad:
		m_freem(n);
		return (EINVAL);
	}

	*pcbopt = n;
	return (0);
}

/*
 * Lookup the interface based on the information in the ip_mreqn struct.
 *
 * mreq:     request to resolve; imr_ifindex, imr_address and
 *	     imr_multiaddr are consulted in that order of preference.
 * rtableid: routing table passed to rtalloc() for the lookups.
 * ifidx:    out.  Receives the interface index on success; it is not
 *	     written on failure.
 *
 * Returns 0 on success or EADDRNOTAVAIL if the route lookup does not
 * yield a valid route (or, for an explicit address, a route without
 * RTF_LOCAL set).
 */
int
ip_multicast_if(struct ip_mreqn *mreq, u_int rtableid, unsigned int *ifidx)
{
	struct sockaddr_in sin;
	struct rtentry *rt;

	/*
	 * In case userland provides the imr_ifindex use this as interface.
	 * If no interface address was provided, use the interface of
	 * the route to the given multicast address.
	 */
	if (mreq->imr_ifindex != 0) {
		/* taken as is; existence of the interface is not checked here */
		*ifidx = mreq->imr_ifindex;
	} else if (mreq->imr_address.s_addr == INADDR_ANY) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr = mreq->imr_multiaddr;
		rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);
		if (!rtisvalid(rt)) {
			/* rtfree() is called on the rejected route as well */
			rtfree(rt);
			return EADDRNOTAVAIL;
		}
		*ifidx = rt->rt_ifidx;
		rtfree(rt);
	} else {
		/* explicit address given: it has to resolve to a local route */
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr = mreq->imr_address;
		rt = rtalloc(sintosa(&sin), 0, rtableid);
		if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
			rtfree(rt);
			return EADDRNOTAVAIL;
		}
		*ifidx = rt->rt_ifidx;
		rtfree(rt);
	}

	return 0;
}

/*
 * Set the IP multicast options in response to user setsockopt().
 *
 * optname:  IP_MULTICAST_IF, IP_MULTICAST_TTL, IP_MULTICAST_LOOP,
 *	     IP_ADD_MEMBERSHIP or IP_DROP_MEMBERSHIP; anything else
 *	     yields EOPNOTSUPP.
 * imop:     in/out pointer to the multicast option block.  If *imop is
 *	     NULL a block with default values is allocated first.  On
 *	     the way out the block is freed again and *imop set to NULL
 *	     whenever every setting is back at its default and no
 *	     memberships remain -- including after a failed request.
 * m:	     option value supplied by the caller; NULL or an unexpected
 *	     m_len gives EINVAL.
 * rtableid: routing table / rdomain used for interface lookups.
 *
 * Returns 0 or an errno value (EINVAL, EADDRNOTAVAIL, EADDRINUSE,
 * ENOBUFS, EOPNOTSUPP).
 */
int
ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m,
    u_int rtableid)
{
	struct in_addr addr;
	struct in_ifaddr *ia;
	struct ip_mreqn mreqn;
	struct ifnet *ifp = NULL;
	struct ip_moptions *imo = *imop;
	struct in_multi **immp;
	struct sockaddr_in sin;
	unsigned int ifidx;
	int i, error = 0;
	u_char loop;

	if (imo == NULL) {
		/*
		 * No multicast option buffer attached to the pcb;
		 * allocate one and initialize to default values.
		 */
		imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO);
		immp = mallocarray(IP_MIN_MEMBERSHIPS, sizeof(*immp), M_IPMOPTS,
		    M_WAITOK|M_ZERO);
		*imop = imo;
		imo->imo_ifidx = 0;
		imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL;
		imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP;
		imo->imo_num_memberships = 0;
		imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
		imo->imo_membership = immp;
	}

	switch (optname) {

	case IP_MULTICAST_IF:
		/*
		 * Select the interface for outgoing multicast packets.
		 */
		if (m == NULL) {
			error = EINVAL;
			break;
		}
		/* three argument layouts are accepted, told apart by size */
		if (m->m_len == sizeof(struct in_addr)) {
			addr = *(mtod(m, struct in_addr *));
		} else if (m->m_len == sizeof(struct ip_mreq) ||
		    m->m_len == sizeof(struct ip_mreqn)) {
			/*
			 * Zero first so that imr_ifindex reads as 0 when
			 * only the shorter struct ip_mreq was supplied.
			 */
			memset(&mreqn, 0, sizeof(mreqn));
			memcpy(&mreqn, mtod(m, void *), m->m_len);

			/*
			 * If an interface index is given use this
			 * index to set the imo_ifidx but check first
			 * that the interface actually exists.
			 * In the other case just set the addr to
			 * the imr_address and fall through to the
			 * regular code.
			 */
			if (mreqn.imr_ifindex != 0) {
				ifp = if_get(mreqn.imr_ifindex);
				if (ifp == NULL ||
				    ifp->if_rdomain != rtable_l2(rtableid)) {
					error = EADDRNOTAVAIL;
					/* reached with ifp == NULL too */
					if_put(ifp);
					break;
				}
				imo->imo_ifidx = ifp->if_index;
				if_put(ifp);
				break;
			} else
				addr = mreqn.imr_address;
		} else {
			error = EINVAL;
			break;
		}
		/*
		 * INADDR_ANY is used to remove a previous selection.
		 * When no interface is selected, a default one is
		 * chosen every time a multicast packet is sent.
		 */
		if (addr.s_addr == INADDR_ANY) {
			imo->imo_ifidx = 0;
			break;
		}
		/*
		 * The selected interface is identified by its local
		 * IP address.  Find the interface and confirm that
		 * it supports multicasting.
		 */
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr = addr;
		ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid));
		if (ia == NULL ||
		    (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			break;
		}
		imo->imo_ifidx = ia->ia_ifp->if_index;
		break;

	case IP_MULTICAST_TTL:
		/*
		 * Set the IP time-to-live for outgoing multicast packets.
		 * The value must be exactly one byte long.
		 */
		if (m == NULL || m->m_len != 1) {
			error = EINVAL;
			break;
		}
		imo->imo_ttl = *(mtod(m, u_char *));
		break;

	case IP_MULTICAST_LOOP:
		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one.
		 */
		if (m == NULL || m->m_len != 1 ||
		    (loop = *(mtod(m, u_char *))) > 1) {
			error = EINVAL;
			break;
		}
		imo->imo_loop = loop;
		break;

	case IP_ADD_MEMBERSHIP:
		/*
		 * Add a multicast group membership.
		 * Group must be a valid IP multicast address.
		 */
		if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) ||
		    m->m_len == sizeof(struct ip_mreqn))) {
			error = EINVAL;
			break;
		}
		/* zero fill so a short ip_mreq leaves imr_ifindex at 0 */
		memset(&mreqn, 0, sizeof(mreqn));
		memcpy(&mreqn, mtod(m, void *), m->m_len);
		if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) {
			error = EINVAL;
			break;
		}

		error = ip_multicast_if(&mreqn, rtableid, &ifidx);
		if (error)
			break;

		/*
		 * See if we found an interface, and confirm that it
		 * supports multicast.
		 */
		ifp = if_get(ifidx);
		if (ifp == NULL || ifp->if_rdomain != rtable_l2(rtableid) ||
		    (ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EADDRNOTAVAIL;
			if_put(ifp);
			break;
		}

		/*
		 * See if the membership already exists or if all the
		 * membership slots are full.
		 */
		for (i = 0; i < imo->imo_num_memberships; ++i) {
			if (imo->imo_membership[i]->inm_ifidx == ifidx &&
			    imo->imo_membership[i]->inm_addr.s_addr
			    == mreqn.imr_multiaddr.s_addr)
				break;
		}
		if (i < imo->imo_num_memberships) {
			error = EADDRINUSE;
			if_put(ifp);
			break;
		}
		/* from here on i == imo_num_memberships: the first free slot */
		if (imo->imo_num_memberships == imo->imo_max_memberships) {
			struct in_multi **nmships, **omships;
			size_t newmax;
			/*
			 * Resize the vector to next power-of-two minus 1. If
			 * the size would exceed the maximum then we know we've
			 * really run out of entries. Otherwise, we reallocate
			 * the vector.
			 */
			nmships = NULL;
			omships = imo->imo_membership;
			newmax = ((imo->imo_max_memberships + 1) * 2) - 1;
			if (newmax <= IP_MAX_MEMBERSHIPS) {
				nmships = mallocarray(newmax, sizeof(*nmships),
				    M_IPMOPTS, M_NOWAIT|M_ZERO);
				if (nmships != NULL) {
					memcpy(nmships, omships,
					    sizeof(*omships) *
					    imo->imo_max_memberships);
					free(omships, M_IPMOPTS,
					    sizeof(*omships) *
					    imo->imo_max_memberships);
					imo->imo_membership = nmships;
					imo->imo_max_memberships = newmax;
				}
			}
			/*
			 * Either the limit was hit or the M_NOWAIT allocation
			 * failed; the old vector is still in place.
			 */
			if (nmships == NULL) {
				error = ENOBUFS;
				if_put(ifp);
				break;
			}
		}
		/*
		 * Everything looks good; add a new record to the multicast
		 * address list for the given interface.
		 */
		if ((imo->imo_membership[i] =
		    in_addmulti(&mreqn.imr_multiaddr, ifp)) == NULL) {
			/*
			 * The slot now holds NULL but lies beyond
			 * imo_num_memberships, so it is never looked at.
			 */
			error = ENOBUFS;
			if_put(ifp);
			break;
		}
		++imo->imo_num_memberships;
		if_put(ifp);
		break;

	case IP_DROP_MEMBERSHIP:
		/*
		 * Drop a multicast group membership.
		 * Group must be a valid IP multicast address.
		 */
		if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) ||
		    m->m_len == sizeof(struct ip_mreqn))) {
			error = EINVAL;
			break;
		}
		memset(&mreqn, 0, sizeof(mreqn));
		memcpy(&mreqn, mtod(m, void *), m->m_len);
		if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) {
			error = EINVAL;
			break;
		}

		/*
		 * If an interface address was specified, get a pointer
		 * to its ifnet structure.
		 */
		error = ip_multicast_if(&mreqn, rtableid, &ifidx);
		if (error)
			break;

		/*
		 * Find the membership in the membership array.
		 * An ifidx of 0 matches the group on any interface.
		 */
		for (i = 0; i < imo->imo_num_memberships; ++i) {
			if ((ifidx == 0 ||
			    imo->imo_membership[i]->inm_ifidx == ifidx) &&
			    imo->imo_membership[i]->inm_addr.s_addr ==
			    mreqn.imr_multiaddr.s_addr)
				break;
		}
		if (i == imo->imo_num_memberships) {
			error = EADDRNOTAVAIL;
			break;
		}
		/*
		 * Give up the multicast address record to which the
		 * membership points.
		 */
		in_delmulti(imo->imo_membership[i]);
		/*
		 * Remove the gap in the membership array.
		 */
		for (++i; i < imo->imo_num_memberships; ++i)
			imo->imo_membership[i-1] = imo->imo_membership[i];
		--imo->imo_num_memberships;
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/*
	 * If all options have default values, no need to keep the data.
	 * This also disposes of a block allocated at the top of this
	 * call when the request then failed or changed nothing.
	 */
	if (imo->imo_ifidx == 0 &&
	    imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL &&
	    imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP &&
	    imo->imo_num_memberships == 0) {
		free(imo->imo_membership , M_IPMOPTS,
		    imo->imo_max_memberships * sizeof(struct in_multi *));
		free(*imop, M_IPMOPTS, sizeof(**imop));
		*imop = NULL;
	}

	return (error);
}

/*
 * Return the IP multicast options in response to user getsockopt().
1681 */ 1682 int 1683 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf *m) 1684 { 1685 u_char *ttl; 1686 u_char *loop; 1687 struct in_addr *addr; 1688 struct in_ifaddr *ia; 1689 struct ifnet *ifp; 1690 1691 switch (optname) { 1692 1693 case IP_MULTICAST_IF: 1694 addr = mtod(m, struct in_addr *); 1695 m->m_len = sizeof(struct in_addr); 1696 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL) 1697 addr->s_addr = INADDR_ANY; 1698 else { 1699 IFP_TO_IA(ifp, ia); 1700 if_put(ifp); 1701 addr->s_addr = (ia == NULL) ? INADDR_ANY 1702 : ia->ia_addr.sin_addr.s_addr; 1703 } 1704 return (0); 1705 1706 case IP_MULTICAST_TTL: 1707 ttl = mtod(m, u_char *); 1708 m->m_len = 1; 1709 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1710 : imo->imo_ttl; 1711 return (0); 1712 1713 case IP_MULTICAST_LOOP: 1714 loop = mtod(m, u_char *); 1715 m->m_len = 1; 1716 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1717 : imo->imo_loop; 1718 return (0); 1719 1720 default: 1721 return (EOPNOTSUPP); 1722 } 1723 } 1724 1725 /* 1726 * Discard the IP multicast options. 1727 */ 1728 void 1729 ip_freemoptions(struct ip_moptions *imo) 1730 { 1731 int i; 1732 1733 if (imo != NULL) { 1734 for (i = 0; i < imo->imo_num_memberships; ++i) 1735 in_delmulti(imo->imo_membership[i]); 1736 free(imo->imo_membership, M_IPMOPTS, 1737 imo->imo_max_memberships * sizeof(struct in_multi *)); 1738 free(imo, M_IPMOPTS, sizeof(*imo)); 1739 } 1740 } 1741 1742 /* 1743 * Routine called from ip_output() to loop back a copy of an IP multicast 1744 * packet to the input queue of a specified interface. 1745 */ 1746 void 1747 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1748 { 1749 struct ip *ip; 1750 struct mbuf *copym; 1751 1752 copym = m_dup_pkt(m, max_linkhdr, M_DONTWAIT); 1753 if (copym != NULL) { 1754 /* 1755 * We don't bother to fragment if the IP length is greater 1756 * than the interface's MTU. Can this possibly matter? 
1757 */ 1758 ip = mtod(copym, struct ip *); 1759 ip->ip_sum = 0; 1760 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1761 if_input_local(ifp, copym, dst->sin_family); 1762 } 1763 } 1764 1765 /* 1766 * Compute significant parts of the IPv4 checksum pseudo-header 1767 * for use in a delayed TCP/UDP checksum calculation. 1768 */ 1769 static __inline u_int16_t __attribute__((__unused__)) 1770 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 1771 { 1772 u_int32_t sum; 1773 1774 sum = lenproto + 1775 (u_int16_t)(src >> 16) + 1776 (u_int16_t)(src /*& 0xffff*/) + 1777 (u_int16_t)(dst >> 16) + 1778 (u_int16_t)(dst /*& 0xffff*/); 1779 1780 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 1781 1782 if (sum > 0xffff) 1783 sum -= 0xffff; 1784 1785 return (sum); 1786 } 1787 1788 /* 1789 * Process a delayed payload checksum calculation. 1790 */ 1791 void 1792 in_delayed_cksum(struct mbuf *m) 1793 { 1794 struct ip *ip; 1795 u_int16_t csum, offset; 1796 1797 ip = mtod(m, struct ip *); 1798 offset = ip->ip_hl << 2; 1799 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1800 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1801 csum = 0xffff; 1802 1803 switch (ip->ip_p) { 1804 case IPPROTO_TCP: 1805 offset += offsetof(struct tcphdr, th_sum); 1806 break; 1807 1808 case IPPROTO_UDP: 1809 offset += offsetof(struct udphdr, uh_sum); 1810 break; 1811 1812 case IPPROTO_ICMP: 1813 offset += offsetof(struct icmp, icmp_cksum); 1814 break; 1815 1816 default: 1817 return; 1818 } 1819 1820 if ((offset + sizeof(u_int16_t)) > m->m_len) 1821 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1822 else 1823 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1824 } 1825 1826 void 1827 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 1828 { 1829 struct ip *ip = mtod(m, struct ip *); 1830 1831 /* some hw and in_delayed_cksum need the pseudo header cksum */ 1832 if (m->m_pkthdr.csum_flags & 1833 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 1834 u_int16_t csum = 0, 
offset; 1835 1836 offset = ip->ip_hl << 2; 1837 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 1838 csum = in_cksum_phdr(ip->ip_src.s_addr, 1839 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 1840 offset + ip->ip_p)); 1841 if (ip->ip_p == IPPROTO_TCP) 1842 offset += offsetof(struct tcphdr, th_sum); 1843 else if (ip->ip_p == IPPROTO_UDP) 1844 offset += offsetof(struct udphdr, uh_sum); 1845 else if (ip->ip_p == IPPROTO_ICMP) 1846 offset += offsetof(struct icmp, icmp_cksum); 1847 if ((offset + sizeof(u_int16_t)) > m->m_len) 1848 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1849 else 1850 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1851 } 1852 1853 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 1854 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_TCPv4) || 1855 ip->ip_hl != 5) { 1856 tcpstat_inc(tcps_outswcsum); 1857 in_delayed_cksum(m); 1858 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 1859 } 1860 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 1861 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_UDPv4) || 1862 ip->ip_hl != 5) { 1863 udpstat_inc(udps_outswcsum); 1864 in_delayed_cksum(m); 1865 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 1866 } 1867 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 1868 in_delayed_cksum(m); 1869 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 1870 } 1871 } 1872 1873 int 1874 in_ifcap_cksum(struct mbuf *m, struct ifnet *ifp, int ifcap) 1875 { 1876 if ((ifp == NULL) || 1877 !ISSET(ifp->if_capabilities, ifcap) || 1878 (ifp->if_bridgeidx != 0)) 1879 return (0); 1880 /* 1881 * Simplex interface sends packet back without hardware cksum. 1882 * Keep this check in sync with the condition where ether_resolve() 1883 * calls if_input_local(). 1884 */ 1885 if (ISSET(m->m_flags, M_BCAST) && 1886 ISSET(ifp->if_flags, IFF_SIMPLEX) && 1887 !m->m_pkthdr.pf.routed) 1888 return (0); 1889 return (1); 1890 } 1891