1 /* $OpenBSD: ip_output.c,v 1.371 2021/05/12 08:09:33 mvs Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_enc.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 #endif /* IPSEC */ 74 75 int ip_pcbopts(struct mbuf **, struct mbuf *); 76 int ip_multicast_if(struct ip_mreqn *, u_int, unsigned int *); 77 int ip_setmoptions(int, struct ip_moptions **, struct mbuf *, u_int); 78 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 79 static __inline u_int16_t __attribute__((__unused__)) 80 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 81 void in_delayed_cksum(struct mbuf *); 82 int in_ifcap_cksum(struct mbuf *, struct ifnet *, int); 83 84 #ifdef IPSEC 85 struct tdb * 86 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 87 int ipsecflowinfo); 88 int 89 ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int); 90 #endif /* IPSEC */ 91 92 /* 93 * IP output. The packet in mbuf chain m contains a skeletal IP 94 * header (with len, off, ttl, proto, tos, src, dst). 95 * The mbuf chain containing the packet will be freed. 96 * The mbuf opt, if present, will not be freed. 97 */ 98 int 99 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 100 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 101 { 102 struct ip *ip; 103 struct ifnet *ifp = NULL; 104 struct mbuf_list fml; 105 int hlen = sizeof (struct ip); 106 int error = 0; 107 struct route iproute; 108 struct sockaddr_in *dst; 109 struct tdb *tdb = NULL; 110 u_long mtu; 111 #if NPF > 0 112 u_int orig_rtableid; 113 #endif 114 115 NET_ASSERT_LOCKED(); 116 117 #ifdef IPSEC 118 if (inp && (inp->inp_flags & INP_IPV6) != 0) 119 panic("ip_output: IPv6 pcb is passed"); 120 #endif /* IPSEC */ 121 122 #ifdef DIAGNOSTIC 123 if ((m->m_flags & M_PKTHDR) == 0) 124 panic("ip_output no HDR"); 125 #endif 126 if (opt) 127 m = ip_insertoptions(m, opt, &hlen); 128 129 ip = mtod(m, struct ip *); 130 131 /* 132 * Fill in IP header. 133 */ 134 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 135 ip->ip_v = IPVERSION; 136 ip->ip_off &= htons(IP_DF); 137 ip->ip_id = htons(ip_randomid()); 138 ip->ip_hl = hlen >> 2; 139 ipstat_inc(ips_localout); 140 } else { 141 hlen = ip->ip_hl << 2; 142 } 143 144 /* 145 * We should not send traffic to 0/8 say both Stevens and RFCs 146 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 147 */ 148 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 149 error = ENETUNREACH; 150 goto bad; 151 } 152 153 #if NPF > 0 154 orig_rtableid = m->m_pkthdr.ph_rtableid; 155 reroute: 156 #endif 157 158 /* 159 * Do a route lookup now in case we need the source address to 160 * do an SPD lookup in IPsec; for most packets, the source address 161 * is set at a higher level protocol. ICMPs and other packets 162 * though (e.g., traceroute) have a source address of zeroes. 163 */ 164 if (ro == NULL) { 165 ro = &iproute; 166 memset(ro, 0, sizeof(*ro)); 167 } 168 169 dst = satosin(&ro->ro_dst); 170 171 /* 172 * If there is a cached route, check that it is to the same 173 * destination and is still up. If not, free it and try again. 174 */ 175 if (!rtisvalid(ro->ro_rt) || 176 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 177 ro->ro_tableid != m->m_pkthdr.ph_rtableid) { 178 rtfree(ro->ro_rt); 179 ro->ro_rt = NULL; 180 } 181 182 if (ro->ro_rt == NULL) { 183 dst->sin_family = AF_INET; 184 dst->sin_len = sizeof(*dst); 185 dst->sin_addr = ip->ip_dst; 186 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 187 } 188 189 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 190 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 191 imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) { 192 193 mtu = ifp->if_mtu; 194 if (ip->ip_src.s_addr == INADDR_ANY) { 195 struct in_ifaddr *ia; 196 197 IFP_TO_IA(ifp, ia); 198 if (ia != NULL) 199 ip->ip_src = ia->ia_addr.sin_addr; 200 } 201 } else { 202 struct in_ifaddr *ia; 203 204 if (ro->ro_rt == NULL) 205 ro->ro_rt = rtalloc_mpath(&ro->ro_dst, 206 &ip->ip_src.s_addr, ro->ro_tableid); 207 208 if (ro->ro_rt == NULL) { 209 ipstat_inc(ips_noroute); 210 error = EHOSTUNREACH; 211 goto bad; 212 } 213 214 ia = ifatoia(ro->ro_rt->rt_ifa); 215 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) 216 ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid)); 217 else 218 ifp = if_get(ro->ro_rt->rt_ifidx); 219 /* 220 * We aren't using rtisvalid() here because the UP/DOWN state 221 * machine is broken with some Ethernet drivers like em(4). 222 * As a result we might try to use an invalid cached route 223 * entry while an interface is being detached. 224 */ 225 if (ifp == NULL) { 226 ipstat_inc(ips_noroute); 227 error = EHOSTUNREACH; 228 goto bad; 229 } 230 if ((mtu = ro->ro_rt->rt_mtu) == 0) 231 mtu = ifp->if_mtu; 232 233 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 234 dst = satosin(ro->ro_rt->rt_gateway); 235 236 /* Set the source IP address */ 237 if (ip->ip_src.s_addr == INADDR_ANY && ia) 238 ip->ip_src = ia->ia_addr.sin_addr; 239 } 240 241 #ifdef IPSEC 242 if (ipsec_in_use || inp != NULL) { 243 /* Do we have any pending SAs to apply ? */ 244 tdb = ip_output_ipsec_lookup(m, hlen, &error, inp, 245 ipsecflowinfo); 246 if (error != 0) { 247 /* Should silently drop packet */ 248 if (error == -EINVAL) 249 error = 0; 250 goto bad; 251 } 252 if (tdb != NULL) { 253 /* 254 * If it needs TCP/UDP hardware-checksumming, do the 255 * computation now. 256 */ 257 in_proto_cksum_out(m, NULL); 258 } 259 } 260 #endif /* IPSEC */ 261 262 if (IN_MULTICAST(ip->ip_dst.s_addr) || 263 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 264 265 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 266 M_BCAST : M_MCAST; 267 268 /* 269 * IP destination address is multicast. Make sure "dst" 270 * still points to the address in "ro". (It may have been 271 * changed to point to a gateway address, above.) 272 */ 273 dst = satosin(&ro->ro_dst); 274 275 /* 276 * See if the caller provided any multicast options 277 */ 278 if (imo != NULL) 279 ip->ip_ttl = imo->imo_ttl; 280 else 281 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 282 283 /* 284 * if we don't know the outgoing ifp yet, we can't generate 285 * output 286 */ 287 if (!ifp) { 288 ipstat_inc(ips_noroute); 289 error = EHOSTUNREACH; 290 goto bad; 291 } 292 293 /* 294 * Confirm that the outgoing interface supports multicast, 295 * but only if the packet actually is going out on that 296 * interface (i.e., no IPsec is applied). 297 */ 298 if ((((m->m_flags & M_MCAST) && 299 (ifp->if_flags & IFF_MULTICAST) == 0) || 300 ((m->m_flags & M_BCAST) && 301 (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) { 302 ipstat_inc(ips_noroute); 303 error = ENETUNREACH; 304 goto bad; 305 } 306 307 /* 308 * If source address not specified yet, use address 309 * of outgoing interface. 310 */ 311 if (ip->ip_src.s_addr == INADDR_ANY) { 312 struct in_ifaddr *ia; 313 314 IFP_TO_IA(ifp, ia); 315 if (ia != NULL) 316 ip->ip_src = ia->ia_addr.sin_addr; 317 } 318 319 if ((imo == NULL || imo->imo_loop) && 320 in_hasmulti(&ip->ip_dst, ifp)) { 321 /* 322 * If we belong to the destination multicast group 323 * on the outgoing interface, and the caller did not 324 * forbid loopback, loop back a copy. 325 * Can't defer TCP/UDP checksumming, do the 326 * computation now. 327 */ 328 in_proto_cksum_out(m, NULL); 329 ip_mloopback(ifp, m, dst); 330 } 331 #ifdef MROUTING 332 else { 333 /* 334 * If we are acting as a multicast router, perform 335 * multicast forwarding as if the packet had just 336 * arrived on the interface to which we are about 337 * to send. The multicast forwarding function 338 * recursively calls this function, using the 339 * IP_FORWARDING flag to prevent infinite recursion. 340 * 341 * Multicasts that are looped back by ip_mloopback(), 342 * above, will be forwarded by the ip_input() routine, 343 * if necessary. 344 */ 345 if (ipmforwarding && ip_mrouter[ifp->if_rdomain] && 346 (flags & IP_FORWARDING) == 0) { 347 int rv; 348 349 KERNEL_LOCK(); 350 rv = ip_mforward(m, ifp); 351 KERNEL_UNLOCK(); 352 if (rv != 0) 353 goto bad; 354 } 355 } 356 #endif 357 /* 358 * Multicasts with a time-to-live of zero may be looped- 359 * back, above, but must not be transmitted on a network. 360 * Also, multicasts addressed to the loopback interface 361 * are not sent -- the above call to ip_mloopback() will 362 * loop back a copy if this host actually belongs to the 363 * destination group on the loopback interface. 364 */ 365 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) 366 goto bad; 367 368 goto sendit; 369 } 370 371 /* 372 * Look for broadcast address and verify user is allowed to send 373 * such a packet; if the packet is going in an IPsec tunnel, skip 374 * this check. 375 */ 376 if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) || 377 (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) { 378 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 379 error = EADDRNOTAVAIL; 380 goto bad; 381 } 382 if ((flags & IP_ALLOWBROADCAST) == 0) { 383 error = EACCES; 384 goto bad; 385 } 386 387 /* Don't allow broadcast messages to be fragmented */ 388 if (ntohs(ip->ip_len) > ifp->if_mtu) { 389 error = EMSGSIZE; 390 goto bad; 391 } 392 m->m_flags |= M_BCAST; 393 } else 394 m->m_flags &= ~M_BCAST; 395 396 sendit: 397 /* 398 * If we're doing Path MTU discovery, we need to set DF unless 399 * the route's MTU is locked. 400 */ 401 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 402 (ro->ro_rt->rt_locks & RTV_MTU) == 0) 403 ip->ip_off |= htons(IP_DF); 404 405 #ifdef IPSEC 406 /* 407 * Check if the packet needs encapsulation. 408 */ 409 if (tdb != NULL) { 410 /* Callee frees mbuf */ 411 error = ip_output_ipsec_send(tdb, m, ro, 412 (flags & IP_FORWARDING) ? 1 : 0); 413 goto done; 414 } 415 #endif /* IPSEC */ 416 417 /* 418 * Packet filter 419 */ 420 #if NPF > 0 421 if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT, 422 ifp, &m) != PF_PASS) { 423 error = EACCES; 424 goto bad; 425 } 426 if (m == NULL) 427 goto done; 428 ip = mtod(m, struct ip *); 429 hlen = ip->ip_hl << 2; 430 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 431 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 432 /* already rerun the route lookup, go on */ 433 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 434 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 435 /* tag as generated to skip over pf_test on rerun */ 436 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 437 ro = NULL; 438 if_put(ifp); /* drop reference since target changed */ 439 ifp = NULL; 440 goto reroute; 441 } 442 #endif 443 in_proto_cksum_out(m, ifp); 444 445 #ifdef IPSEC 446 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 447 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 448 error = EHOSTUNREACH; 449 goto bad; 450 } 451 #endif 452 453 /* 454 * If small enough for interface, can just send directly. 455 */ 456 if (ntohs(ip->ip_len) <= mtu) { 457 ip->ip_sum = 0; 458 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 459 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 460 else { 461 ipstat_inc(ips_outswcsum); 462 ip->ip_sum = in_cksum(m, hlen); 463 } 464 465 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 466 goto done; 467 } 468 469 /* 470 * Too large for interface; fragment if possible. 471 * Must be able to put at least 8 bytes per fragment. 472 */ 473 if (ip->ip_off & htons(IP_DF)) { 474 #ifdef IPSEC 475 if (ip_mtudisc) 476 ipsec_adjust_mtu(m, ifp->if_mtu); 477 #endif 478 error = EMSGSIZE; 479 #if NPF > 0 480 /* pf changed routing table, use orig rtable for path MTU */ 481 if (ro->ro_tableid != orig_rtableid) { 482 rtfree(ro->ro_rt); 483 ro->ro_tableid = orig_rtableid; 484 ro->ro_rt = icmp_mtudisc_clone( 485 satosin(&ro->ro_dst)->sin_addr, ro->ro_tableid, 0); 486 } 487 #endif 488 /* 489 * This case can happen if the user changed the MTU 490 * of an interface after enabling IP on it. Because 491 * most netifs don't keep track of routes pointing to 492 * them, there is no way for one to update all its 493 * routes when the MTU is changed. 494 */ 495 if (rtisvalid(ro->ro_rt) && 496 ISSET(ro->ro_rt->rt_flags, RTF_HOST) && 497 !(ro->ro_rt->rt_locks & RTV_MTU) && 498 (ro->ro_rt->rt_mtu > ifp->if_mtu)) { 499 ro->ro_rt->rt_mtu = ifp->if_mtu; 500 } 501 ipstat_inc(ips_cantfrag); 502 goto bad; 503 } 504 505 error = ip_fragment(m, &fml, ifp, mtu); 506 if (error) 507 goto done; 508 509 while ((m = ml_dequeue(&fml)) != NULL) { 510 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 511 if (error) 512 break; 513 } 514 if (error) 515 ml_purge(&fml); 516 else 517 ipstat_inc(ips_fragmented); 518 519 done: 520 if (ro == &iproute && ro->ro_rt) 521 rtfree(ro->ro_rt); 522 if_put(ifp); 523 return (error); 524 525 bad: 526 m_freem(m); 527 goto done; 528 } 529 530 #ifdef IPSEC 531 struct tdb * 532 ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp, 533 int ipsecflowinfo) 534 { 535 struct m_tag *mtag; 536 struct tdb_ident *tdbi; 537 struct tdb *tdb; 538 539 /* Do we have any pending SAs to apply ? */ 540 tdb = ipsp_spd_lookup(m, AF_INET, hlen, error, IPSP_DIRECTION_OUT, 541 NULL, inp, ipsecflowinfo); 542 if (tdb == NULL) 543 return NULL; 544 /* Loop detection */ 545 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 546 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 547 continue; 548 tdbi = (struct tdb_ident *)(mtag + 1); 549 if (tdbi->spi == tdb->tdb_spi && 550 tdbi->proto == tdb->tdb_sproto && 551 tdbi->rdomain == tdb->tdb_rdomain && 552 !memcmp(&tdbi->dst, &tdb->tdb_dst, 553 sizeof(union sockaddr_union))) { 554 /* no IPsec needed */ 555 return NULL; 556 } 557 } 558 return tdb; 559 } 560 561 int 562 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int fwd) 563 { 564 #if NPF > 0 565 struct ifnet *encif; 566 #endif 567 struct ip *ip; 568 int error; 569 570 #if NPF > 0 571 /* 572 * Packet filter 573 */ 574 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 575 pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) { 576 m_freem(m); 577 return EACCES; 578 } 579 if (m == NULL) 580 return 0; 581 /* 582 * PF_TAG_REROUTE handling or not... 583 * Packet is entering IPsec so the routing is 584 * already overruled by the IPsec policy. 585 * Until now the change was not reconsidered. 586 * What's the behaviour? 587 */ 588 in_proto_cksum_out(m, encif); 589 #endif 590 591 /* Check if we are allowed to fragment */ 592 ip = mtod(m, struct ip *); 593 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 594 ntohs(ip->ip_len) > tdb->tdb_mtu && 595 tdb->tdb_mtutimeout > gettime()) { 596 struct rtentry *rt = NULL; 597 int rt_mtucloned = 0; 598 int transportmode = 0; 599 600 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 601 (tdb->tdb_dst.sin.sin_addr.s_addr == ip->ip_dst.s_addr); 602 603 /* Find a host route to store the mtu in */ 604 if (ro != NULL) 605 rt = ro->ro_rt; 606 /* but don't add a PMTU route for transport mode SAs */ 607 if (transportmode) 608 rt = NULL; 609 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 610 rt = icmp_mtudisc_clone(ip->ip_dst, 611 m->m_pkthdr.ph_rtableid, 1); 612 rt_mtucloned = 1; 613 } 614 DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__, 615 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned)); 616 if (rt != NULL) { 617 rt->rt_mtu = tdb->tdb_mtu; 618 if (ro != NULL && ro->ro_rt != NULL) { 619 rtfree(ro->ro_rt); 620 ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, 621 m->m_pkthdr.ph_rtableid); 622 } 623 if (rt_mtucloned) 624 rtfree(rt); 625 } 626 ipsec_adjust_mtu(m, tdb->tdb_mtu); 627 m_freem(m); 628 return EMSGSIZE; 629 } 630 /* propagate IP_DF for v4-over-v6 */ 631 if (ip_mtudisc && ip->ip_off & htons(IP_DF)) 632 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 633 634 /* 635 * Clear these -- they'll be set in the recursive invocation 636 * as needed. 637 */ 638 m->m_flags &= ~(M_MCAST | M_BCAST); 639 640 /* Callee frees mbuf */ 641 error = ipsp_process_packet(m, tdb, AF_INET, 0); 642 if (error) { 643 ipsecstat_inc(ipsec_odrops); 644 tdb->tdb_odrops++; 645 } 646 return error; 647 } 648 #endif /* IPSEC */ 649 650 int 651 ip_fragment(struct mbuf *m, struct mbuf_list *fml, struct ifnet *ifp, 652 u_long mtu) 653 { 654 struct ip *ip, *mhip; 655 struct mbuf *m0; 656 int len, hlen, off; 657 int mhlen, firstlen; 658 int error; 659 660 ml_init(fml); 661 ml_enqueue(fml, m); 662 663 ip = mtod(m, struct ip *); 664 hlen = ip->ip_hl << 2; 665 len = (mtu - hlen) &~ 7; 666 if (len < 8) { 667 error = EMSGSIZE; 668 goto bad; 669 } 670 671 /* 672 * If we are doing fragmentation, we can't defer TCP/UDP 673 * checksumming; compute the checksum and clear the flag. 674 */ 675 in_proto_cksum_out(m, NULL); 676 firstlen = len; 677 678 /* 679 * Loop through length of segment after first fragment, 680 * make new header and copy data of each part and link onto chain. 681 */ 682 m0 = m; 683 mhlen = sizeof (struct ip); 684 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 685 MGETHDR(m, M_DONTWAIT, MT_HEADER); 686 if (m == NULL) { 687 error = ENOBUFS; 688 goto bad; 689 } 690 ml_enqueue(fml, m); 691 if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0) 692 goto bad; 693 m->m_data += max_linkhdr; 694 mhip = mtod(m, struct ip *); 695 *mhip = *ip; 696 if (hlen > sizeof (struct ip)) { 697 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 698 mhip->ip_hl = mhlen >> 2; 699 } 700 m->m_len = mhlen; 701 mhip->ip_off = ((off - hlen) >> 3) + 702 (ntohs(ip->ip_off) & ~IP_MF); 703 if (ip->ip_off & htons(IP_MF)) 704 mhip->ip_off |= IP_MF; 705 if (off + len >= ntohs(ip->ip_len)) 706 len = ntohs(ip->ip_len) - off; 707 else 708 mhip->ip_off |= IP_MF; 709 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 710 m->m_next = m_copym(m0, off, len, M_NOWAIT); 711 if (m->m_next == NULL) { 712 error = ENOBUFS; 713 goto bad; 714 } 715 m->m_pkthdr.len = mhlen + len; 716 m->m_pkthdr.ph_ifidx = 0; 717 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 718 mhip->ip_sum = 0; 719 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 720 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 721 else { 722 ipstat_inc(ips_outswcsum); 723 mhip->ip_sum = in_cksum(m, mhlen); 724 } 725 } 726 /* 727 * Update first fragment by trimming what's been copied out 728 * and updating header, then send each fragment (in order). 729 */ 730 m = m0; 731 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 732 m->m_pkthdr.len = hlen + firstlen; 733 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 734 ip->ip_off |= htons(IP_MF); 735 ip->ip_sum = 0; 736 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 737 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 738 else { 739 ipstat_inc(ips_outswcsum); 740 ip->ip_sum = in_cksum(m, hlen); 741 } 742 743 ipstat_add(ips_ofragments, ml_len(fml)); 744 return (0); 745 746 bad: 747 ipstat_inc(ips_odropped); 748 ml_purge(fml); 749 return (error); 750 } 751 752 /* 753 * Insert IP options into preformed packet. 754 * Adjust IP destination as required for IP source routing, 755 * as indicated by a non-zero in_addr at the start of the options. 756 */ 757 struct mbuf * 758 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 759 { 760 struct ipoption *p = mtod(opt, struct ipoption *); 761 struct mbuf *n; 762 struct ip *ip = mtod(m, struct ip *); 763 unsigned int optlen; 764 765 optlen = opt->m_len - sizeof(p->ipopt_dst); 766 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 767 return (m); /* XXX should fail */ 768 769 /* check if options will fit to IP header */ 770 if ((optlen + sizeof(struct ip)) > (0x0f << 2)) { 771 *phlen = sizeof(struct ip); 772 return (m); 773 } 774 775 if (p->ipopt_dst.s_addr) 776 ip->ip_dst = p->ipopt_dst; 777 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 778 MGETHDR(n, M_DONTWAIT, MT_HEADER); 779 if (n == NULL) 780 return (m); 781 M_MOVE_HDR(n, m); 782 n->m_pkthdr.len += optlen; 783 m->m_len -= sizeof(struct ip); 784 m->m_data += sizeof(struct ip); 785 n->m_next = m; 786 m = n; 787 m->m_len = optlen + sizeof(struct ip); 788 m->m_data += max_linkhdr; 789 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip)); 790 } else { 791 m->m_data -= optlen; 792 m->m_len += optlen; 793 m->m_pkthdr.len += optlen; 794 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 795 } 796 ip = mtod(m, struct ip *); 797 memcpy(ip + 1, p->ipopt_list, optlen); 798 *phlen = sizeof(struct ip) + optlen; 799 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 800 return (m); 801 } 802 803 /* 804 * Copy options from ip to jp, 805 * omitting those not copied during fragmentation. 806 */ 807 int 808 ip_optcopy(struct ip *ip, struct ip *jp) 809 { 810 u_char *cp, *dp; 811 int opt, optlen, cnt; 812 813 cp = (u_char *)(ip + 1); 814 dp = (u_char *)(jp + 1); 815 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 816 for (; cnt > 0; cnt -= optlen, cp += optlen) { 817 opt = cp[0]; 818 if (opt == IPOPT_EOL) 819 break; 820 if (opt == IPOPT_NOP) { 821 /* Preserve for IP mcast tunnel's LSRR alignment. */ 822 *dp++ = IPOPT_NOP; 823 optlen = 1; 824 continue; 825 } 826 #ifdef DIAGNOSTIC 827 if (cnt < IPOPT_OLEN + sizeof(*cp)) 828 panic("malformed IPv4 option passed to ip_optcopy"); 829 #endif 830 optlen = cp[IPOPT_OLEN]; 831 #ifdef DIAGNOSTIC 832 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 833 panic("malformed IPv4 option passed to ip_optcopy"); 834 #endif 835 /* bogus lengths should have been caught by ip_dooptions */ 836 if (optlen > cnt) 837 optlen = cnt; 838 if (IPOPT_COPIED(opt)) { 839 memcpy(dp, cp, optlen); 840 dp += optlen; 841 } 842 } 843 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 844 *dp++ = IPOPT_EOL; 845 return (optlen); 846 } 847 848 /* 849 * IP socket option processing. 850 */ 851 int 852 ip_ctloutput(int op, struct socket *so, int level, int optname, 853 struct mbuf *m) 854 { 855 struct inpcb *inp = sotoinpcb(so); 856 int optval = 0; 857 struct proc *p = curproc; /* XXX */ 858 int error = 0; 859 u_int rtableid, rtid = 0; 860 861 if (level != IPPROTO_IP) 862 return (EINVAL); 863 864 rtableid = p->p_p->ps_rtableid; 865 866 switch (op) { 867 case PRCO_SETOPT: 868 switch (optname) { 869 case IP_OPTIONS: 870 return (ip_pcbopts(&inp->inp_options, m)); 871 872 case IP_TOS: 873 case IP_TTL: 874 case IP_MINTTL: 875 case IP_RECVOPTS: 876 case IP_RECVRETOPTS: 877 case IP_RECVDSTADDR: 878 case IP_RECVIF: 879 case IP_RECVTTL: 880 case IP_RECVDSTPORT: 881 case IP_RECVRTABLE: 882 case IP_IPSECFLOWINFO: 883 if (m == NULL || m->m_len != sizeof(int)) 884 error = EINVAL; 885 else { 886 optval = *mtod(m, int *); 887 switch (optname) { 888 889 case IP_TOS: 890 inp->inp_ip.ip_tos = optval; 891 break; 892 893 case IP_TTL: 894 if (optval > 0 && optval <= MAXTTL) 895 inp->inp_ip.ip_ttl = optval; 896 else if (optval == -1) 897 inp->inp_ip.ip_ttl = ip_defttl; 898 else 899 error = EINVAL; 900 break; 901 902 case IP_MINTTL: 903 if (optval >= 0 && optval <= MAXTTL) 904 inp->inp_ip_minttl = optval; 905 else 906 error = EINVAL; 907 break; 908 #define OPTSET(bit) \ 909 if (optval) \ 910 inp->inp_flags |= bit; \ 911 else \ 912 inp->inp_flags &= ~bit; 913 914 case IP_RECVOPTS: 915 OPTSET(INP_RECVOPTS); 916 break; 917 918 case IP_RECVRETOPTS: 919 OPTSET(INP_RECVRETOPTS); 920 break; 921 922 case IP_RECVDSTADDR: 923 OPTSET(INP_RECVDSTADDR); 924 break; 925 case IP_RECVIF: 926 OPTSET(INP_RECVIF); 927 break; 928 case IP_RECVTTL: 929 OPTSET(INP_RECVTTL); 930 break; 931 case IP_RECVDSTPORT: 932 OPTSET(INP_RECVDSTPORT); 933 break; 934 case IP_RECVRTABLE: 935 OPTSET(INP_RECVRTABLE); 936 break; 937 case IP_IPSECFLOWINFO: 938 OPTSET(INP_IPSECFLOWINFO); 939 break; 940 } 941 } 942 break; 943 #undef OPTSET 944 945 case IP_MULTICAST_IF: 946 case IP_MULTICAST_TTL: 947 case IP_MULTICAST_LOOP: 948 case IP_ADD_MEMBERSHIP: 949 case IP_DROP_MEMBERSHIP: 950 error = ip_setmoptions(optname, &inp->inp_moptions, m, 951 inp->inp_rtableid); 952 break; 953 954 case IP_PORTRANGE: 955 if (m == NULL || m->m_len != sizeof(int)) 956 error = EINVAL; 957 else { 958 optval = *mtod(m, int *); 959 960 switch (optval) { 961 962 case IP_PORTRANGE_DEFAULT: 963 inp->inp_flags &= ~(INP_LOWPORT); 964 inp->inp_flags &= ~(INP_HIGHPORT); 965 break; 966 967 case IP_PORTRANGE_HIGH: 968 inp->inp_flags &= ~(INP_LOWPORT); 969 inp->inp_flags |= INP_HIGHPORT; 970 break; 971 972 case IP_PORTRANGE_LOW: 973 inp->inp_flags &= ~(INP_HIGHPORT); 974 inp->inp_flags |= INP_LOWPORT; 975 break; 976 977 default: 978 979 error = EINVAL; 980 break; 981 } 982 } 983 break; 984 case IP_AUTH_LEVEL: 985 case IP_ESP_TRANS_LEVEL: 986 case IP_ESP_NETWORK_LEVEL: 987 case IP_IPCOMP_LEVEL: 988 #ifndef IPSEC 989 error = EOPNOTSUPP; 990 #else 991 if (m == NULL || m->m_len != sizeof(int)) { 992 error = EINVAL; 993 break; 994 } 995 optval = *mtod(m, int *); 996 997 if (optval < IPSEC_LEVEL_BYPASS || 998 optval > IPSEC_LEVEL_UNIQUE) { 999 error = EINVAL; 1000 break; 1001 } 1002 1003 switch (optname) { 1004 case IP_AUTH_LEVEL: 1005 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1006 suser(p)) { 1007 error = EACCES; 1008 break; 1009 } 1010 inp->inp_seclevel[SL_AUTH] = optval; 1011 break; 1012 1013 case IP_ESP_TRANS_LEVEL: 1014 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1015 suser(p)) { 1016 error = EACCES; 1017 break; 1018 } 1019 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1020 break; 1021 1022 case IP_ESP_NETWORK_LEVEL: 1023 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1024 suser(p)) { 1025 error = EACCES; 1026 break; 1027 } 1028 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1029 break; 1030 case IP_IPCOMP_LEVEL: 1031 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1032 suser(p)) { 1033 error = EACCES; 1034 break; 1035 } 1036 inp->inp_seclevel[SL_IPCOMP] = optval; 1037 break; 1038 } 1039 #endif 1040 break; 1041 1042 case IP_IPSEC_LOCAL_ID: 1043 case IP_IPSEC_REMOTE_ID: 1044 error = EOPNOTSUPP; 1045 break; 1046 case SO_RTABLE: 1047 if (m == NULL || m->m_len < sizeof(u_int)) { 1048 error = EINVAL; 1049 break; 1050 } 1051 rtid = *mtod(m, u_int *); 1052 if (inp->inp_rtableid == rtid) 1053 break; 1054 /* needs privileges to switch when already set */ 1055 if (rtableid != rtid && rtableid != 0 && 1056 (error = suser(p)) != 0) 1057 break; 1058 /* table must exist */ 1059 if (!rtable_exists(rtid)) { 1060 error = EINVAL; 1061 break; 1062 } 1063 if (inp->inp_lport) { 1064 error = EBUSY; 1065 break; 1066 } 1067 inp->inp_rtableid = rtid; 1068 in_pcbrehash(inp); 1069 break; 1070 case IP_PIPEX: 1071 if (m != NULL && m->m_len == sizeof(int)) 1072 inp->inp_pipex = *mtod(m, int *); 1073 else 1074 error = EINVAL; 1075 break; 1076 1077 default: 1078 error = ENOPROTOOPT; 1079 break; 1080 } 1081 break; 1082 1083 case PRCO_GETOPT: 1084 switch (optname) { 1085 case IP_OPTIONS: 1086 case IP_RETOPTS: 1087 if (inp->inp_options) { 1088 m->m_len = inp->inp_options->m_len; 1089 memcpy(mtod(m, caddr_t), 1090 mtod(inp->inp_options, caddr_t), m->m_len); 1091 } else 1092 m->m_len = 0; 1093 break; 1094 1095 case IP_TOS: 1096 case IP_TTL: 1097 case IP_MINTTL: 1098 case IP_RECVOPTS: 1099 case IP_RECVRETOPTS: 1100 case IP_RECVDSTADDR: 1101 case IP_RECVIF: 1102 case IP_RECVTTL: 1103 case IP_RECVDSTPORT: 1104 case IP_RECVRTABLE: 1105 case IP_IPSECFLOWINFO: 1106 case IP_IPDEFTTL: 1107 m->m_len = sizeof(int); 1108 switch (optname) { 1109 1110 case IP_TOS: 1111 optval = inp->inp_ip.ip_tos; 1112 break; 1113 1114 case IP_TTL: 1115 optval = inp->inp_ip.ip_ttl; 1116 break; 1117 1118 case IP_MINTTL: 1119 optval = inp->inp_ip_minttl; 1120 break; 1121 1122 case IP_IPDEFTTL: 1123 optval = ip_defttl; 1124 break; 1125 1126 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1127 1128 case IP_RECVOPTS: 1129 optval = OPTBIT(INP_RECVOPTS); 1130 break; 1131 1132 case IP_RECVRETOPTS: 1133 optval = OPTBIT(INP_RECVRETOPTS); 1134 break; 1135 1136 case IP_RECVDSTADDR: 1137 optval = OPTBIT(INP_RECVDSTADDR); 1138 break; 1139 case IP_RECVIF: 1140 optval = OPTBIT(INP_RECVIF); 1141 break; 1142 case IP_RECVTTL: 1143 optval = OPTBIT(INP_RECVTTL); 1144 break; 1145 case IP_RECVDSTPORT: 1146 optval = OPTBIT(INP_RECVDSTPORT); 1147 break; 1148 case IP_RECVRTABLE: 1149 optval = OPTBIT(INP_RECVRTABLE); 1150 break; 1151 case IP_IPSECFLOWINFO: 1152 optval = OPTBIT(INP_IPSECFLOWINFO); 1153 break; 1154 } 1155 *mtod(m, int *) = optval; 1156 break; 1157 1158 case IP_MULTICAST_IF: 1159 case IP_MULTICAST_TTL: 1160 case IP_MULTICAST_LOOP: 1161 case IP_ADD_MEMBERSHIP: 1162 case IP_DROP_MEMBERSHIP: 1163 error = ip_getmoptions(optname, inp->inp_moptions, m); 1164 break; 1165 1166 case IP_PORTRANGE: 1167 m->m_len = sizeof(int); 1168 1169 if (inp->inp_flags & INP_HIGHPORT) 1170 optval = IP_PORTRANGE_HIGH; 1171 else if (inp->inp_flags & INP_LOWPORT) 1172 optval = IP_PORTRANGE_LOW; 1173 else 1174 optval = 0; 1175 1176 *mtod(m, int *) = optval; 1177 break; 1178 1179 case IP_AUTH_LEVEL: 1180 case IP_ESP_TRANS_LEVEL: 1181 case IP_ESP_NETWORK_LEVEL: 1182 case IP_IPCOMP_LEVEL: 1183 #ifndef IPSEC 1184 m->m_len = sizeof(int); 1185 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1186 #else 1187 m->m_len = sizeof(int); 1188 switch (optname) { 1189 case IP_AUTH_LEVEL: 1190 optval = inp->inp_seclevel[SL_AUTH]; 1191 break; 1192 1193 case IP_ESP_TRANS_LEVEL: 1194 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1195 break; 1196 1197 case IP_ESP_NETWORK_LEVEL: 1198 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1199 break; 1200 case IP_IPCOMP_LEVEL: 1201 optval = inp->inp_seclevel[SL_IPCOMP]; 1202 break; 1203 } 1204 *mtod(m, int *) = optval; 1205 #endif 1206 break; 1207 case IP_IPSEC_LOCAL_ID: 1208 case IP_IPSEC_REMOTE_ID: 1209 error = EOPNOTSUPP; 1210 break; 1211 case SO_RTABLE: 1212 m->m_len = sizeof(u_int); 1213 *mtod(m, u_int *) = inp->inp_rtableid; 1214 break; 1215 case IP_PIPEX: 1216 m->m_len = sizeof(int); 1217 *mtod(m, int *) = inp->inp_pipex; 1218 break; 1219 default: 1220 error = ENOPROTOOPT; 1221 break; 1222 } 1223 break; 1224 } 1225 return (error); 1226 } 1227 1228 /* 1229 * Set up IP options in pcb for insertion in output packets. 1230 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1231 * with destination address if source routed. 1232 */ 1233 int 1234 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m) 1235 { 1236 struct mbuf *n; 1237 struct ipoption *p; 1238 int cnt, off, optlen; 1239 u_char *cp; 1240 u_char opt; 1241 1242 /* turn off any old options */ 1243 m_freem(*pcbopt); 1244 *pcbopt = NULL; 1245 if (m == NULL || m->m_len == 0) { 1246 /* 1247 * Only turning off any previous options. 1248 */ 1249 return (0); 1250 } 1251 1252 if (m->m_len % sizeof(int32_t) || 1253 m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1254 return (EINVAL); 1255 1256 /* Don't sleep because NET_LOCK() is hold. */ 1257 if ((n = m_get(M_NOWAIT, MT_SOOPTS)) == NULL) 1258 return (ENOBUFS); 1259 p = mtod(n, struct ipoption *); 1260 memset(p, 0, sizeof (*p)); /* 0 = IPOPT_EOL, needed for padding */ 1261 n->m_len = sizeof(struct in_addr); 1262 1263 off = 0; 1264 cnt = m->m_len; 1265 cp = mtod(m, u_char *); 1266 1267 while (cnt > 0) { 1268 opt = cp[IPOPT_OPTVAL]; 1269 1270 if (opt == IPOPT_NOP || opt == IPOPT_EOL) { 1271 optlen = 1; 1272 } else { 1273 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1274 goto bad; 1275 optlen = cp[IPOPT_OLEN]; 1276 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1277 goto bad; 1278 } 1279 switch (opt) { 1280 default: 1281 memcpy(p->ipopt_list + off, cp, optlen); 1282 break; 1283 1284 case IPOPT_LSRR: 1285 case IPOPT_SSRR: 1286 /* 1287 * user process specifies route as: 1288 * ->A->B->C->D 1289 * D must be our final destination (but we can't 1290 * check that since we may not have connected yet). 1291 * A is first hop destination, which doesn't appear in 1292 * actual IP option, but is stored before the options. 1293 */ 1294 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1295 goto bad; 1296 1297 /* 1298 * Optlen is smaller because first address is popped. 1299 * Cnt and cp will be adjusted a bit later to reflect 1300 * this. 1301 */ 1302 optlen -= sizeof(struct in_addr); 1303 p->ipopt_list[off + IPOPT_OPTVAL] = opt; 1304 p->ipopt_list[off + IPOPT_OLEN] = optlen; 1305 1306 /* 1307 * Move first hop before start of options. 1308 */ 1309 memcpy(&p->ipopt_dst, cp + IPOPT_OFFSET, 1310 sizeof(struct in_addr)); 1311 cp += sizeof(struct in_addr); 1312 cnt -= sizeof(struct in_addr); 1313 /* 1314 * Then copy rest of options 1315 */ 1316 memcpy(p->ipopt_list + off + IPOPT_OFFSET, 1317 cp + IPOPT_OFFSET, optlen - IPOPT_OFFSET); 1318 break; 1319 } 1320 off += optlen; 1321 cp += optlen; 1322 cnt -= optlen; 1323 1324 if (opt == IPOPT_EOL) 1325 break; 1326 } 1327 /* pad options to next word, since p was zeroed just adjust off */ 1328 off = (off + sizeof(int32_t) - 1) & ~(sizeof(int32_t) - 1); 1329 n->m_len += off; 1330 if (n->m_len > sizeof(*p)) { 1331 bad: 1332 m_freem(n); 1333 return (EINVAL); 1334 } 1335 1336 *pcbopt = n; 1337 return (0); 1338 } 1339 1340 /* 1341 * Lookup the interface based on the information in the ip_mreqn struct. 1342 */ 1343 int 1344 ip_multicast_if(struct ip_mreqn *mreq, u_int rtableid, unsigned int *ifidx) 1345 { 1346 struct sockaddr_in sin; 1347 struct rtentry *rt; 1348 1349 /* 1350 * In case userland provides the imr_ifindex use this as interface. 1351 * If no interface address was provided, use the interface of 1352 * the route to the given multicast address. 1353 */ 1354 if (mreq->imr_ifindex != 0) { 1355 *ifidx = mreq->imr_ifindex; 1356 } else if (mreq->imr_address.s_addr == INADDR_ANY) { 1357 memset(&sin, 0, sizeof(sin)); 1358 sin.sin_len = sizeof(sin); 1359 sin.sin_family = AF_INET; 1360 sin.sin_addr = mreq->imr_multiaddr; 1361 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 1362 if (!rtisvalid(rt)) { 1363 rtfree(rt); 1364 return EADDRNOTAVAIL; 1365 } 1366 *ifidx = rt->rt_ifidx; 1367 rtfree(rt); 1368 } else { 1369 memset(&sin, 0, sizeof(sin)); 1370 sin.sin_len = sizeof(sin); 1371 sin.sin_family = AF_INET; 1372 sin.sin_addr = mreq->imr_address; 1373 rt = rtalloc(sintosa(&sin), 0, rtableid); 1374 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 1375 rtfree(rt); 1376 return EADDRNOTAVAIL; 1377 } 1378 *ifidx = rt->rt_ifidx; 1379 rtfree(rt); 1380 } 1381 1382 return 0; 1383 } 1384 1385 /* 1386 * Set the IP multicast options in response to user setsockopt(). 1387 */ 1388 int 1389 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m, 1390 u_int rtableid) 1391 { 1392 struct in_addr addr; 1393 struct in_ifaddr *ia; 1394 struct ip_mreqn mreqn; 1395 struct ifnet *ifp = NULL; 1396 struct ip_moptions *imo = *imop; 1397 struct in_multi **immp; 1398 struct sockaddr_in sin; 1399 unsigned int ifidx; 1400 int i, error = 0; 1401 u_char loop; 1402 1403 if (imo == NULL) { 1404 /* 1405 * No multicast option buffer attached to the pcb; 1406 * allocate one and initialize to default values. 1407 */ 1408 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO); 1409 immp = mallocarray(IP_MIN_MEMBERSHIPS, sizeof(*immp), M_IPMOPTS, 1410 M_WAITOK|M_ZERO); 1411 *imop = imo; 1412 imo->imo_ifidx = 0; 1413 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 1414 imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP; 1415 imo->imo_num_memberships = 0; 1416 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1417 imo->imo_membership = immp; 1418 } 1419 1420 switch (optname) { 1421 1422 case IP_MULTICAST_IF: 1423 /* 1424 * Select the interface for outgoing multicast packets. 1425 */ 1426 if (m == NULL) { 1427 error = EINVAL; 1428 break; 1429 } 1430 if (m->m_len == sizeof(struct in_addr)) { 1431 addr = *(mtod(m, struct in_addr *)); 1432 } else if (m->m_len == sizeof(struct ip_mreq) || 1433 m->m_len == sizeof(struct ip_mreqn)) { 1434 memset(&mreqn, 0, sizeof(mreqn)); 1435 memcpy(&mreqn, mtod(m, void *), m->m_len); 1436 1437 /* 1438 * If an interface index is given use this 1439 * index to set the imo_ifidx but check first 1440 * that the interface actually exists. 1441 * In the other case just set the addr to 1442 * the imr_address and fall through to the 1443 * regular code. 1444 */ 1445 if (mreqn.imr_ifindex != 0) { 1446 ifp = if_get(mreqn.imr_ifindex); 1447 if (ifp == NULL || 1448 ifp->if_rdomain != rtable_l2(rtableid)) { 1449 error = EADDRNOTAVAIL; 1450 if_put(ifp); 1451 break; 1452 } 1453 imo->imo_ifidx = ifp->if_index; 1454 if_put(ifp); 1455 break; 1456 } else 1457 addr = mreqn.imr_address; 1458 } else { 1459 error = EINVAL; 1460 break; 1461 } 1462 /* 1463 * INADDR_ANY is used to remove a previous selection. 1464 * When no interface is selected, a default one is 1465 * chosen every time a multicast packet is sent. 1466 */ 1467 if (addr.s_addr == INADDR_ANY) { 1468 imo->imo_ifidx = 0; 1469 break; 1470 } 1471 /* 1472 * The selected interface is identified by its local 1473 * IP address. Find the interface and confirm that 1474 * it supports multicasting. 1475 */ 1476 memset(&sin, 0, sizeof(sin)); 1477 sin.sin_len = sizeof(sin); 1478 sin.sin_family = AF_INET; 1479 sin.sin_addr = addr; 1480 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1481 if (ia == NULL || 1482 (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) { 1483 error = EADDRNOTAVAIL; 1484 break; 1485 } 1486 imo->imo_ifidx = ia->ia_ifp->if_index; 1487 break; 1488 1489 case IP_MULTICAST_TTL: 1490 /* 1491 * Set the IP time-to-live for outgoing multicast packets. 1492 */ 1493 if (m == NULL || m->m_len != 1) { 1494 error = EINVAL; 1495 break; 1496 } 1497 imo->imo_ttl = *(mtod(m, u_char *)); 1498 break; 1499 1500 case IP_MULTICAST_LOOP: 1501 /* 1502 * Set the loopback flag for outgoing multicast packets. 1503 * Must be zero or one. 1504 */ 1505 if (m == NULL || m->m_len != 1 || 1506 (loop = *(mtod(m, u_char *))) > 1) { 1507 error = EINVAL; 1508 break; 1509 } 1510 imo->imo_loop = loop; 1511 break; 1512 1513 case IP_ADD_MEMBERSHIP: 1514 /* 1515 * Add a multicast group membership. 1516 * Group must be a valid IP multicast address. 1517 */ 1518 if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) || 1519 m->m_len == sizeof(struct ip_mreqn))) { 1520 error = EINVAL; 1521 break; 1522 } 1523 memset(&mreqn, 0, sizeof(mreqn)); 1524 memcpy(&mreqn, mtod(m, void *), m->m_len); 1525 if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) { 1526 error = EINVAL; 1527 break; 1528 } 1529 1530 error = ip_multicast_if(&mreqn, rtableid, &ifidx); 1531 if (error) 1532 break; 1533 1534 /* 1535 * See if we found an interface, and confirm that it 1536 * supports multicast. 1537 */ 1538 ifp = if_get(ifidx); 1539 if (ifp == NULL || ifp->if_rdomain != rtable_l2(rtableid) || 1540 (ifp->if_flags & IFF_MULTICAST) == 0) { 1541 error = EADDRNOTAVAIL; 1542 if_put(ifp); 1543 break; 1544 } 1545 1546 /* 1547 * See if the membership already exists or if all the 1548 * membership slots are full. 1549 */ 1550 for (i = 0; i < imo->imo_num_memberships; ++i) { 1551 if (imo->imo_membership[i]->inm_ifidx == ifidx && 1552 imo->imo_membership[i]->inm_addr.s_addr 1553 == mreqn.imr_multiaddr.s_addr) 1554 break; 1555 } 1556 if (i < imo->imo_num_memberships) { 1557 error = EADDRINUSE; 1558 if_put(ifp); 1559 break; 1560 } 1561 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1562 struct in_multi **nmships, **omships; 1563 size_t newmax; 1564 /* 1565 * Resize the vector to next power-of-two minus 1. If 1566 * the size would exceed the maximum then we know we've 1567 * really run out of entries. Otherwise, we reallocate 1568 * the vector. 1569 */ 1570 nmships = NULL; 1571 omships = imo->imo_membership; 1572 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1573 if (newmax <= IP_MAX_MEMBERSHIPS) { 1574 nmships = mallocarray(newmax, sizeof(*nmships), 1575 M_IPMOPTS, M_NOWAIT|M_ZERO); 1576 if (nmships != NULL) { 1577 memcpy(nmships, omships, 1578 sizeof(*omships) * 1579 imo->imo_max_memberships); 1580 free(omships, M_IPMOPTS, 1581 sizeof(*omships) * 1582 imo->imo_max_memberships); 1583 imo->imo_membership = nmships; 1584 imo->imo_max_memberships = newmax; 1585 } 1586 } 1587 if (nmships == NULL) { 1588 error = ENOBUFS; 1589 if_put(ifp); 1590 break; 1591 } 1592 } 1593 /* 1594 * Everything looks good; add a new record to the multicast 1595 * address list for the given interface. 1596 */ 1597 if ((imo->imo_membership[i] = 1598 in_addmulti(&mreqn.imr_multiaddr, ifp)) == NULL) { 1599 error = ENOBUFS; 1600 if_put(ifp); 1601 break; 1602 } 1603 ++imo->imo_num_memberships; 1604 if_put(ifp); 1605 break; 1606 1607 case IP_DROP_MEMBERSHIP: 1608 /* 1609 * Drop a multicast group membership. 1610 * Group must be a valid IP multicast address. 1611 */ 1612 if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) || 1613 m->m_len == sizeof(struct ip_mreqn))) { 1614 error = EINVAL; 1615 break; 1616 } 1617 memset(&mreqn, 0, sizeof(mreqn)); 1618 memcpy(&mreqn, mtod(m, void *), m->m_len); 1619 if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) { 1620 error = EINVAL; 1621 break; 1622 } 1623 1624 /* 1625 * If an interface address was specified, get a pointer 1626 * to its ifnet structure. 1627 */ 1628 error = ip_multicast_if(&mreqn, rtableid, &ifidx); 1629 if (error) 1630 break; 1631 1632 /* 1633 * Find the membership in the membership array. 1634 */ 1635 for (i = 0; i < imo->imo_num_memberships; ++i) { 1636 if ((ifidx == 0 || 1637 imo->imo_membership[i]->inm_ifidx == ifidx) && 1638 imo->imo_membership[i]->inm_addr.s_addr == 1639 mreqn.imr_multiaddr.s_addr) 1640 break; 1641 } 1642 if (i == imo->imo_num_memberships) { 1643 error = EADDRNOTAVAIL; 1644 break; 1645 } 1646 /* 1647 * Give up the multicast address record to which the 1648 * membership points. 1649 */ 1650 in_delmulti(imo->imo_membership[i]); 1651 /* 1652 * Remove the gap in the membership array. 1653 */ 1654 for (++i; i < imo->imo_num_memberships; ++i) 1655 imo->imo_membership[i-1] = imo->imo_membership[i]; 1656 --imo->imo_num_memberships; 1657 break; 1658 1659 default: 1660 error = EOPNOTSUPP; 1661 break; 1662 } 1663 1664 /* 1665 * If all options have default values, no need to keep the data. 1666 */ 1667 if (imo->imo_ifidx == 0 && 1668 imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL && 1669 imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP && 1670 imo->imo_num_memberships == 0) { 1671 free(imo->imo_membership , M_IPMOPTS, 1672 imo->imo_max_memberships * sizeof(struct in_multi *)); 1673 free(*imop, M_IPMOPTS, sizeof(**imop)); 1674 *imop = NULL; 1675 } 1676 1677 return (error); 1678 } 1679 1680 /* 1681 * Return the IP multicast options in response to user getsockopt(). 1682 */ 1683 int 1684 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf *m) 1685 { 1686 u_char *ttl; 1687 u_char *loop; 1688 struct in_addr *addr; 1689 struct in_ifaddr *ia; 1690 struct ifnet *ifp; 1691 1692 switch (optname) { 1693 1694 case IP_MULTICAST_IF: 1695 addr = mtod(m, struct in_addr *); 1696 m->m_len = sizeof(struct in_addr); 1697 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL) 1698 addr->s_addr = INADDR_ANY; 1699 else { 1700 IFP_TO_IA(ifp, ia); 1701 if_put(ifp); 1702 addr->s_addr = (ia == NULL) ? INADDR_ANY 1703 : ia->ia_addr.sin_addr.s_addr; 1704 } 1705 return (0); 1706 1707 case IP_MULTICAST_TTL: 1708 ttl = mtod(m, u_char *); 1709 m->m_len = 1; 1710 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1711 : imo->imo_ttl; 1712 return (0); 1713 1714 case IP_MULTICAST_LOOP: 1715 loop = mtod(m, u_char *); 1716 m->m_len = 1; 1717 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1718 : imo->imo_loop; 1719 return (0); 1720 1721 default: 1722 return (EOPNOTSUPP); 1723 } 1724 } 1725 1726 /* 1727 * Discard the IP multicast options. 1728 */ 1729 void 1730 ip_freemoptions(struct ip_moptions *imo) 1731 { 1732 int i; 1733 1734 if (imo != NULL) { 1735 for (i = 0; i < imo->imo_num_memberships; ++i) 1736 in_delmulti(imo->imo_membership[i]); 1737 free(imo->imo_membership, M_IPMOPTS, 1738 imo->imo_max_memberships * sizeof(struct in_multi *)); 1739 free(imo, M_IPMOPTS, sizeof(*imo)); 1740 } 1741 } 1742 1743 /* 1744 * Routine called from ip_output() to loop back a copy of an IP multicast 1745 * packet to the input queue of a specified interface. 1746 */ 1747 void 1748 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1749 { 1750 struct ip *ip; 1751 struct mbuf *copym; 1752 1753 copym = m_dup_pkt(m, max_linkhdr, M_DONTWAIT); 1754 if (copym != NULL) { 1755 /* 1756 * We don't bother to fragment if the IP length is greater 1757 * than the interface's MTU. Can this possibly matter? 1758 */ 1759 ip = mtod(copym, struct ip *); 1760 ip->ip_sum = 0; 1761 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1762 if_input_local(ifp, copym, dst->sin_family); 1763 } 1764 } 1765 1766 /* 1767 * Compute significant parts of the IPv4 checksum pseudo-header 1768 * for use in a delayed TCP/UDP checksum calculation. 1769 */ 1770 static __inline u_int16_t __attribute__((__unused__)) 1771 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 1772 { 1773 u_int32_t sum; 1774 1775 sum = lenproto + 1776 (u_int16_t)(src >> 16) + 1777 (u_int16_t)(src /*& 0xffff*/) + 1778 (u_int16_t)(dst >> 16) + 1779 (u_int16_t)(dst /*& 0xffff*/); 1780 1781 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 1782 1783 if (sum > 0xffff) 1784 sum -= 0xffff; 1785 1786 return (sum); 1787 } 1788 1789 /* 1790 * Process a delayed payload checksum calculation. 1791 */ 1792 void 1793 in_delayed_cksum(struct mbuf *m) 1794 { 1795 struct ip *ip; 1796 u_int16_t csum, offset; 1797 1798 ip = mtod(m, struct ip *); 1799 offset = ip->ip_hl << 2; 1800 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1801 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1802 csum = 0xffff; 1803 1804 switch (ip->ip_p) { 1805 case IPPROTO_TCP: 1806 offset += offsetof(struct tcphdr, th_sum); 1807 break; 1808 1809 case IPPROTO_UDP: 1810 offset += offsetof(struct udphdr, uh_sum); 1811 break; 1812 1813 case IPPROTO_ICMP: 1814 offset += offsetof(struct icmp, icmp_cksum); 1815 break; 1816 1817 default: 1818 return; 1819 } 1820 1821 if ((offset + sizeof(u_int16_t)) > m->m_len) 1822 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1823 else 1824 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1825 } 1826 1827 void 1828 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 1829 { 1830 struct ip *ip = mtod(m, struct ip *); 1831 1832 /* some hw and in_delayed_cksum need the pseudo header cksum */ 1833 if (m->m_pkthdr.csum_flags & 1834 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 1835 u_int16_t csum = 0, offset; 1836 1837 offset = ip->ip_hl << 2; 1838 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 1839 csum = in_cksum_phdr(ip->ip_src.s_addr, 1840 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 1841 offset + ip->ip_p)); 1842 if (ip->ip_p == IPPROTO_TCP) 1843 offset += offsetof(struct tcphdr, th_sum); 1844 else if (ip->ip_p == IPPROTO_UDP) 1845 offset += offsetof(struct udphdr, uh_sum); 1846 else if (ip->ip_p == IPPROTO_ICMP) 1847 offset += offsetof(struct icmp, icmp_cksum); 1848 if ((offset + sizeof(u_int16_t)) > m->m_len) 1849 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1850 else 1851 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1852 } 1853 1854 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 1855 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_TCPv4) || 1856 ip->ip_hl != 5) { 1857 tcpstat_inc(tcps_outswcsum); 1858 in_delayed_cksum(m); 1859 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 1860 } 1861 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 1862 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_UDPv4) || 1863 ip->ip_hl != 5) { 1864 udpstat_inc(udps_outswcsum); 1865 in_delayed_cksum(m); 1866 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 1867 } 1868 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 1869 in_delayed_cksum(m); 1870 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 1871 } 1872 } 1873 1874 int 1875 in_ifcap_cksum(struct mbuf *m, struct ifnet *ifp, int ifcap) 1876 { 1877 if ((ifp == NULL) || 1878 !ISSET(ifp->if_capabilities, ifcap) || 1879 (ifp->if_bridgeidx != 0)) 1880 return (0); 1881 /* 1882 * Simplex interface sends packet back without hardware cksum. 1883 * Keep this check in sync with the condition where ether_resolve() 1884 * calls if_input_local(). 1885 */ 1886 if (ISSET(m->m_flags, M_BCAST) && 1887 ISSET(ifp->if_flags, IFF_SIMPLEX) && 1888 !m->m_pkthdr.pf.routed) 1889 return (0); 1890 return (1); 1891 } 1892