1 /* $OpenBSD: ip_output.c,v 1.381 2022/05/25 19:48:46 mvs Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_enc.h> 49 #include <net/route.h> 50 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(fmt, args...) \ 70 do { \ 71 if (encdebug) \ 72 printf("%s: " fmt "\n", __func__, ## args); \ 73 } while (0) 74 #else 75 #define DPRINTF(fmt, args...) \ 76 do { } while (0) 77 #endif 78 #endif /* IPSEC */ 79 80 int ip_pcbopts(struct mbuf **, struct mbuf *); 81 int ip_multicast_if(struct ip_mreqn *, u_int, unsigned int *); 82 int ip_setmoptions(int, struct ip_moptions **, struct mbuf *, u_int); 83 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 84 static __inline u_int16_t __attribute__((__unused__)) 85 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t); 86 void in_delayed_cksum(struct mbuf *); 87 int in_ifcap_cksum(struct mbuf *, struct ifnet *, int); 88 89 int ip_output_ipsec_lookup(struct mbuf *m, int hlen, struct inpcb *inp, 90 struct tdb **, int ipsecflowinfo); 91 void ip_output_ipsec_pmtu_update(struct tdb *, struct route *, struct in_addr, 92 int, int); 93 int ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int); 94 95 /* 96 * IP output. The packet in mbuf chain m contains a skeletal IP 97 * header (with len, off, ttl, proto, tos, src, dst). 98 * The mbuf chain containing the packet will be freed. 99 * The mbuf opt, if present, will not be freed. 100 */ 101 int 102 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, 103 struct ip_moptions *imo, struct inpcb *inp, u_int32_t ipsecflowinfo) 104 { 105 struct ip *ip; 106 struct ifnet *ifp = NULL; 107 struct mbuf_list fml; 108 int hlen = sizeof (struct ip); 109 int error = 0; 110 struct route iproute; 111 struct sockaddr_in *dst; 112 struct tdb *tdb = NULL; 113 u_long mtu; 114 #if NPF > 0 115 u_int orig_rtableid; 116 #endif 117 118 NET_ASSERT_LOCKED(); 119 120 #ifdef IPSEC 121 if (inp && (inp->inp_flags & INP_IPV6) != 0) 122 panic("ip_output: IPv6 pcb is passed"); 123 #endif /* IPSEC */ 124 125 #ifdef DIAGNOSTIC 126 if ((m->m_flags & M_PKTHDR) == 0) 127 panic("ip_output no HDR"); 128 #endif 129 if (opt) 130 m = ip_insertoptions(m, opt, &hlen); 131 132 ip = mtod(m, struct ip *); 133 134 /* 135 * Fill in IP header. 136 */ 137 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 138 ip->ip_v = IPVERSION; 139 ip->ip_off &= htons(IP_DF); 140 ip->ip_id = htons(ip_randomid()); 141 ip->ip_hl = hlen >> 2; 142 ipstat_inc(ips_localout); 143 } else { 144 hlen = ip->ip_hl << 2; 145 } 146 147 /* 148 * We should not send traffic to 0/8 say both Stevens and RFCs 149 * 5735 section 3 and 1122 sections 3.2.1.3 and 3.3.6. 150 */ 151 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == 0) { 152 error = ENETUNREACH; 153 goto bad; 154 } 155 156 #if NPF > 0 157 orig_rtableid = m->m_pkthdr.ph_rtableid; 158 reroute: 159 #endif 160 161 /* 162 * Do a route lookup now in case we need the source address to 163 * do an SPD lookup in IPsec; for most packets, the source address 164 * is set at a higher level protocol. ICMPs and other packets 165 * though (e.g., traceroute) have a source address of zeroes. 166 */ 167 if (ro == NULL) { 168 ro = &iproute; 169 memset(ro, 0, sizeof(*ro)); 170 } 171 172 dst = satosin(&ro->ro_dst); 173 174 /* 175 * If there is a cached route, check that it is to the same 176 * destination and is still up. If not, free it and try again. 177 */ 178 if (!rtisvalid(ro->ro_rt) || 179 dst->sin_addr.s_addr != ip->ip_dst.s_addr || 180 ro->ro_tableid != m->m_pkthdr.ph_rtableid) { 181 rtfree(ro->ro_rt); 182 ro->ro_rt = NULL; 183 } 184 185 if (ro->ro_rt == NULL) { 186 dst->sin_family = AF_INET; 187 dst->sin_len = sizeof(*dst); 188 dst->sin_addr = ip->ip_dst; 189 ro->ro_tableid = m->m_pkthdr.ph_rtableid; 190 } 191 192 if ((IN_MULTICAST(ip->ip_dst.s_addr) || 193 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 194 imo != NULL && (ifp = if_get(imo->imo_ifidx)) != NULL) { 195 196 mtu = ifp->if_mtu; 197 if (ip->ip_src.s_addr == INADDR_ANY) { 198 struct in_ifaddr *ia; 199 200 IFP_TO_IA(ifp, ia); 201 if (ia != NULL) 202 ip->ip_src = ia->ia_addr.sin_addr; 203 } 204 } else { 205 struct in_ifaddr *ia; 206 207 if (ro->ro_rt == NULL) 208 ro->ro_rt = rtalloc_mpath(&ro->ro_dst, 209 &ip->ip_src.s_addr, ro->ro_tableid); 210 211 if (ro->ro_rt == NULL) { 212 ipstat_inc(ips_noroute); 213 error = EHOSTUNREACH; 214 goto bad; 215 } 216 217 ia = ifatoia(ro->ro_rt->rt_ifa); 218 if (ISSET(ro->ro_rt->rt_flags, RTF_LOCAL)) 219 ifp = if_get(rtable_loindex(m->m_pkthdr.ph_rtableid)); 220 else 221 ifp = if_get(ro->ro_rt->rt_ifidx); 222 /* 223 * We aren't using rtisvalid() here because the UP/DOWN state 224 * machine is broken with some Ethernet drivers like em(4). 225 * As a result we might try to use an invalid cached route 226 * entry while an interface is being detached. 227 */ 228 if (ifp == NULL) { 229 ipstat_inc(ips_noroute); 230 error = EHOSTUNREACH; 231 goto bad; 232 } 233 if ((mtu = ro->ro_rt->rt_mtu) == 0) 234 mtu = ifp->if_mtu; 235 236 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 237 dst = satosin(ro->ro_rt->rt_gateway); 238 239 /* Set the source IP address */ 240 if (ip->ip_src.s_addr == INADDR_ANY && ia) 241 ip->ip_src = ia->ia_addr.sin_addr; 242 } 243 244 #ifdef IPSEC 245 if (ipsec_in_use || inp != NULL) { 246 /* Do we have any pending SAs to apply ? */ 247 error = ip_output_ipsec_lookup(m, hlen, inp, &tdb, 248 ipsecflowinfo); 249 if (error) { 250 /* Should silently drop packet */ 251 if (error == -EINVAL) 252 error = 0; 253 goto bad; 254 } 255 if (tdb != NULL) { 256 /* 257 * If it needs TCP/UDP hardware-checksumming, do the 258 * computation now. 259 */ 260 in_proto_cksum_out(m, NULL); 261 } 262 } 263 #endif /* IPSEC */ 264 265 if (IN_MULTICAST(ip->ip_dst.s_addr) || 266 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 267 268 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 269 M_BCAST : M_MCAST; 270 271 /* 272 * IP destination address is multicast. Make sure "dst" 273 * still points to the address in "ro". (It may have been 274 * changed to point to a gateway address, above.) 275 */ 276 dst = satosin(&ro->ro_dst); 277 278 /* 279 * See if the caller provided any multicast options 280 */ 281 if (imo != NULL) 282 ip->ip_ttl = imo->imo_ttl; 283 else 284 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 285 286 /* 287 * if we don't know the outgoing ifp yet, we can't generate 288 * output 289 */ 290 if (!ifp) { 291 ipstat_inc(ips_noroute); 292 error = EHOSTUNREACH; 293 goto bad; 294 } 295 296 /* 297 * Confirm that the outgoing interface supports multicast, 298 * but only if the packet actually is going out on that 299 * interface (i.e., no IPsec is applied). 300 */ 301 if ((((m->m_flags & M_MCAST) && 302 (ifp->if_flags & IFF_MULTICAST) == 0) || 303 ((m->m_flags & M_BCAST) && 304 (ifp->if_flags & IFF_BROADCAST) == 0)) && (tdb == NULL)) { 305 ipstat_inc(ips_noroute); 306 error = ENETUNREACH; 307 goto bad; 308 } 309 310 /* 311 * If source address not specified yet, use address 312 * of outgoing interface. 313 */ 314 if (ip->ip_src.s_addr == INADDR_ANY) { 315 struct in_ifaddr *ia; 316 317 IFP_TO_IA(ifp, ia); 318 if (ia != NULL) 319 ip->ip_src = ia->ia_addr.sin_addr; 320 } 321 322 if ((imo == NULL || imo->imo_loop) && 323 in_hasmulti(&ip->ip_dst, ifp)) { 324 /* 325 * If we belong to the destination multicast group 326 * on the outgoing interface, and the caller did not 327 * forbid loopback, loop back a copy. 328 * Can't defer TCP/UDP checksumming, do the 329 * computation now. 330 */ 331 in_proto_cksum_out(m, NULL); 332 ip_mloopback(ifp, m, dst); 333 } 334 #ifdef MROUTING 335 else { 336 /* 337 * If we are acting as a multicast router, perform 338 * multicast forwarding as if the packet had just 339 * arrived on the interface to which we are about 340 * to send. The multicast forwarding function 341 * recursively calls this function, using the 342 * IP_FORWARDING flag to prevent infinite recursion. 343 * 344 * Multicasts that are looped back by ip_mloopback(), 345 * above, will be forwarded by the ip_input() routine, 346 * if necessary. 347 */ 348 if (ipmforwarding && ip_mrouter[ifp->if_rdomain] && 349 (flags & IP_FORWARDING) == 0) { 350 int rv; 351 352 KERNEL_LOCK(); 353 rv = ip_mforward(m, ifp); 354 KERNEL_UNLOCK(); 355 if (rv != 0) 356 goto bad; 357 } 358 } 359 #endif 360 /* 361 * Multicasts with a time-to-live of zero may be looped- 362 * back, above, but must not be transmitted on a network. 363 * Also, multicasts addressed to the loopback interface 364 * are not sent -- the above call to ip_mloopback() will 365 * loop back a copy if this host actually belongs to the 366 * destination group on the loopback interface. 367 */ 368 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) 369 goto bad; 370 371 goto sendit; 372 } 373 374 /* 375 * Look for broadcast address and verify user is allowed to send 376 * such a packet; if the packet is going in an IPsec tunnel, skip 377 * this check. 378 */ 379 if ((tdb == NULL) && ((dst->sin_addr.s_addr == INADDR_BROADCAST) || 380 (ro && ro->ro_rt && ISSET(ro->ro_rt->rt_flags, RTF_BROADCAST)))) { 381 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 382 error = EADDRNOTAVAIL; 383 goto bad; 384 } 385 if ((flags & IP_ALLOWBROADCAST) == 0) { 386 error = EACCES; 387 goto bad; 388 } 389 390 /* Don't allow broadcast messages to be fragmented */ 391 if (ntohs(ip->ip_len) > ifp->if_mtu) { 392 error = EMSGSIZE; 393 goto bad; 394 } 395 m->m_flags |= M_BCAST; 396 } else 397 m->m_flags &= ~M_BCAST; 398 399 sendit: 400 /* 401 * If we're doing Path MTU discovery, we need to set DF unless 402 * the route's MTU is locked. 403 */ 404 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 405 (ro->ro_rt->rt_locks & RTV_MTU) == 0) 406 ip->ip_off |= htons(IP_DF); 407 408 #ifdef IPSEC 409 /* 410 * Check if the packet needs encapsulation. 411 */ 412 if (tdb != NULL) { 413 /* Callee frees mbuf */ 414 error = ip_output_ipsec_send(tdb, m, ro, 415 (flags & IP_FORWARDING) ? 1 : 0); 416 goto done; 417 } 418 #endif /* IPSEC */ 419 420 /* 421 * Packet filter 422 */ 423 #if NPF > 0 424 if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT, 425 ifp, &m) != PF_PASS) { 426 error = EACCES; 427 goto bad; 428 } 429 if (m == NULL) 430 goto done; 431 ip = mtod(m, struct ip *); 432 hlen = ip->ip_hl << 2; 433 if ((m->m_pkthdr.pf.flags & (PF_TAG_REROUTE | PF_TAG_GENERATED)) == 434 (PF_TAG_REROUTE | PF_TAG_GENERATED)) 435 /* already rerun the route lookup, go on */ 436 m->m_pkthdr.pf.flags &= ~(PF_TAG_GENERATED | PF_TAG_REROUTE); 437 else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { 438 /* tag as generated to skip over pf_test on rerun */ 439 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 440 ro = NULL; 441 if_put(ifp); /* drop reference since target changed */ 442 ifp = NULL; 443 goto reroute; 444 } 445 #endif 446 in_proto_cksum_out(m, ifp); 447 448 #ifdef IPSEC 449 if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) && 450 (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) { 451 error = EHOSTUNREACH; 452 goto bad; 453 } 454 #endif 455 456 /* 457 * If small enough for interface, can just send directly. 458 */ 459 if (ntohs(ip->ip_len) <= mtu) { 460 ip->ip_sum = 0; 461 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 462 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 463 else { 464 ipstat_inc(ips_outswcsum); 465 ip->ip_sum = in_cksum(m, hlen); 466 } 467 468 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 469 goto done; 470 } 471 472 /* 473 * Too large for interface; fragment if possible. 474 * Must be able to put at least 8 bytes per fragment. 475 */ 476 if (ip->ip_off & htons(IP_DF)) { 477 #ifdef IPSEC 478 if (ip_mtudisc) 479 ipsec_adjust_mtu(m, ifp->if_mtu); 480 #endif 481 error = EMSGSIZE; 482 #if NPF > 0 483 /* pf changed routing table, use orig rtable for path MTU */ 484 if (ro->ro_tableid != orig_rtableid) { 485 rtfree(ro->ro_rt); 486 ro->ro_tableid = orig_rtableid; 487 ro->ro_rt = icmp_mtudisc_clone( 488 satosin(&ro->ro_dst)->sin_addr, ro->ro_tableid, 0); 489 } 490 #endif 491 /* 492 * This case can happen if the user changed the MTU 493 * of an interface after enabling IP on it. Because 494 * most netifs don't keep track of routes pointing to 495 * them, there is no way for one to update all its 496 * routes when the MTU is changed. 497 */ 498 if (rtisvalid(ro->ro_rt) && 499 ISSET(ro->ro_rt->rt_flags, RTF_HOST) && 500 !(ro->ro_rt->rt_locks & RTV_MTU) && 501 (ro->ro_rt->rt_mtu > ifp->if_mtu)) { 502 ro->ro_rt->rt_mtu = ifp->if_mtu; 503 } 504 ipstat_inc(ips_cantfrag); 505 goto bad; 506 } 507 508 error = ip_fragment(m, &fml, ifp, mtu); 509 if (error) 510 goto done; 511 512 while ((m = ml_dequeue(&fml)) != NULL) { 513 error = ifp->if_output(ifp, m, sintosa(dst), ro->ro_rt); 514 if (error) 515 break; 516 } 517 if (error) 518 ml_purge(&fml); 519 else 520 ipstat_inc(ips_fragmented); 521 522 done: 523 if (ro == &iproute && ro->ro_rt) 524 rtfree(ro->ro_rt); 525 if_put(ifp); 526 #ifdef IPSEC 527 tdb_unref(tdb); 528 #endif /* IPSEC */ 529 return (error); 530 531 bad: 532 m_freem(m); 533 goto done; 534 } 535 536 #ifdef IPSEC 537 int 538 ip_output_ipsec_lookup(struct mbuf *m, int hlen, struct inpcb *inp, 539 struct tdb **tdbout, int ipsecflowinfo) 540 { 541 struct m_tag *mtag; 542 struct tdb_ident *tdbi; 543 struct tdb *tdb; 544 struct ipsec_ids *ids = NULL; 545 int error; 546 547 /* Do we have any pending SAs to apply ? */ 548 if (ipsecflowinfo) 549 ids = ipsp_ids_lookup(ipsecflowinfo); 550 error = ipsp_spd_lookup(m, AF_INET, hlen, IPSP_DIRECTION_OUT, 551 NULL, inp, &tdb, ids); 552 ipsp_ids_free(ids); 553 if (error || tdb == NULL) { 554 *tdbout = NULL; 555 return error; 556 } 557 /* Loop detection */ 558 for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { 559 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE) 560 continue; 561 tdbi = (struct tdb_ident *)(mtag + 1); 562 if (tdbi->spi == tdb->tdb_spi && 563 tdbi->proto == tdb->tdb_sproto && 564 tdbi->rdomain == tdb->tdb_rdomain && 565 !memcmp(&tdbi->dst, &tdb->tdb_dst, 566 sizeof(union sockaddr_union))) { 567 /* no IPsec needed */ 568 tdb_unref(tdb); 569 *tdbout = NULL; 570 return 0; 571 } 572 } 573 *tdbout = tdb; 574 return 0; 575 } 576 577 void 578 ip_output_ipsec_pmtu_update(struct tdb *tdb, struct route *ro, 579 struct in_addr dst, int rtableid, int transportmode) 580 { 581 struct rtentry *rt = NULL; 582 int rt_mtucloned = 0; 583 584 /* Find a host route to store the mtu in */ 585 if (ro != NULL) 586 rt = ro->ro_rt; 587 /* but don't add a PMTU route for transport mode SAs */ 588 if (transportmode) 589 rt = NULL; 590 else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 591 rt = icmp_mtudisc_clone(dst, rtableid, 1); 592 rt_mtucloned = 1; 593 } 594 DPRINTF("spi %08x mtu %d rt %p cloned %d", 595 ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned); 596 if (rt != NULL) { 597 rt->rt_mtu = tdb->tdb_mtu; 598 if (ro != NULL && ro->ro_rt != NULL) { 599 rtfree(ro->ro_rt); 600 ro->ro_rt = rtalloc(&ro->ro_dst, RT_RESOLVE, rtableid); 601 } 602 if (rt_mtucloned) 603 rtfree(rt); 604 } 605 } 606 607 int 608 ip_output_ipsec_send(struct tdb *tdb, struct mbuf *m, struct route *ro, int fwd) 609 { 610 #if NPF > 0 611 struct ifnet *encif; 612 #endif 613 struct ip *ip; 614 struct in_addr dst; 615 int error, rtableid; 616 617 #if NPF > 0 618 /* 619 * Packet filter 620 */ 621 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || 622 pf_test(AF_INET, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) { 623 m_freem(m); 624 return EACCES; 625 } 626 if (m == NULL) 627 return 0; 628 /* 629 * PF_TAG_REROUTE handling or not... 630 * Packet is entering IPsec so the routing is 631 * already overruled by the IPsec policy. 632 * Until now the change was not reconsidered. 633 * What's the behaviour? 634 */ 635 in_proto_cksum_out(m, encif); 636 #endif 637 638 /* Check if we are allowed to fragment */ 639 ip = mtod(m, struct ip *); 640 dst = ip->ip_dst; 641 rtableid = m->m_pkthdr.ph_rtableid; 642 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 643 ntohs(ip->ip_len) > tdb->tdb_mtu && 644 tdb->tdb_mtutimeout > gettime()) { 645 int transportmode; 646 647 transportmode = (tdb->tdb_dst.sa.sa_family == AF_INET) && 648 (tdb->tdb_dst.sin.sin_addr.s_addr == dst.s_addr); 649 ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid, 650 transportmode); 651 ipsec_adjust_mtu(m, tdb->tdb_mtu); 652 m_freem(m); 653 return EMSGSIZE; 654 } 655 /* propagate IP_DF for v4-over-v6 */ 656 if (ip_mtudisc && ip->ip_off & htons(IP_DF)) 657 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 658 659 /* 660 * Clear these -- they'll be set in the recursive invocation 661 * as needed. 662 */ 663 m->m_flags &= ~(M_MCAST | M_BCAST); 664 665 /* Callee frees mbuf */ 666 KERNEL_LOCK(); 667 error = ipsp_process_packet(m, tdb, AF_INET, 0); 668 KERNEL_UNLOCK(); 669 if (error) { 670 ipsecstat_inc(ipsec_odrops); 671 tdbstat_inc(tdb, tdb_odrops); 672 } 673 if (ip_mtudisc && error == EMSGSIZE) 674 ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid, 0); 675 return error; 676 } 677 #endif /* IPSEC */ 678 679 int 680 ip_fragment(struct mbuf *m, struct mbuf_list *fml, struct ifnet *ifp, 681 u_long mtu) 682 { 683 struct ip *ip, *mhip; 684 struct mbuf *m0; 685 int len, hlen, off; 686 int mhlen, firstlen; 687 int error; 688 689 ml_init(fml); 690 ml_enqueue(fml, m); 691 692 ip = mtod(m, struct ip *); 693 hlen = ip->ip_hl << 2; 694 len = (mtu - hlen) &~ 7; 695 if (len < 8) { 696 error = EMSGSIZE; 697 goto bad; 698 } 699 700 /* 701 * If we are doing fragmentation, we can't defer TCP/UDP 702 * checksumming; compute the checksum and clear the flag. 703 */ 704 in_proto_cksum_out(m, NULL); 705 firstlen = len; 706 707 /* 708 * Loop through length of segment after first fragment, 709 * make new header and copy data of each part and link onto chain. 710 */ 711 m0 = m; 712 mhlen = sizeof (struct ip); 713 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 714 MGETHDR(m, M_DONTWAIT, MT_HEADER); 715 if (m == NULL) { 716 error = ENOBUFS; 717 goto bad; 718 } 719 ml_enqueue(fml, m); 720 if ((error = m_dup_pkthdr(m, m0, M_DONTWAIT)) != 0) 721 goto bad; 722 m->m_data += max_linkhdr; 723 mhip = mtod(m, struct ip *); 724 *mhip = *ip; 725 if (hlen > sizeof (struct ip)) { 726 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 727 mhip->ip_hl = mhlen >> 2; 728 } 729 m->m_len = mhlen; 730 mhip->ip_off = ((off - hlen) >> 3) + 731 (ntohs(ip->ip_off) & ~IP_MF); 732 if (ip->ip_off & htons(IP_MF)) 733 mhip->ip_off |= IP_MF; 734 if (off + len >= ntohs(ip->ip_len)) 735 len = ntohs(ip->ip_len) - off; 736 else 737 mhip->ip_off |= IP_MF; 738 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 739 m->m_next = m_copym(m0, off, len, M_NOWAIT); 740 if (m->m_next == NULL) { 741 error = ENOBUFS; 742 goto bad; 743 } 744 m->m_pkthdr.len = mhlen + len; 745 m->m_pkthdr.ph_ifidx = 0; 746 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 747 mhip->ip_sum = 0; 748 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 749 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 750 else { 751 ipstat_inc(ips_outswcsum); 752 mhip->ip_sum = in_cksum(m, mhlen); 753 } 754 } 755 /* 756 * Update first fragment by trimming what's been copied out 757 * and updating header, then send each fragment (in order). 758 */ 759 m = m0; 760 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 761 m->m_pkthdr.len = hlen + firstlen; 762 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 763 ip->ip_off |= htons(IP_MF); 764 ip->ip_sum = 0; 765 if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4)) 766 m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT; 767 else { 768 ipstat_inc(ips_outswcsum); 769 ip->ip_sum = in_cksum(m, hlen); 770 } 771 772 ipstat_add(ips_ofragments, ml_len(fml)); 773 return (0); 774 775 bad: 776 ipstat_inc(ips_odropped); 777 ml_purge(fml); 778 return (error); 779 } 780 781 /* 782 * Insert IP options into preformed packet. 783 * Adjust IP destination as required for IP source routing, 784 * as indicated by a non-zero in_addr at the start of the options. 785 */ 786 struct mbuf * 787 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 788 { 789 struct ipoption *p = mtod(opt, struct ipoption *); 790 struct mbuf *n; 791 struct ip *ip = mtod(m, struct ip *); 792 unsigned int optlen; 793 794 optlen = opt->m_len - sizeof(p->ipopt_dst); 795 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 796 return (m); /* XXX should fail */ 797 798 /* check if options will fit to IP header */ 799 if ((optlen + sizeof(struct ip)) > (0x0f << 2)) { 800 *phlen = sizeof(struct ip); 801 return (m); 802 } 803 804 if (p->ipopt_dst.s_addr) 805 ip->ip_dst = p->ipopt_dst; 806 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 807 MGETHDR(n, M_DONTWAIT, MT_HEADER); 808 if (n == NULL) 809 return (m); 810 M_MOVE_HDR(n, m); 811 n->m_pkthdr.len += optlen; 812 m->m_len -= sizeof(struct ip); 813 m->m_data += sizeof(struct ip); 814 n->m_next = m; 815 m = n; 816 m->m_len = optlen + sizeof(struct ip); 817 m->m_data += max_linkhdr; 818 memcpy(mtod(m, caddr_t), ip, sizeof(struct ip)); 819 } else { 820 m->m_data -= optlen; 821 m->m_len += optlen; 822 m->m_pkthdr.len += optlen; 823 memmove(mtod(m, caddr_t), (caddr_t)ip, sizeof(struct ip)); 824 } 825 ip = mtod(m, struct ip *); 826 memcpy(ip + 1, p->ipopt_list, optlen); 827 *phlen = sizeof(struct ip) + optlen; 828 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 829 return (m); 830 } 831 832 /* 833 * Copy options from ip to jp, 834 * omitting those not copied during fragmentation. 835 */ 836 int 837 ip_optcopy(struct ip *ip, struct ip *jp) 838 { 839 u_char *cp, *dp; 840 int opt, optlen, cnt; 841 842 cp = (u_char *)(ip + 1); 843 dp = (u_char *)(jp + 1); 844 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 845 for (; cnt > 0; cnt -= optlen, cp += optlen) { 846 opt = cp[0]; 847 if (opt == IPOPT_EOL) 848 break; 849 if (opt == IPOPT_NOP) { 850 /* Preserve for IP mcast tunnel's LSRR alignment. */ 851 *dp++ = IPOPT_NOP; 852 optlen = 1; 853 continue; 854 } 855 #ifdef DIAGNOSTIC 856 if (cnt < IPOPT_OLEN + sizeof(*cp)) 857 panic("malformed IPv4 option passed to ip_optcopy"); 858 #endif 859 optlen = cp[IPOPT_OLEN]; 860 #ifdef DIAGNOSTIC 861 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 862 panic("malformed IPv4 option passed to ip_optcopy"); 863 #endif 864 /* bogus lengths should have been caught by ip_dooptions */ 865 if (optlen > cnt) 866 optlen = cnt; 867 if (IPOPT_COPIED(opt)) { 868 memcpy(dp, cp, optlen); 869 dp += optlen; 870 } 871 } 872 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 873 *dp++ = IPOPT_EOL; 874 return (optlen); 875 } 876 877 /* 878 * IP socket option processing. 879 */ 880 int 881 ip_ctloutput(int op, struct socket *so, int level, int optname, 882 struct mbuf *m) 883 { 884 struct inpcb *inp = sotoinpcb(so); 885 int optval = 0; 886 struct proc *p = curproc; /* XXX */ 887 int error = 0; 888 u_int rtableid, rtid = 0; 889 890 if (level != IPPROTO_IP) 891 return (EINVAL); 892 893 rtableid = p->p_p->ps_rtableid; 894 895 switch (op) { 896 case PRCO_SETOPT: 897 switch (optname) { 898 case IP_OPTIONS: 899 return (ip_pcbopts(&inp->inp_options, m)); 900 901 case IP_TOS: 902 case IP_TTL: 903 case IP_MINTTL: 904 case IP_RECVOPTS: 905 case IP_RECVRETOPTS: 906 case IP_RECVDSTADDR: 907 case IP_RECVIF: 908 case IP_RECVTTL: 909 case IP_RECVDSTPORT: 910 case IP_RECVRTABLE: 911 case IP_IPSECFLOWINFO: 912 if (m == NULL || m->m_len != sizeof(int)) 913 error = EINVAL; 914 else { 915 optval = *mtod(m, int *); 916 switch (optname) { 917 918 case IP_TOS: 919 inp->inp_ip.ip_tos = optval; 920 break; 921 922 case IP_TTL: 923 if (optval > 0 && optval <= MAXTTL) 924 inp->inp_ip.ip_ttl = optval; 925 else if (optval == -1) 926 inp->inp_ip.ip_ttl = ip_defttl; 927 else 928 error = EINVAL; 929 break; 930 931 case IP_MINTTL: 932 if (optval >= 0 && optval <= MAXTTL) 933 inp->inp_ip_minttl = optval; 934 else 935 error = EINVAL; 936 break; 937 #define OPTSET(bit) \ 938 if (optval) \ 939 inp->inp_flags |= bit; \ 940 else \ 941 inp->inp_flags &= ~bit; 942 943 case IP_RECVOPTS: 944 OPTSET(INP_RECVOPTS); 945 break; 946 947 case IP_RECVRETOPTS: 948 OPTSET(INP_RECVRETOPTS); 949 break; 950 951 case IP_RECVDSTADDR: 952 OPTSET(INP_RECVDSTADDR); 953 break; 954 case IP_RECVIF: 955 OPTSET(INP_RECVIF); 956 break; 957 case IP_RECVTTL: 958 OPTSET(INP_RECVTTL); 959 break; 960 case IP_RECVDSTPORT: 961 OPTSET(INP_RECVDSTPORT); 962 break; 963 case IP_RECVRTABLE: 964 OPTSET(INP_RECVRTABLE); 965 break; 966 case IP_IPSECFLOWINFO: 967 OPTSET(INP_IPSECFLOWINFO); 968 break; 969 } 970 } 971 break; 972 #undef OPTSET 973 974 case IP_MULTICAST_IF: 975 case IP_MULTICAST_TTL: 976 case IP_MULTICAST_LOOP: 977 case IP_ADD_MEMBERSHIP: 978 case IP_DROP_MEMBERSHIP: 979 error = ip_setmoptions(optname, &inp->inp_moptions, m, 980 inp->inp_rtableid); 981 break; 982 983 case IP_PORTRANGE: 984 if (m == NULL || m->m_len != sizeof(int)) 985 error = EINVAL; 986 else { 987 optval = *mtod(m, int *); 988 989 switch (optval) { 990 991 case IP_PORTRANGE_DEFAULT: 992 inp->inp_flags &= ~(INP_LOWPORT); 993 inp->inp_flags &= ~(INP_HIGHPORT); 994 break; 995 996 case IP_PORTRANGE_HIGH: 997 inp->inp_flags &= ~(INP_LOWPORT); 998 inp->inp_flags |= INP_HIGHPORT; 999 break; 1000 1001 case IP_PORTRANGE_LOW: 1002 inp->inp_flags &= ~(INP_HIGHPORT); 1003 inp->inp_flags |= INP_LOWPORT; 1004 break; 1005 1006 default: 1007 1008 error = EINVAL; 1009 break; 1010 } 1011 } 1012 break; 1013 case IP_AUTH_LEVEL: 1014 case IP_ESP_TRANS_LEVEL: 1015 case IP_ESP_NETWORK_LEVEL: 1016 case IP_IPCOMP_LEVEL: 1017 #ifndef IPSEC 1018 error = EOPNOTSUPP; 1019 #else 1020 if (m == NULL || m->m_len != sizeof(int)) { 1021 error = EINVAL; 1022 break; 1023 } 1024 optval = *mtod(m, int *); 1025 1026 if (optval < IPSEC_LEVEL_BYPASS || 1027 optval > IPSEC_LEVEL_UNIQUE) { 1028 error = EINVAL; 1029 break; 1030 } 1031 1032 switch (optname) { 1033 case IP_AUTH_LEVEL: 1034 if (optval < IPSEC_AUTH_LEVEL_DEFAULT && 1035 suser(p)) { 1036 error = EACCES; 1037 break; 1038 } 1039 inp->inp_seclevel[SL_AUTH] = optval; 1040 break; 1041 1042 case IP_ESP_TRANS_LEVEL: 1043 if (optval < IPSEC_ESP_TRANS_LEVEL_DEFAULT && 1044 suser(p)) { 1045 error = EACCES; 1046 break; 1047 } 1048 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1049 break; 1050 1051 case IP_ESP_NETWORK_LEVEL: 1052 if (optval < IPSEC_ESP_NETWORK_LEVEL_DEFAULT && 1053 suser(p)) { 1054 error = EACCES; 1055 break; 1056 } 1057 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1058 break; 1059 case IP_IPCOMP_LEVEL: 1060 if (optval < IPSEC_IPCOMP_LEVEL_DEFAULT && 1061 suser(p)) { 1062 error = EACCES; 1063 break; 1064 } 1065 inp->inp_seclevel[SL_IPCOMP] = optval; 1066 break; 1067 } 1068 #endif 1069 break; 1070 1071 case IP_IPSEC_LOCAL_ID: 1072 case IP_IPSEC_REMOTE_ID: 1073 error = EOPNOTSUPP; 1074 break; 1075 case SO_RTABLE: 1076 if (m == NULL || m->m_len < sizeof(u_int)) { 1077 error = EINVAL; 1078 break; 1079 } 1080 rtid = *mtod(m, u_int *); 1081 if (inp->inp_rtableid == rtid) 1082 break; 1083 /* needs privileges to switch when already set */ 1084 if (rtableid != rtid && rtableid != 0 && 1085 (error = suser(p)) != 0) 1086 break; 1087 /* table must exist */ 1088 if (!rtable_exists(rtid)) { 1089 error = EINVAL; 1090 break; 1091 } 1092 if (inp->inp_lport) { 1093 error = EBUSY; 1094 break; 1095 } 1096 inp->inp_rtableid = rtid; 1097 in_pcbrehash(inp); 1098 break; 1099 case IP_PIPEX: 1100 if (m != NULL && m->m_len == sizeof(int)) 1101 inp->inp_pipex = *mtod(m, int *); 1102 else 1103 error = EINVAL; 1104 break; 1105 1106 default: 1107 error = ENOPROTOOPT; 1108 break; 1109 } 1110 break; 1111 1112 case PRCO_GETOPT: 1113 switch (optname) { 1114 case IP_OPTIONS: 1115 case IP_RETOPTS: 1116 if (inp->inp_options) { 1117 m->m_len = inp->inp_options->m_len; 1118 memcpy(mtod(m, caddr_t), 1119 mtod(inp->inp_options, caddr_t), m->m_len); 1120 } else 1121 m->m_len = 0; 1122 break; 1123 1124 case IP_TOS: 1125 case IP_TTL: 1126 case IP_MINTTL: 1127 case IP_RECVOPTS: 1128 case IP_RECVRETOPTS: 1129 case IP_RECVDSTADDR: 1130 case IP_RECVIF: 1131 case IP_RECVTTL: 1132 case IP_RECVDSTPORT: 1133 case IP_RECVRTABLE: 1134 case IP_IPSECFLOWINFO: 1135 case IP_IPDEFTTL: 1136 m->m_len = sizeof(int); 1137 switch (optname) { 1138 1139 case IP_TOS: 1140 optval = inp->inp_ip.ip_tos; 1141 break; 1142 1143 case IP_TTL: 1144 optval = inp->inp_ip.ip_ttl; 1145 break; 1146 1147 case IP_MINTTL: 1148 optval = inp->inp_ip_minttl; 1149 break; 1150 1151 case IP_IPDEFTTL: 1152 optval = ip_defttl; 1153 break; 1154 1155 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1156 1157 case IP_RECVOPTS: 1158 optval = OPTBIT(INP_RECVOPTS); 1159 break; 1160 1161 case IP_RECVRETOPTS: 1162 optval = OPTBIT(INP_RECVRETOPTS); 1163 break; 1164 1165 case IP_RECVDSTADDR: 1166 optval = OPTBIT(INP_RECVDSTADDR); 1167 break; 1168 case IP_RECVIF: 1169 optval = OPTBIT(INP_RECVIF); 1170 break; 1171 case IP_RECVTTL: 1172 optval = OPTBIT(INP_RECVTTL); 1173 break; 1174 case IP_RECVDSTPORT: 1175 optval = OPTBIT(INP_RECVDSTPORT); 1176 break; 1177 case IP_RECVRTABLE: 1178 optval = OPTBIT(INP_RECVRTABLE); 1179 break; 1180 case IP_IPSECFLOWINFO: 1181 optval = OPTBIT(INP_IPSECFLOWINFO); 1182 break; 1183 } 1184 *mtod(m, int *) = optval; 1185 break; 1186 1187 case IP_MULTICAST_IF: 1188 case IP_MULTICAST_TTL: 1189 case IP_MULTICAST_LOOP: 1190 case IP_ADD_MEMBERSHIP: 1191 case IP_DROP_MEMBERSHIP: 1192 error = ip_getmoptions(optname, inp->inp_moptions, m); 1193 break; 1194 1195 case IP_PORTRANGE: 1196 m->m_len = sizeof(int); 1197 1198 if (inp->inp_flags & INP_HIGHPORT) 1199 optval = IP_PORTRANGE_HIGH; 1200 else if (inp->inp_flags & INP_LOWPORT) 1201 optval = IP_PORTRANGE_LOW; 1202 else 1203 optval = 0; 1204 1205 *mtod(m, int *) = optval; 1206 break; 1207 1208 case IP_AUTH_LEVEL: 1209 case IP_ESP_TRANS_LEVEL: 1210 case IP_ESP_NETWORK_LEVEL: 1211 case IP_IPCOMP_LEVEL: 1212 #ifndef IPSEC 1213 m->m_len = sizeof(int); 1214 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1215 #else 1216 m->m_len = sizeof(int); 1217 switch (optname) { 1218 case IP_AUTH_LEVEL: 1219 optval = inp->inp_seclevel[SL_AUTH]; 1220 break; 1221 1222 case IP_ESP_TRANS_LEVEL: 1223 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1224 break; 1225 1226 case IP_ESP_NETWORK_LEVEL: 1227 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1228 break; 1229 case IP_IPCOMP_LEVEL: 1230 optval = inp->inp_seclevel[SL_IPCOMP]; 1231 break; 1232 } 1233 *mtod(m, int *) = optval; 1234 #endif 1235 break; 1236 case IP_IPSEC_LOCAL_ID: 1237 case IP_IPSEC_REMOTE_ID: 1238 error = EOPNOTSUPP; 1239 break; 1240 case SO_RTABLE: 1241 m->m_len = sizeof(u_int); 1242 *mtod(m, u_int *) = inp->inp_rtableid; 1243 break; 1244 case IP_PIPEX: 1245 m->m_len = sizeof(int); 1246 *mtod(m, int *) = inp->inp_pipex; 1247 break; 1248 default: 1249 error = ENOPROTOOPT; 1250 break; 1251 } 1252 break; 1253 } 1254 return (error); 1255 } 1256 1257 /* 1258 * Set up IP options in pcb for insertion in output packets. 1259 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1260 * with destination address if source routed. 1261 */ 1262 int 1263 ip_pcbopts(struct mbuf **pcbopt, struct mbuf *m) 1264 { 1265 struct mbuf *n; 1266 struct ipoption *p; 1267 int cnt, off, optlen; 1268 u_char *cp; 1269 u_char opt; 1270 1271 /* turn off any old options */ 1272 m_freem(*pcbopt); 1273 *pcbopt = NULL; 1274 if (m == NULL || m->m_len == 0) { 1275 /* 1276 * Only turning off any previous options. 1277 */ 1278 return (0); 1279 } 1280 1281 if (m->m_len % sizeof(int32_t) || 1282 m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1283 return (EINVAL); 1284 1285 /* Don't sleep because NET_LOCK() is hold. */ 1286 if ((n = m_get(M_NOWAIT, MT_SOOPTS)) == NULL) 1287 return (ENOBUFS); 1288 p = mtod(n, struct ipoption *); 1289 memset(p, 0, sizeof (*p)); /* 0 = IPOPT_EOL, needed for padding */ 1290 n->m_len = sizeof(struct in_addr); 1291 1292 off = 0; 1293 cnt = m->m_len; 1294 cp = mtod(m, u_char *); 1295 1296 while (cnt > 0) { 1297 opt = cp[IPOPT_OPTVAL]; 1298 1299 if (opt == IPOPT_NOP || opt == IPOPT_EOL) { 1300 optlen = 1; 1301 } else { 1302 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1303 goto bad; 1304 optlen = cp[IPOPT_OLEN]; 1305 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1306 goto bad; 1307 } 1308 switch (opt) { 1309 default: 1310 memcpy(p->ipopt_list + off, cp, optlen); 1311 break; 1312 1313 case IPOPT_LSRR: 1314 case IPOPT_SSRR: 1315 /* 1316 * user process specifies route as: 1317 * ->A->B->C->D 1318 * D must be our final destination (but we can't 1319 * check that since we may not have connected yet). 1320 * A is first hop destination, which doesn't appear in 1321 * actual IP option, but is stored before the options. 1322 */ 1323 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1324 goto bad; 1325 1326 /* 1327 * Optlen is smaller because first address is popped. 1328 * Cnt and cp will be adjusted a bit later to reflect 1329 * this. 1330 */ 1331 optlen -= sizeof(struct in_addr); 1332 p->ipopt_list[off + IPOPT_OPTVAL] = opt; 1333 p->ipopt_list[off + IPOPT_OLEN] = optlen; 1334 1335 /* 1336 * Move first hop before start of options. 1337 */ 1338 memcpy(&p->ipopt_dst, cp + IPOPT_OFFSET, 1339 sizeof(struct in_addr)); 1340 cp += sizeof(struct in_addr); 1341 cnt -= sizeof(struct in_addr); 1342 /* 1343 * Then copy rest of options 1344 */ 1345 memcpy(p->ipopt_list + off + IPOPT_OFFSET, 1346 cp + IPOPT_OFFSET, optlen - IPOPT_OFFSET); 1347 break; 1348 } 1349 off += optlen; 1350 cp += optlen; 1351 cnt -= optlen; 1352 1353 if (opt == IPOPT_EOL) 1354 break; 1355 } 1356 /* pad options to next word, since p was zeroed just adjust off */ 1357 off = (off + sizeof(int32_t) - 1) & ~(sizeof(int32_t) - 1); 1358 n->m_len += off; 1359 if (n->m_len > sizeof(*p)) { 1360 bad: 1361 m_freem(n); 1362 return (EINVAL); 1363 } 1364 1365 *pcbopt = n; 1366 return (0); 1367 } 1368 1369 /* 1370 * Lookup the interface based on the information in the ip_mreqn struct. 1371 */ 1372 int 1373 ip_multicast_if(struct ip_mreqn *mreq, u_int rtableid, unsigned int *ifidx) 1374 { 1375 struct sockaddr_in sin; 1376 struct rtentry *rt; 1377 1378 /* 1379 * In case userland provides the imr_ifindex use this as interface. 1380 * If no interface address was provided, use the interface of 1381 * the route to the given multicast address. 1382 */ 1383 if (mreq->imr_ifindex != 0) { 1384 *ifidx = mreq->imr_ifindex; 1385 } else if (mreq->imr_address.s_addr == INADDR_ANY) { 1386 memset(&sin, 0, sizeof(sin)); 1387 sin.sin_len = sizeof(sin); 1388 sin.sin_family = AF_INET; 1389 sin.sin_addr = mreq->imr_multiaddr; 1390 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 1391 if (!rtisvalid(rt)) { 1392 rtfree(rt); 1393 return EADDRNOTAVAIL; 1394 } 1395 *ifidx = rt->rt_ifidx; 1396 rtfree(rt); 1397 } else { 1398 memset(&sin, 0, sizeof(sin)); 1399 sin.sin_len = sizeof(sin); 1400 sin.sin_family = AF_INET; 1401 sin.sin_addr = mreq->imr_address; 1402 rt = rtalloc(sintosa(&sin), 0, rtableid); 1403 if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) { 1404 rtfree(rt); 1405 return EADDRNOTAVAIL; 1406 } 1407 *ifidx = rt->rt_ifidx; 1408 rtfree(rt); 1409 } 1410 1411 return 0; 1412 } 1413 1414 /* 1415 * Set the IP multicast options in response to user setsockopt(). 1416 */ 1417 int 1418 ip_setmoptions(int optname, struct ip_moptions **imop, struct mbuf *m, 1419 u_int rtableid) 1420 { 1421 struct in_addr addr; 1422 struct in_ifaddr *ia; 1423 struct ip_mreqn mreqn; 1424 struct ifnet *ifp = NULL; 1425 struct ip_moptions *imo = *imop; 1426 struct in_multi **immp; 1427 struct sockaddr_in sin; 1428 unsigned int ifidx; 1429 int i, error = 0; 1430 u_char loop; 1431 1432 if (imo == NULL) { 1433 /* 1434 * No multicast option buffer attached to the pcb; 1435 * allocate one and initialize to default values. 1436 */ 1437 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK|M_ZERO); 1438 immp = mallocarray(IP_MIN_MEMBERSHIPS, sizeof(*immp), M_IPMOPTS, 1439 M_WAITOK|M_ZERO); 1440 *imop = imo; 1441 imo->imo_ifidx = 0; 1442 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 1443 imo->imo_loop = IP_DEFAULT_MULTICAST_LOOP; 1444 imo->imo_num_memberships = 0; 1445 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1446 imo->imo_membership = immp; 1447 } 1448 1449 switch (optname) { 1450 1451 case IP_MULTICAST_IF: 1452 /* 1453 * Select the interface for outgoing multicast packets. 1454 */ 1455 if (m == NULL) { 1456 error = EINVAL; 1457 break; 1458 } 1459 if (m->m_len == sizeof(struct in_addr)) { 1460 addr = *(mtod(m, struct in_addr *)); 1461 } else if (m->m_len == sizeof(struct ip_mreq) || 1462 m->m_len == sizeof(struct ip_mreqn)) { 1463 memset(&mreqn, 0, sizeof(mreqn)); 1464 memcpy(&mreqn, mtod(m, void *), m->m_len); 1465 1466 /* 1467 * If an interface index is given use this 1468 * index to set the imo_ifidx but check first 1469 * that the interface actually exists. 1470 * In the other case just set the addr to 1471 * the imr_address and fall through to the 1472 * regular code. 1473 */ 1474 if (mreqn.imr_ifindex != 0) { 1475 ifp = if_get(mreqn.imr_ifindex); 1476 if (ifp == NULL || 1477 ifp->if_rdomain != rtable_l2(rtableid)) { 1478 error = EADDRNOTAVAIL; 1479 if_put(ifp); 1480 break; 1481 } 1482 imo->imo_ifidx = ifp->if_index; 1483 if_put(ifp); 1484 break; 1485 } else 1486 addr = mreqn.imr_address; 1487 } else { 1488 error = EINVAL; 1489 break; 1490 } 1491 /* 1492 * INADDR_ANY is used to remove a previous selection. 1493 * When no interface is selected, a default one is 1494 * chosen every time a multicast packet is sent. 1495 */ 1496 if (addr.s_addr == INADDR_ANY) { 1497 imo->imo_ifidx = 0; 1498 break; 1499 } 1500 /* 1501 * The selected interface is identified by its local 1502 * IP address. Find the interface and confirm that 1503 * it supports multicasting. 1504 */ 1505 memset(&sin, 0, sizeof(sin)); 1506 sin.sin_len = sizeof(sin); 1507 sin.sin_family = AF_INET; 1508 sin.sin_addr = addr; 1509 ia = ifatoia(ifa_ifwithaddr(sintosa(&sin), rtableid)); 1510 if (ia == NULL || 1511 (ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) { 1512 error = EADDRNOTAVAIL; 1513 break; 1514 } 1515 imo->imo_ifidx = ia->ia_ifp->if_index; 1516 break; 1517 1518 case IP_MULTICAST_TTL: 1519 /* 1520 * Set the IP time-to-live for outgoing multicast packets. 1521 */ 1522 if (m == NULL || m->m_len != 1) { 1523 error = EINVAL; 1524 break; 1525 } 1526 imo->imo_ttl = *(mtod(m, u_char *)); 1527 break; 1528 1529 case IP_MULTICAST_LOOP: 1530 /* 1531 * Set the loopback flag for outgoing multicast packets. 1532 * Must be zero or one. 1533 */ 1534 if (m == NULL || m->m_len != 1 || 1535 (loop = *(mtod(m, u_char *))) > 1) { 1536 error = EINVAL; 1537 break; 1538 } 1539 imo->imo_loop = loop; 1540 break; 1541 1542 case IP_ADD_MEMBERSHIP: 1543 /* 1544 * Add a multicast group membership. 1545 * Group must be a valid IP multicast address. 1546 */ 1547 if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) || 1548 m->m_len == sizeof(struct ip_mreqn))) { 1549 error = EINVAL; 1550 break; 1551 } 1552 memset(&mreqn, 0, sizeof(mreqn)); 1553 memcpy(&mreqn, mtod(m, void *), m->m_len); 1554 if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) { 1555 error = EINVAL; 1556 break; 1557 } 1558 1559 error = ip_multicast_if(&mreqn, rtableid, &ifidx); 1560 if (error) 1561 break; 1562 1563 /* 1564 * See if we found an interface, and confirm that it 1565 * supports multicast. 1566 */ 1567 ifp = if_get(ifidx); 1568 if (ifp == NULL || ifp->if_rdomain != rtable_l2(rtableid) || 1569 (ifp->if_flags & IFF_MULTICAST) == 0) { 1570 error = EADDRNOTAVAIL; 1571 if_put(ifp); 1572 break; 1573 } 1574 1575 /* 1576 * See if the membership already exists or if all the 1577 * membership slots are full. 1578 */ 1579 for (i = 0; i < imo->imo_num_memberships; ++i) { 1580 if (imo->imo_membership[i]->inm_ifidx == ifidx && 1581 imo->imo_membership[i]->inm_addr.s_addr 1582 == mreqn.imr_multiaddr.s_addr) 1583 break; 1584 } 1585 if (i < imo->imo_num_memberships) { 1586 error = EADDRINUSE; 1587 if_put(ifp); 1588 break; 1589 } 1590 if (imo->imo_num_memberships == imo->imo_max_memberships) { 1591 struct in_multi **nmships, **omships; 1592 size_t newmax; 1593 /* 1594 * Resize the vector to next power-of-two minus 1. If 1595 * the size would exceed the maximum then we know we've 1596 * really run out of entries. Otherwise, we reallocate 1597 * the vector. 1598 */ 1599 nmships = NULL; 1600 omships = imo->imo_membership; 1601 newmax = ((imo->imo_max_memberships + 1) * 2) - 1; 1602 if (newmax <= IP_MAX_MEMBERSHIPS) { 1603 nmships = mallocarray(newmax, sizeof(*nmships), 1604 M_IPMOPTS, M_NOWAIT|M_ZERO); 1605 if (nmships != NULL) { 1606 memcpy(nmships, omships, 1607 sizeof(*omships) * 1608 imo->imo_max_memberships); 1609 free(omships, M_IPMOPTS, 1610 sizeof(*omships) * 1611 imo->imo_max_memberships); 1612 imo->imo_membership = nmships; 1613 imo->imo_max_memberships = newmax; 1614 } 1615 } 1616 if (nmships == NULL) { 1617 error = ENOBUFS; 1618 if_put(ifp); 1619 break; 1620 } 1621 } 1622 /* 1623 * Everything looks good; add a new record to the multicast 1624 * address list for the given interface. 1625 */ 1626 if ((imo->imo_membership[i] = 1627 in_addmulti(&mreqn.imr_multiaddr, ifp)) == NULL) { 1628 error = ENOBUFS; 1629 if_put(ifp); 1630 break; 1631 } 1632 ++imo->imo_num_memberships; 1633 if_put(ifp); 1634 break; 1635 1636 case IP_DROP_MEMBERSHIP: 1637 /* 1638 * Drop a multicast group membership. 1639 * Group must be a valid IP multicast address. 1640 */ 1641 if (m == NULL || !(m->m_len == sizeof(struct ip_mreq) || 1642 m->m_len == sizeof(struct ip_mreqn))) { 1643 error = EINVAL; 1644 break; 1645 } 1646 memset(&mreqn, 0, sizeof(mreqn)); 1647 memcpy(&mreqn, mtod(m, void *), m->m_len); 1648 if (!IN_MULTICAST(mreqn.imr_multiaddr.s_addr)) { 1649 error = EINVAL; 1650 break; 1651 } 1652 1653 /* 1654 * If an interface address was specified, get a pointer 1655 * to its ifnet structure. 1656 */ 1657 error = ip_multicast_if(&mreqn, rtableid, &ifidx); 1658 if (error) 1659 break; 1660 1661 /* 1662 * Find the membership in the membership array. 1663 */ 1664 for (i = 0; i < imo->imo_num_memberships; ++i) { 1665 if ((ifidx == 0 || 1666 imo->imo_membership[i]->inm_ifidx == ifidx) && 1667 imo->imo_membership[i]->inm_addr.s_addr == 1668 mreqn.imr_multiaddr.s_addr) 1669 break; 1670 } 1671 if (i == imo->imo_num_memberships) { 1672 error = EADDRNOTAVAIL; 1673 break; 1674 } 1675 /* 1676 * Give up the multicast address record to which the 1677 * membership points. 1678 */ 1679 in_delmulti(imo->imo_membership[i]); 1680 /* 1681 * Remove the gap in the membership array. 1682 */ 1683 for (++i; i < imo->imo_num_memberships; ++i) 1684 imo->imo_membership[i-1] = imo->imo_membership[i]; 1685 --imo->imo_num_memberships; 1686 break; 1687 1688 default: 1689 error = EOPNOTSUPP; 1690 break; 1691 } 1692 1693 /* 1694 * If all options have default values, no need to keep the data. 1695 */ 1696 if (imo->imo_ifidx == 0 && 1697 imo->imo_ttl == IP_DEFAULT_MULTICAST_TTL && 1698 imo->imo_loop == IP_DEFAULT_MULTICAST_LOOP && 1699 imo->imo_num_memberships == 0) { 1700 free(imo->imo_membership , M_IPMOPTS, 1701 imo->imo_max_memberships * sizeof(struct in_multi *)); 1702 free(*imop, M_IPMOPTS, sizeof(**imop)); 1703 *imop = NULL; 1704 } 1705 1706 return (error); 1707 } 1708 1709 /* 1710 * Return the IP multicast options in response to user getsockopt(). 1711 */ 1712 int 1713 ip_getmoptions(int optname, struct ip_moptions *imo, struct mbuf *m) 1714 { 1715 u_char *ttl; 1716 u_char *loop; 1717 struct in_addr *addr; 1718 struct in_ifaddr *ia; 1719 struct ifnet *ifp; 1720 1721 switch (optname) { 1722 1723 case IP_MULTICAST_IF: 1724 addr = mtod(m, struct in_addr *); 1725 m->m_len = sizeof(struct in_addr); 1726 if (imo == NULL || (ifp = if_get(imo->imo_ifidx)) == NULL) 1727 addr->s_addr = INADDR_ANY; 1728 else { 1729 IFP_TO_IA(ifp, ia); 1730 addr->s_addr = (ia == NULL) ? INADDR_ANY 1731 : ia->ia_addr.sin_addr.s_addr; 1732 if_put(ifp); 1733 } 1734 return (0); 1735 1736 case IP_MULTICAST_TTL: 1737 ttl = mtod(m, u_char *); 1738 m->m_len = 1; 1739 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1740 : imo->imo_ttl; 1741 return (0); 1742 1743 case IP_MULTICAST_LOOP: 1744 loop = mtod(m, u_char *); 1745 m->m_len = 1; 1746 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1747 : imo->imo_loop; 1748 return (0); 1749 1750 default: 1751 return (EOPNOTSUPP); 1752 } 1753 } 1754 1755 /* 1756 * Discard the IP multicast options. 1757 */ 1758 void 1759 ip_freemoptions(struct ip_moptions *imo) 1760 { 1761 int i; 1762 1763 if (imo != NULL) { 1764 for (i = 0; i < imo->imo_num_memberships; ++i) 1765 in_delmulti(imo->imo_membership[i]); 1766 free(imo->imo_membership, M_IPMOPTS, 1767 imo->imo_max_memberships * sizeof(struct in_multi *)); 1768 free(imo, M_IPMOPTS, sizeof(*imo)); 1769 } 1770 } 1771 1772 /* 1773 * Routine called from ip_output() to loop back a copy of an IP multicast 1774 * packet to the input queue of a specified interface. 1775 */ 1776 void 1777 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst) 1778 { 1779 struct ip *ip; 1780 struct mbuf *copym; 1781 1782 copym = m_dup_pkt(m, max_linkhdr, M_DONTWAIT); 1783 if (copym != NULL) { 1784 /* 1785 * We don't bother to fragment if the IP length is greater 1786 * than the interface's MTU. Can this possibly matter? 1787 */ 1788 ip = mtod(copym, struct ip *); 1789 ip->ip_sum = 0; 1790 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1791 if_input_local(ifp, copym, dst->sin_family); 1792 } 1793 } 1794 1795 /* 1796 * Compute significant parts of the IPv4 checksum pseudo-header 1797 * for use in a delayed TCP/UDP checksum calculation. 1798 */ 1799 static __inline u_int16_t __attribute__((__unused__)) 1800 in_cksum_phdr(u_int32_t src, u_int32_t dst, u_int32_t lenproto) 1801 { 1802 u_int32_t sum; 1803 1804 sum = lenproto + 1805 (u_int16_t)(src >> 16) + 1806 (u_int16_t)(src /*& 0xffff*/) + 1807 (u_int16_t)(dst >> 16) + 1808 (u_int16_t)(dst /*& 0xffff*/); 1809 1810 sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/); 1811 1812 if (sum > 0xffff) 1813 sum -= 0xffff; 1814 1815 return (sum); 1816 } 1817 1818 /* 1819 * Process a delayed payload checksum calculation. 1820 */ 1821 void 1822 in_delayed_cksum(struct mbuf *m) 1823 { 1824 struct ip *ip; 1825 u_int16_t csum, offset; 1826 1827 ip = mtod(m, struct ip *); 1828 offset = ip->ip_hl << 2; 1829 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1830 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1831 csum = 0xffff; 1832 1833 switch (ip->ip_p) { 1834 case IPPROTO_TCP: 1835 offset += offsetof(struct tcphdr, th_sum); 1836 break; 1837 1838 case IPPROTO_UDP: 1839 offset += offsetof(struct udphdr, uh_sum); 1840 break; 1841 1842 case IPPROTO_ICMP: 1843 offset += offsetof(struct icmp, icmp_cksum); 1844 break; 1845 1846 default: 1847 return; 1848 } 1849 1850 if ((offset + sizeof(u_int16_t)) > m->m_len) 1851 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1852 else 1853 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1854 } 1855 1856 void 1857 in_proto_cksum_out(struct mbuf *m, struct ifnet *ifp) 1858 { 1859 struct ip *ip = mtod(m, struct ip *); 1860 1861 /* some hw and in_delayed_cksum need the pseudo header cksum */ 1862 if (m->m_pkthdr.csum_flags & 1863 (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_ICMP_CSUM_OUT)) { 1864 u_int16_t csum = 0, offset; 1865 1866 offset = ip->ip_hl << 2; 1867 if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) 1868 csum = in_cksum_phdr(ip->ip_src.s_addr, 1869 ip->ip_dst.s_addr, htonl(ntohs(ip->ip_len) - 1870 offset + ip->ip_p)); 1871 if (ip->ip_p == IPPROTO_TCP) 1872 offset += offsetof(struct tcphdr, th_sum); 1873 else if (ip->ip_p == IPPROTO_UDP) 1874 offset += offsetof(struct udphdr, uh_sum); 1875 else if (ip->ip_p == IPPROTO_ICMP) 1876 offset += offsetof(struct icmp, icmp_cksum); 1877 if ((offset + sizeof(u_int16_t)) > m->m_len) 1878 m_copyback(m, offset, sizeof(csum), &csum, M_NOWAIT); 1879 else 1880 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1881 } 1882 1883 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) { 1884 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_TCPv4) || 1885 ip->ip_hl != 5) { 1886 tcpstat_inc(tcps_outswcsum); 1887 in_delayed_cksum(m); 1888 m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */ 1889 } 1890 } else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) { 1891 if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_UDPv4) || 1892 ip->ip_hl != 5) { 1893 udpstat_inc(udps_outswcsum); 1894 in_delayed_cksum(m); 1895 m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */ 1896 } 1897 } else if (m->m_pkthdr.csum_flags & M_ICMP_CSUM_OUT) { 1898 in_delayed_cksum(m); 1899 m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */ 1900 } 1901 } 1902 1903 int 1904 in_ifcap_cksum(struct mbuf *m, struct ifnet *ifp, int ifcap) 1905 { 1906 if ((ifp == NULL) || 1907 !ISSET(ifp->if_capabilities, ifcap) || 1908 (ifp->if_bridgeidx != 0)) 1909 return (0); 1910 /* 1911 * Simplex interface sends packet back without hardware cksum. 1912 * Keep this check in sync with the condition where ether_resolve() 1913 * calls if_input_local(). 1914 */ 1915 if (ISSET(m->m_flags, M_BCAST) && 1916 ISSET(ifp->if_flags, IFF_SIMPLEX) && 1917 !m->m_pkthdr.pf.routed) 1918 return (0); 1919 return (1); 1920 } 1921