1 /* $OpenBSD: ip_output.c,v 1.159 2003/11/06 16:57:41 dhartmei Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 33 */ 34 35 #include "pf.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mbuf.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_enc.h> 48 #include <net/route.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_systm.h> 52 #include <netinet/ip.h> 53 #include <netinet/in_pcb.h> 54 #include <netinet/in_var.h> 55 #include <netinet/ip_var.h> 56 #include <netinet/ip_icmp.h> 57 #include <netinet/tcp.h> 58 #include <netinet/udp.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/udp_var.h> 62 63 #if NPF > 0 64 #include <net/pfvar.h> 65 #endif 66 67 #ifdef IPSEC 68 #ifdef ENCDEBUG 69 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 70 #else 71 #define DPRINTF(x) 72 #endif 73 74 extern u_int8_t get_sa_require(struct inpcb *); 75 76 extern int ipsec_auth_default_level; 77 extern int ipsec_esp_trans_default_level; 78 extern int ipsec_esp_network_default_level; 79 extern int ipsec_ipcomp_default_level; 80 #endif /* IPSEC */ 81 82 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 83 static void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *); 84 85 /* 86 * IP output. The packet in mbuf chain m contains a skeletal IP 87 * header (with len, off, ttl, proto, tos, src, dst). 88 * The mbuf chain containing the packet will be freed. 89 * The mbuf opt, if present, will not be freed. 90 */ 91 int 92 ip_output(struct mbuf *m0, ...) 93 { 94 struct ip *ip; 95 struct ifnet *ifp; 96 struct mbuf *m = m0; 97 int hlen = sizeof (struct ip); 98 int len, error = 0; 99 struct route iproute; 100 struct sockaddr_in *dst; 101 struct in_ifaddr *ia; 102 struct mbuf *opt; 103 struct route *ro; 104 int flags; 105 struct ip_moptions *imo; 106 va_list ap; 107 u_int8_t sproto = 0, donerouting = 0; 108 u_long mtu; 109 #ifdef IPSEC 110 u_int32_t icmp_mtu = 0; 111 union sockaddr_union sdst; 112 u_int32_t sspi; 113 struct m_tag *mtag; 114 struct tdb_ident *tdbi; 115 116 struct inpcb *inp; 117 struct tdb *tdb; 118 int s; 119 #endif /* IPSEC */ 120 121 va_start(ap, m0); 122 opt = va_arg(ap, struct mbuf *); 123 ro = va_arg(ap, struct route *); 124 flags = va_arg(ap, int); 125 imo = va_arg(ap, struct ip_moptions *); 126 #ifdef IPSEC 127 inp = va_arg(ap, struct inpcb *); 128 if (inp && (inp->inp_flags & INP_IPV6) != 0) 129 panic("ip_output: IPv6 pcb is passed"); 130 #endif /* IPSEC */ 131 va_end(ap); 132 133 #ifdef DIAGNOSTIC 134 if ((m->m_flags & M_PKTHDR) == 0) 135 panic("ip_output no HDR"); 136 #endif 137 if (opt) { 138 m = ip_insertoptions(m, opt, &len); 139 hlen = len; 140 } 141 142 ip = mtod(m, struct ip *); 143 144 /* 145 * Fill in IP header. 146 */ 147 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 148 ip->ip_v = IPVERSION; 149 ip->ip_off &= htons(IP_DF); 150 ip->ip_id = htons(ip_randomid()); 151 ip->ip_hl = hlen >> 2; 152 ipstat.ips_localout++; 153 } else { 154 hlen = ip->ip_hl << 2; 155 } 156 157 /* 158 * If we're missing the IP source address, do a route lookup. We'll 159 * remember this result, in case we don't need to do any IPsec 160 * processing on the packet. We need the source address so we can 161 * do an SPD lookup in IPsec; for most packets, the source address 162 * is set at a higher level protocol. ICMPs and other packets 163 * though (e.g., traceroute) have a source address of zeroes. 164 */ 165 if (ip->ip_src.s_addr == INADDR_ANY) { 166 donerouting = 1; 167 168 if (ro == 0) { 169 ro = &iproute; 170 bzero((caddr_t)ro, sizeof (*ro)); 171 } 172 173 dst = satosin(&ro->ro_dst); 174 175 /* 176 * If there is a cached route, check that it is to the same 177 * destination and is still up. If not, free it and try again. 178 */ 179 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 180 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 181 RTFREE(ro->ro_rt); 182 ro->ro_rt = (struct rtentry *)0; 183 } 184 185 if (ro->ro_rt == 0) { 186 dst->sin_family = AF_INET; 187 dst->sin_len = sizeof(*dst); 188 dst->sin_addr = ip->ip_dst; 189 } 190 191 /* 192 * If routing to interface only, short-circuit routing lookup. 193 */ 194 if (flags & IP_ROUTETOIF) { 195 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 196 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 197 ipstat.ips_noroute++; 198 error = ENETUNREACH; 199 goto bad; 200 } 201 202 ifp = ia->ia_ifp; 203 mtu = ifp->if_mtu; 204 ip->ip_ttl = 1; 205 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) || 206 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 207 imo != NULL && imo->imo_multicast_ifp != NULL) { 208 ifp = imo->imo_multicast_ifp; 209 mtu = ifp->if_mtu; 210 IFP_TO_IA(ifp, ia); 211 } else { 212 if (ro->ro_rt == 0) 213 rtalloc(ro); 214 215 if (ro->ro_rt == 0) { 216 ipstat.ips_noroute++; 217 error = EHOSTUNREACH; 218 goto bad; 219 } 220 221 ia = ifatoia(ro->ro_rt->rt_ifa); 222 ifp = ro->ro_rt->rt_ifp; 223 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 224 mtu = ifp->if_mtu; 225 ro->ro_rt->rt_use++; 226 227 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 228 dst = satosin(ro->ro_rt->rt_gateway); 229 } 230 231 /* Set the source IP address */ 232 if (!IN_MULTICAST(ip->ip_dst.s_addr)) 233 ip->ip_src = ia->ia_addr.sin_addr; 234 } 235 236 #ifdef IPSEC 237 /* 238 * splnet is chosen over spltdb because we are not allowed to 239 * lower the level, and udp_output calls us in splnet(). 240 */ 241 s = splnet(); 242 243 /* Do we have any pending SAs to apply ? */ 244 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 245 if (mtag != NULL) { 246 #ifdef DIAGNOSTIC 247 if (mtag->m_tag_len != sizeof (struct tdb_ident)) 248 panic("ip_output: tag of length %d (should be %d", 249 mtag->m_tag_len, sizeof (struct tdb_ident)); 250 #endif 251 tdbi = (struct tdb_ident *)(mtag + 1); 252 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); 253 if (tdb == NULL) 254 error = -EINVAL; 255 m_tag_delete(m, mtag); 256 } 257 else 258 tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error, 259 IPSP_DIRECTION_OUT, NULL, inp); 260 261 if (tdb == NULL) { 262 splx(s); 263 264 if (error == 0) { 265 /* 266 * No IPsec processing required, we'll just send the 267 * packet out. 268 */ 269 sproto = 0; 270 271 /* Fall through to routing/multicast handling */ 272 } else { 273 /* 274 * -EINVAL is used to indicate that the packet should 275 * be silently dropped, typically because we've asked 276 * key management for an SA. 277 */ 278 if (error == -EINVAL) /* Should silently drop packet */ 279 error = 0; 280 281 m_freem(m); 282 goto done; 283 } 284 } else { 285 /* Loop detection */ 286 for (mtag = m_tag_first(m); mtag != NULL; 287 mtag = m_tag_next(m, mtag)) { 288 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 289 mtag->m_tag_id != 290 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 291 continue; 292 tdbi = (struct tdb_ident *)(mtag + 1); 293 if (tdbi->spi == tdb->tdb_spi && 294 tdbi->proto == tdb->tdb_sproto && 295 !bcmp(&tdbi->dst, &tdb->tdb_dst, 296 sizeof(union sockaddr_union))) { 297 splx(s); 298 sproto = 0; /* mark as no-IPsec-needed */ 299 goto done_spd; 300 } 301 } 302 303 /* We need to do IPsec */ 304 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); 305 sspi = tdb->tdb_spi; 306 sproto = tdb->tdb_sproto; 307 splx(s); 308 309 /* 310 * If it needs TCP/UDP hardware-checksumming, do the 311 * computation now. 312 */ 313 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 314 in_delayed_cksum(m); 315 m->m_pkthdr.csum &= 316 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 317 } 318 319 /* If it's not a multicast packet, try to fast-path */ 320 if (!IN_MULTICAST(ip->ip_dst.s_addr)) { 321 goto sendit; 322 } 323 } 324 325 /* Fall through to the routing/multicast handling code */ 326 done_spd: 327 #endif /* IPSEC */ 328 329 if (donerouting == 0) { 330 if (ro == 0) { 331 ro = &iproute; 332 bzero((caddr_t)ro, sizeof (*ro)); 333 } 334 335 dst = satosin(&ro->ro_dst); 336 337 /* 338 * If there is a cached route, check that it is to the same 339 * destination and is still up. If not, free it and try again. 340 */ 341 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 342 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 343 RTFREE(ro->ro_rt); 344 ro->ro_rt = (struct rtentry *)0; 345 } 346 347 if (ro->ro_rt == 0) { 348 dst->sin_family = AF_INET; 349 dst->sin_len = sizeof(*dst); 350 dst->sin_addr = ip->ip_dst; 351 } 352 353 /* 354 * If routing to interface only, short-circuit routing lookup. 355 */ 356 if (flags & IP_ROUTETOIF) { 357 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 358 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 359 ipstat.ips_noroute++; 360 error = ENETUNREACH; 361 goto bad; 362 } 363 364 ifp = ia->ia_ifp; 365 mtu = ifp->if_mtu; 366 ip->ip_ttl = 1; 367 } else if ((IN_MULTICAST(ip->ip_dst.s_addr) || 368 (ip->ip_dst.s_addr == INADDR_BROADCAST)) && 369 imo != NULL && imo->imo_multicast_ifp != NULL) { 370 ifp = imo->imo_multicast_ifp; 371 mtu = ifp->if_mtu; 372 IFP_TO_IA(ifp, ia); 373 } else { 374 if (ro->ro_rt == 0) 375 rtalloc(ro); 376 377 if (ro->ro_rt == 0) { 378 ipstat.ips_noroute++; 379 error = EHOSTUNREACH; 380 goto bad; 381 } 382 383 ia = ifatoia(ro->ro_rt->rt_ifa); 384 ifp = ro->ro_rt->rt_ifp; 385 if ((mtu = ro->ro_rt->rt_rmx.rmx_mtu) == 0) 386 mtu = ifp->if_mtu; 387 ro->ro_rt->rt_use++; 388 389 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 390 dst = satosin(ro->ro_rt->rt_gateway); 391 } 392 393 /* Set the source IP address */ 394 if (ip->ip_src.s_addr == INADDR_ANY) 395 ip->ip_src = ia->ia_addr.sin_addr; 396 } 397 398 if (IN_MULTICAST(ip->ip_dst.s_addr) || 399 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 400 struct in_multi *inm; 401 402 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 403 M_BCAST : M_MCAST; 404 405 /* 406 * IP destination address is multicast. Make sure "dst" 407 * still points to the address in "ro". (It may have been 408 * changed to point to a gateway address, above.) 409 */ 410 dst = satosin(&ro->ro_dst); 411 412 /* 413 * See if the caller provided any multicast options 414 */ 415 if (imo != NULL) 416 ip->ip_ttl = imo->imo_multicast_ttl; 417 else 418 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 419 420 /* 421 * if we don't know the outgoing ifp yet, we can't generate 422 * output 423 */ 424 if (!ifp) { 425 ipstat.ips_noroute++; 426 error = EHOSTUNREACH; 427 goto bad; 428 } 429 430 /* 431 * Confirm that the outgoing interface supports multicast, 432 * but only if the packet actually is going out on that 433 * interface (i.e., no IPsec is applied). 434 */ 435 if ((((m->m_flags & M_MCAST) && 436 (ifp->if_flags & IFF_MULTICAST) == 0) || 437 ((m->m_flags & M_BCAST) && 438 (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0)) { 439 ipstat.ips_noroute++; 440 error = ENETUNREACH; 441 goto bad; 442 } 443 444 /* 445 * If source address not specified yet, use address 446 * of outgoing interface. 447 */ 448 if (ip->ip_src.s_addr == INADDR_ANY) { 449 struct in_ifaddr *ia; 450 451 for (ia = in_ifaddr.tqh_first; 452 ia; 453 ia = ia->ia_list.tqe_next) 454 if (ia->ia_ifp == ifp) { 455 ip->ip_src = ia->ia_addr.sin_addr; 456 break; 457 } 458 } 459 460 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 461 if (inm != NULL && 462 (imo == NULL || imo->imo_multicast_loop)) { 463 /* 464 * If we belong to the destination multicast group 465 * on the outgoing interface, and the caller did not 466 * forbid loopback, loop back a copy. 467 * Can't defer TCP/UDP checksumming, do the 468 * computation now. 469 */ 470 if (m->m_pkthdr.csum & 471 (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 472 in_delayed_cksum(m); 473 m->m_pkthdr.csum &= 474 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 475 } 476 ip_mloopback(ifp, m, dst); 477 } 478 #ifdef MROUTING 479 else { 480 /* 481 * If we are acting as a multicast router, perform 482 * multicast forwarding as if the packet had just 483 * arrived on the interface to which we are about 484 * to send. The multicast forwarding function 485 * recursively calls this function, using the 486 * IP_FORWARDING flag to prevent infinite recursion. 487 * 488 * Multicasts that are looped back by ip_mloopback(), 489 * above, will be forwarded by the ip_input() routine, 490 * if necessary. 491 */ 492 extern struct socket *ip_mrouter; 493 494 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 495 if (ip_mforward(m, ifp) != 0) { 496 m_freem(m); 497 goto done; 498 } 499 } 500 } 501 #endif 502 /* 503 * Multicasts with a time-to-live of zero may be looped- 504 * back, above, but must not be transmitted on a network. 505 * Also, multicasts addressed to the loopback interface 506 * are not sent -- the above call to ip_mloopback() will 507 * loop back a copy if this host actually belongs to the 508 * destination group on the loopback interface. 509 */ 510 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 511 m_freem(m); 512 goto done; 513 } 514 515 goto sendit; 516 } 517 518 /* 519 * Look for broadcast address and and verify user is allowed to send 520 * such a packet; if the packet is going in an IPsec tunnel, skip 521 * this check. 522 */ 523 if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp))) { 524 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 525 error = EADDRNOTAVAIL; 526 goto bad; 527 } 528 if ((flags & IP_ALLOWBROADCAST) == 0) { 529 error = EACCES; 530 goto bad; 531 } 532 533 /* Don't allow broadcast messages to be fragmented */ 534 if (ntohs(ip->ip_len) > ifp->if_mtu) { 535 error = EMSGSIZE; 536 goto bad; 537 } 538 m->m_flags |= M_BCAST; 539 } else 540 m->m_flags &= ~M_BCAST; 541 542 sendit: 543 /* 544 * If we're doing Path MTU discovery, we need to set DF unless 545 * the route's MTU is locked. 546 */ 547 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 548 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 549 ip->ip_off |= htons(IP_DF); 550 551 #ifdef IPSEC 552 /* 553 * Check if the packet needs encapsulation. 554 */ 555 if (sproto != 0) { 556 s = splnet(); 557 558 /* 559 * Packet filter 560 */ 561 #if NPF > 0 562 563 if (pf_test(PF_OUT, &encif[0].sc_if, &m) != PF_PASS) { 564 error = EHOSTUNREACH; 565 splx(s); 566 m_freem(m); 567 goto done; 568 } 569 if (m == NULL) { 570 splx(s); 571 goto done; 572 } 573 ip = mtod(m, struct ip *); 574 hlen = ip->ip_hl << 2; 575 #endif 576 577 tdb = gettdb(sspi, &sdst, sproto); 578 if (tdb == NULL) { 579 DPRINTF(("ip_output: unknown TDB")); 580 error = EHOSTUNREACH; 581 splx(s); 582 m_freem(m); 583 goto done; 584 } 585 586 /* Check if we are allowed to fragment */ 587 if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu && 588 ntohs(ip->ip_len) > tdb->tdb_mtu && 589 tdb->tdb_mtutimeout > time.tv_sec) { 590 struct rtentry *rt = NULL; 591 592 icmp_mtu = tdb->tdb_mtu; 593 splx(s); 594 595 /* Find a host route to store the mtu in */ 596 if (ro != NULL) 597 rt = ro->ro_rt; 598 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 599 struct sockaddr_in dst = { 600 sizeof(struct sockaddr_in), AF_INET}; 601 dst.sin_addr = ip->ip_dst; 602 rt = icmp_mtudisc_clone((struct sockaddr *)&dst); 603 } 604 if (rt != NULL) { 605 rt->rt_rmx.rmx_mtu = icmp_mtu; 606 if (ro && ro->ro_rt != NULL) { 607 RTFREE(ro->ro_rt); 608 ro->ro_rt = (struct rtentry *) 0; 609 rtalloc(ro); 610 } 611 } 612 error = EMSGSIZE; 613 goto bad; 614 } 615 616 /* 617 * Clear these -- they'll be set in the recursive invocation 618 * as needed. 619 */ 620 m->m_flags &= ~(M_MCAST | M_BCAST); 621 622 /* Callee frees mbuf */ 623 error = ipsp_process_packet(m, tdb, AF_INET, 0); 624 splx(s); 625 return error; /* Nothing more to be done */ 626 } 627 628 /* 629 * If deferred crypto processing is needed, check that the 630 * interface supports it. 631 */ 632 if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) 633 != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) { 634 /* Notify IPsec to do its own crypto. */ 635 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 636 m_freem(m); 637 error = EHOSTUNREACH; 638 goto done; 639 } 640 #endif /* IPSEC */ 641 642 /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */ 643 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) { 644 if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 645 ifp->if_bridge != NULL) { 646 in_delayed_cksum(m); 647 m->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */ 648 } 649 } else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) { 650 if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 651 ifp->if_bridge != NULL) { 652 in_delayed_cksum(m); 653 m->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */ 654 } 655 } 656 657 /* 658 * Packet filter 659 */ 660 #if NPF > 0 661 if (pf_test(PF_OUT, ifp, &m) != PF_PASS) { 662 error = EHOSTUNREACH; 663 m_freem(m); 664 goto done; 665 } 666 if (m == NULL) 667 goto done; 668 669 ip = mtod(m, struct ip *); 670 hlen = ip->ip_hl << 2; 671 #endif 672 673 /* 674 * If small enough for interface, can just send directly. 675 */ 676 if (ntohs(ip->ip_len) <= mtu) { 677 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 678 ifp->if_bridge == NULL) { 679 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 680 ipstat.ips_outhwcsum++; 681 } else { 682 ip->ip_sum = 0; 683 ip->ip_sum = in_cksum(m, hlen); 684 } 685 /* Update relevant hardware checksum stats for TCP/UDP */ 686 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) 687 tcpstat.tcps_outhwcsum++; 688 else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) 689 udpstat.udps_outhwcsum++; 690 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt); 691 goto done; 692 } 693 694 /* 695 * Too large for interface; fragment if possible. 696 * Must be able to put at least 8 bytes per fragment. 697 */ 698 if (ip->ip_off & htons(IP_DF)) { 699 #ifdef IPSEC 700 icmp_mtu = ifp->if_mtu; 701 #endif 702 error = EMSGSIZE; 703 /* 704 * This case can happen if the user changed the MTU 705 * of an interface after enabling IP on it. Because 706 * most netifs don't keep track of routes pointing to 707 * them, there is no way for one to update all its 708 * routes when the MTU is changed. 709 */ 710 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 711 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 712 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 713 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 714 } 715 ipstat.ips_cantfrag++; 716 goto bad; 717 } 718 719 error = ip_fragment(m, ifp, mtu); 720 if (error) 721 goto bad; 722 723 for (; m; m = m0) { 724 m0 = m->m_nextpkt; 725 m->m_nextpkt = 0; 726 if (error == 0) 727 error = (*ifp->if_output)(ifp, m, sintosa(dst), 728 ro->ro_rt); 729 else 730 m_freem(m); 731 } 732 733 if (error == 0) 734 ipstat.ips_fragmented++; 735 736 done: 737 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) 738 RTFREE(ro->ro_rt); 739 return (error); 740 bad: 741 #ifdef IPSEC 742 if (error == EMSGSIZE && ip_mtudisc && icmp_mtu != 0) 743 ipsec_adjust_mtu(m, icmp_mtu); 744 #endif 745 m_freem(m0); 746 goto done; 747 } 748 749 int 750 ip_fragment(struct mbuf *m, struct ifnet *ifp, u_long mtu) 751 { 752 struct ip *ip, *mhip; 753 struct mbuf *m0; 754 int len, hlen, off; 755 int mhlen, firstlen; 756 struct mbuf **mnext; 757 758 ip = mtod(m, struct ip *); 759 hlen = ip->ip_hl << 2; 760 761 len = (mtu - hlen) &~ 7; 762 if (len < 8) 763 return (EMSGSIZE); 764 765 /* 766 * If we are doing fragmentation, we can't defer TCP/UDP 767 * checksumming; compute the checksum and clear the flag. 768 */ 769 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 770 in_delayed_cksum(m); 771 m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 772 } 773 774 firstlen = len; 775 mnext = &m->m_nextpkt; 776 777 /* 778 * Loop through length of segment after first fragment, 779 * make new header and copy data of each part and link onto chain. 780 */ 781 m0 = m; 782 mhlen = sizeof (struct ip); 783 for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { 784 MGETHDR(m, M_DONTWAIT, MT_HEADER); 785 if (m == 0) { 786 ipstat.ips_odropped++; 787 return (ENOBUFS); 788 } 789 *mnext = m; 790 mnext = &m->m_nextpkt; 791 m->m_data += max_linkhdr; 792 mhip = mtod(m, struct ip *); 793 *mhip = *ip; 794 /* we must inherit MCAST and BCAST flags */ 795 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 796 if (hlen > sizeof (struct ip)) { 797 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 798 mhip->ip_hl = mhlen >> 2; 799 } 800 m->m_len = mhlen; 801 mhip->ip_off = ((off - hlen) >> 3) + 802 (ntohs(ip->ip_off) & ~IP_MF); 803 if (ip->ip_off & htons(IP_MF)) 804 mhip->ip_off |= IP_MF; 805 if (off + len >= ntohs(ip->ip_len)) 806 len = ntohs(ip->ip_len) - off; 807 else 808 mhip->ip_off |= IP_MF; 809 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 810 m->m_next = m_copy(m0, off, len); 811 if (m->m_next == 0) { 812 ipstat.ips_odropped++; 813 return (ENOBUFS); /* ??? */ 814 } 815 m->m_pkthdr.len = mhlen + len; 816 m->m_pkthdr.rcvif = (struct ifnet *)0; 817 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 818 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 819 ifp->if_bridge == NULL) { 820 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 821 ipstat.ips_outhwcsum++; 822 } else { 823 mhip->ip_sum = 0; 824 mhip->ip_sum = in_cksum(m, mhlen); 825 } 826 ipstat.ips_ofragments++; 827 } 828 /* 829 * Update first fragment by trimming what's been copied out 830 * and updating header, then send each fragment (in order). 831 */ 832 m = m0; 833 m_adj(m, hlen + firstlen - ntohs(ip->ip_len)); 834 m->m_pkthdr.len = hlen + firstlen; 835 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 836 ip->ip_off |= htons(IP_MF); 837 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 838 ifp->if_bridge == NULL) { 839 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 840 ipstat.ips_outhwcsum++; 841 } else { 842 ip->ip_sum = 0; 843 ip->ip_sum = in_cksum(m, hlen); 844 } 845 846 return (0); 847 } 848 849 /* 850 * Insert IP options into preformed packet. 851 * Adjust IP destination as required for IP source routing, 852 * as indicated by a non-zero in_addr at the start of the options. 853 */ 854 static struct mbuf * 855 ip_insertoptions(m, opt, phlen) 856 struct mbuf *m; 857 struct mbuf *opt; 858 int *phlen; 859 { 860 struct ipoption *p = mtod(opt, struct ipoption *); 861 struct mbuf *n; 862 struct ip *ip = mtod(m, struct ip *); 863 unsigned optlen; 864 865 optlen = opt->m_len - sizeof(p->ipopt_dst); 866 if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET) 867 return (m); /* XXX should fail */ 868 if (p->ipopt_dst.s_addr) 869 ip->ip_dst = p->ipopt_dst; 870 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 871 MGETHDR(n, M_DONTWAIT, MT_HEADER); 872 if (n == 0) 873 return (m); 874 M_MOVE_HDR(n, m); 875 n->m_pkthdr.len += optlen; 876 m->m_len -= sizeof(struct ip); 877 m->m_data += sizeof(struct ip); 878 n->m_next = m; 879 m = n; 880 m->m_len = optlen + sizeof(struct ip); 881 m->m_data += max_linkhdr; 882 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 883 } else { 884 m->m_data -= optlen; 885 m->m_len += optlen; 886 m->m_pkthdr.len += optlen; 887 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 888 } 889 ip = mtod(m, struct ip *); 890 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen); 891 *phlen = sizeof(struct ip) + optlen; 892 ip->ip_len = htons(ntohs(ip->ip_len) + optlen); 893 return (m); 894 } 895 896 /* 897 * Copy options from ip to jp, 898 * omitting those not copied during fragmentation. 899 */ 900 int 901 ip_optcopy(ip, jp) 902 struct ip *ip, *jp; 903 { 904 u_char *cp, *dp; 905 int opt, optlen, cnt; 906 907 cp = (u_char *)(ip + 1); 908 dp = (u_char *)(jp + 1); 909 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 910 for (; cnt > 0; cnt -= optlen, cp += optlen) { 911 opt = cp[0]; 912 if (opt == IPOPT_EOL) 913 break; 914 if (opt == IPOPT_NOP) { 915 /* Preserve for IP mcast tunnel's LSRR alignment. */ 916 *dp++ = IPOPT_NOP; 917 optlen = 1; 918 continue; 919 } 920 #ifdef DIAGNOSTIC 921 if (cnt < IPOPT_OLEN + sizeof(*cp)) 922 panic("malformed IPv4 option passed to ip_optcopy"); 923 #endif 924 optlen = cp[IPOPT_OLEN]; 925 #ifdef DIAGNOSTIC 926 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 927 panic("malformed IPv4 option passed to ip_optcopy"); 928 #endif 929 /* bogus lengths should have been caught by ip_dooptions */ 930 if (optlen > cnt) 931 optlen = cnt; 932 if (IPOPT_COPIED(opt)) { 933 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen); 934 dp += optlen; 935 } 936 } 937 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 938 *dp++ = IPOPT_EOL; 939 return (optlen); 940 } 941 942 /* 943 * IP socket option processing. 944 */ 945 int 946 ip_ctloutput(op, so, level, optname, mp) 947 int op; 948 struct socket *so; 949 int level, optname; 950 struct mbuf **mp; 951 { 952 struct inpcb *inp = sotoinpcb(so); 953 struct mbuf *m = *mp; 954 int optval = 0; 955 #ifdef IPSEC 956 struct proc *p = curproc; /* XXX */ 957 struct ipsec_ref *ipr; 958 u_int16_t opt16val; 959 #endif 960 int error = 0; 961 962 if (level != IPPROTO_IP) { 963 error = EINVAL; 964 if (op == PRCO_SETOPT && *mp) 965 (void) m_free(*mp); 966 } else switch (op) { 967 case PRCO_SETOPT: 968 switch (optname) { 969 case IP_OPTIONS: 970 #ifdef notyet 971 case IP_RETOPTS: 972 return (ip_pcbopts(optname, &inp->inp_options, m)); 973 #else 974 return (ip_pcbopts(&inp->inp_options, m)); 975 #endif 976 977 case IP_TOS: 978 case IP_TTL: 979 case IP_RECVOPTS: 980 case IP_RECVRETOPTS: 981 case IP_RECVDSTADDR: 982 if (m == NULL || m->m_len != sizeof(int)) 983 error = EINVAL; 984 else { 985 optval = *mtod(m, int *); 986 switch (optname) { 987 988 case IP_TOS: 989 inp->inp_ip.ip_tos = optval; 990 break; 991 992 case IP_TTL: 993 inp->inp_ip.ip_ttl = optval; 994 break; 995 #define OPTSET(bit) \ 996 if (optval) \ 997 inp->inp_flags |= bit; \ 998 else \ 999 inp->inp_flags &= ~bit; 1000 1001 case IP_RECVOPTS: 1002 OPTSET(INP_RECVOPTS); 1003 break; 1004 1005 case IP_RECVRETOPTS: 1006 OPTSET(INP_RECVRETOPTS); 1007 break; 1008 1009 case IP_RECVDSTADDR: 1010 OPTSET(INP_RECVDSTADDR); 1011 break; 1012 } 1013 } 1014 break; 1015 #undef OPTSET 1016 1017 case IP_MULTICAST_IF: 1018 case IP_MULTICAST_TTL: 1019 case IP_MULTICAST_LOOP: 1020 case IP_ADD_MEMBERSHIP: 1021 case IP_DROP_MEMBERSHIP: 1022 error = ip_setmoptions(optname, &inp->inp_moptions, m); 1023 break; 1024 1025 case IP_PORTRANGE: 1026 if (m == 0 || m->m_len != sizeof(int)) 1027 error = EINVAL; 1028 else { 1029 optval = *mtod(m, int *); 1030 1031 switch (optval) { 1032 1033 case IP_PORTRANGE_DEFAULT: 1034 inp->inp_flags &= ~(INP_LOWPORT); 1035 inp->inp_flags &= ~(INP_HIGHPORT); 1036 break; 1037 1038 case IP_PORTRANGE_HIGH: 1039 inp->inp_flags &= ~(INP_LOWPORT); 1040 inp->inp_flags |= INP_HIGHPORT; 1041 break; 1042 1043 case IP_PORTRANGE_LOW: 1044 inp->inp_flags &= ~(INP_HIGHPORT); 1045 inp->inp_flags |= INP_LOWPORT; 1046 break; 1047 1048 default: 1049 1050 error = EINVAL; 1051 break; 1052 } 1053 } 1054 break; 1055 case IP_AUTH_LEVEL: 1056 case IP_ESP_TRANS_LEVEL: 1057 case IP_ESP_NETWORK_LEVEL: 1058 case IP_IPCOMP_LEVEL: 1059 #ifndef IPSEC 1060 error = EOPNOTSUPP; 1061 #else 1062 if (m == 0 || m->m_len != sizeof(int)) { 1063 error = EINVAL; 1064 break; 1065 } 1066 optval = *mtod(m, int *); 1067 1068 if (optval < IPSEC_LEVEL_BYPASS || 1069 optval > IPSEC_LEVEL_UNIQUE) { 1070 error = EINVAL; 1071 break; 1072 } 1073 1074 /* Unlink cached output TDB to force a re-search */ 1075 if (inp->inp_tdb_out) { 1076 int s = spltdb(); 1077 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1078 inp, inp_tdb_out_next); 1079 splx(s); 1080 } 1081 1082 if (inp->inp_tdb_in) { 1083 int s = spltdb(); 1084 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1085 inp, inp_tdb_in_next); 1086 splx(s); 1087 } 1088 1089 switch (optname) { 1090 case IP_AUTH_LEVEL: 1091 if (optval < ipsec_auth_default_level && 1092 suser(p, 0)) { 1093 error = EACCES; 1094 break; 1095 } 1096 inp->inp_seclevel[SL_AUTH] = optval; 1097 break; 1098 1099 case IP_ESP_TRANS_LEVEL: 1100 if (optval < ipsec_esp_trans_default_level && 1101 suser(p, 0)) { 1102 error = EACCES; 1103 break; 1104 } 1105 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1106 break; 1107 1108 case IP_ESP_NETWORK_LEVEL: 1109 if (optval < ipsec_esp_network_default_level && 1110 suser(p, 0)) { 1111 error = EACCES; 1112 break; 1113 } 1114 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1115 break; 1116 case IP_IPCOMP_LEVEL: 1117 if (optval < ipsec_ipcomp_default_level && 1118 suser(p, 0)) { 1119 error = EACCES; 1120 break; 1121 } 1122 inp->inp_seclevel[SL_IPCOMP] = optval; 1123 break; 1124 } 1125 if (!error) 1126 inp->inp_secrequire = get_sa_require(inp); 1127 #endif 1128 break; 1129 1130 case IP_IPSEC_REMOTE_CRED: 1131 case IP_IPSEC_REMOTE_AUTH: 1132 /* Can't set the remote credential or key */ 1133 error = EOPNOTSUPP; 1134 break; 1135 1136 case IP_IPSEC_LOCAL_ID: 1137 case IP_IPSEC_REMOTE_ID: 1138 case IP_IPSEC_LOCAL_CRED: 1139 case IP_IPSEC_LOCAL_AUTH: 1140 #ifndef IPSEC 1141 error = EOPNOTSUPP; 1142 #else 1143 if (m->m_len < 2) { 1144 error = EINVAL; 1145 break; 1146 } 1147 1148 m_copydata(m, 0, 2, (caddr_t) &opt16val); 1149 1150 /* If the type is 0, then we cleanup and return */ 1151 if (opt16val == 0) { 1152 switch (optname) { 1153 case IP_IPSEC_LOCAL_ID: 1154 if (inp->inp_ipo != NULL && 1155 inp->inp_ipo->ipo_srcid != NULL) { 1156 ipsp_reffree(inp->inp_ipo->ipo_srcid); 1157 inp->inp_ipo->ipo_srcid = NULL; 1158 } 1159 break; 1160 1161 case IP_IPSEC_REMOTE_ID: 1162 if (inp->inp_ipo != NULL && 1163 inp->inp_ipo->ipo_dstid != NULL) { 1164 ipsp_reffree(inp->inp_ipo->ipo_dstid); 1165 inp->inp_ipo->ipo_dstid = NULL; 1166 } 1167 break; 1168 1169 case IP_IPSEC_LOCAL_CRED: 1170 if (inp->inp_ipo != NULL && 1171 inp->inp_ipo->ipo_local_cred != NULL) { 1172 ipsp_reffree(inp->inp_ipo->ipo_local_cred); 1173 inp->inp_ipo->ipo_local_cred = NULL; 1174 } 1175 break; 1176 1177 case IP_IPSEC_LOCAL_AUTH: 1178 if (inp->inp_ipo != NULL && 1179 inp->inp_ipo->ipo_local_auth != NULL) { 1180 ipsp_reffree(inp->inp_ipo->ipo_local_auth); 1181 inp->inp_ipo->ipo_local_auth = NULL; 1182 } 1183 break; 1184 } 1185 1186 error = 0; 1187 break; 1188 } 1189 1190 /* Can't have an empty payload */ 1191 if (m->m_len == 2) { 1192 error = EINVAL; 1193 break; 1194 } 1195 1196 /* Allocate if needed */ 1197 if (inp->inp_ipo == NULL) { 1198 inp->inp_ipo = ipsec_add_policy(inp, 1199 AF_INET, IPSP_DIRECTION_OUT); 1200 if (inp->inp_ipo == NULL) { 1201 error = ENOBUFS; 1202 break; 1203 } 1204 } 1205 1206 MALLOC(ipr, struct ipsec_ref *, 1207 sizeof(struct ipsec_ref) + m->m_len - 2, 1208 M_CREDENTIALS, M_NOWAIT); 1209 if (ipr == NULL) { 1210 error = ENOBUFS; 1211 break; 1212 } 1213 1214 ipr->ref_count = 1; 1215 ipr->ref_malloctype = M_CREDENTIALS; 1216 ipr->ref_len = m->m_len - 2; 1217 ipr->ref_type = opt16val; 1218 m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1)); 1219 1220 switch (optname) { 1221 case IP_IPSEC_LOCAL_ID: 1222 /* Check valid types and NUL-termination */ 1223 if (ipr->ref_type < IPSP_IDENTITY_PREFIX || 1224 ipr->ref_type > IPSP_IDENTITY_CONNECTION || 1225 ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1226 FREE(ipr, M_CREDENTIALS); 1227 error = EINVAL; 1228 } else { 1229 if (inp->inp_ipo->ipo_srcid != NULL) 1230 ipsp_reffree(inp->inp_ipo->ipo_srcid); 1231 inp->inp_ipo->ipo_srcid = ipr; 1232 } 1233 break; 1234 case IP_IPSEC_REMOTE_ID: 1235 /* Check valid types and NUL-termination */ 1236 if (ipr->ref_type < IPSP_IDENTITY_PREFIX || 1237 ipr->ref_type > IPSP_IDENTITY_CONNECTION || 1238 ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1239 FREE(ipr, M_CREDENTIALS); 1240 error = EINVAL; 1241 } else { 1242 if (inp->inp_ipo->ipo_dstid != NULL) 1243 ipsp_reffree(inp->inp_ipo->ipo_dstid); 1244 inp->inp_ipo->ipo_dstid = ipr; 1245 } 1246 break; 1247 case IP_IPSEC_LOCAL_CRED: 1248 if (ipr->ref_type < IPSP_CRED_KEYNOTE || 1249 ipr->ref_type > IPSP_CRED_X509) { 1250 FREE(ipr, M_CREDENTIALS); 1251 error = EINVAL; 1252 } else { 1253 if (inp->inp_ipo->ipo_local_cred != NULL) 1254 ipsp_reffree(inp->inp_ipo->ipo_local_cred); 1255 inp->inp_ipo->ipo_local_cred = ipr; 1256 } 1257 break; 1258 case IP_IPSEC_LOCAL_AUTH: 1259 if (ipr->ref_type < IPSP_AUTH_PASSPHRASE || 1260 ipr->ref_type > IPSP_AUTH_RSA) { 1261 FREE(ipr, M_CREDENTIALS); 1262 error = EINVAL; 1263 } else { 1264 if (inp->inp_ipo->ipo_local_auth != NULL) 1265 ipsp_reffree(inp->inp_ipo->ipo_local_auth); 1266 inp->inp_ipo->ipo_local_auth = ipr; 1267 } 1268 break; 1269 } 1270 1271 /* Unlink cached output TDB to force a re-search */ 1272 if (inp->inp_tdb_out) { 1273 int s = spltdb(); 1274 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1275 inp, inp_tdb_out_next); 1276 splx(s); 1277 } 1278 1279 if (inp->inp_tdb_in) { 1280 int s = spltdb(); 1281 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1282 inp, inp_tdb_in_next); 1283 splx(s); 1284 } 1285 #endif 1286 break; 1287 default: 1288 error = ENOPROTOOPT; 1289 break; 1290 } 1291 if (m) 1292 (void)m_free(m); 1293 break; 1294 1295 case PRCO_GETOPT: 1296 switch (optname) { 1297 case IP_OPTIONS: 1298 case IP_RETOPTS: 1299 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1300 if (inp->inp_options) { 1301 m->m_len = inp->inp_options->m_len; 1302 bcopy(mtod(inp->inp_options, caddr_t), 1303 mtod(m, caddr_t), (unsigned)m->m_len); 1304 } else 1305 m->m_len = 0; 1306 break; 1307 1308 case IP_TOS: 1309 case IP_TTL: 1310 case IP_RECVOPTS: 1311 case IP_RECVRETOPTS: 1312 case IP_RECVDSTADDR: 1313 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1314 m->m_len = sizeof(int); 1315 switch (optname) { 1316 1317 case IP_TOS: 1318 optval = inp->inp_ip.ip_tos; 1319 break; 1320 1321 case IP_TTL: 1322 optval = inp->inp_ip.ip_ttl; 1323 break; 1324 1325 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1326 1327 case IP_RECVOPTS: 1328 optval = OPTBIT(INP_RECVOPTS); 1329 break; 1330 1331 case IP_RECVRETOPTS: 1332 optval = OPTBIT(INP_RECVRETOPTS); 1333 break; 1334 1335 case IP_RECVDSTADDR: 1336 optval = OPTBIT(INP_RECVDSTADDR); 1337 break; 1338 } 1339 *mtod(m, int *) = optval; 1340 break; 1341 1342 case IP_MULTICAST_IF: 1343 case IP_MULTICAST_TTL: 1344 case IP_MULTICAST_LOOP: 1345 case IP_ADD_MEMBERSHIP: 1346 case IP_DROP_MEMBERSHIP: 1347 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1348 break; 1349 1350 case IP_PORTRANGE: 1351 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1352 m->m_len = sizeof(int); 1353 1354 if (inp->inp_flags & INP_HIGHPORT) 1355 optval = IP_PORTRANGE_HIGH; 1356 else if (inp->inp_flags & INP_LOWPORT) 1357 optval = IP_PORTRANGE_LOW; 1358 else 1359 optval = 0; 1360 1361 *mtod(m, int *) = optval; 1362 break; 1363 1364 case IP_AUTH_LEVEL: 1365 case IP_ESP_TRANS_LEVEL: 1366 case IP_ESP_NETWORK_LEVEL: 1367 case IP_IPCOMP_LEVEL: 1368 #ifndef IPSEC 1369 m->m_len = sizeof(int); 1370 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1371 #else 1372 m->m_len = sizeof(int); 1373 switch (optname) { 1374 case IP_AUTH_LEVEL: 1375 optval = inp->inp_seclevel[SL_AUTH]; 1376 break; 1377 1378 case IP_ESP_TRANS_LEVEL: 1379 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1380 break; 1381 1382 case IP_ESP_NETWORK_LEVEL: 1383 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1384 break; 1385 case IP_IPCOMP_LEVEL: 1386 optval = inp->inp_seclevel[SL_IPCOMP]; 1387 break; 1388 } 1389 *mtod(m, int *) = optval; 1390 #endif 1391 break; 1392 case IP_IPSEC_LOCAL_ID: 1393 case IP_IPSEC_REMOTE_ID: 1394 case IP_IPSEC_LOCAL_CRED: 1395 case IP_IPSEC_REMOTE_CRED: 1396 case IP_IPSEC_LOCAL_AUTH: 1397 case IP_IPSEC_REMOTE_AUTH: 1398 #ifndef IPSEC 1399 error = EOPNOTSUPP; 1400 #else 1401 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1402 m->m_len = sizeof(u_int16_t); 1403 ipr = NULL; 1404 switch (optname) { 1405 case IP_IPSEC_LOCAL_ID: 1406 if (inp->inp_ipo != NULL) 1407 ipr = inp->inp_ipo->ipo_srcid; 1408 opt16val = IPSP_IDENTITY_NONE; 1409 break; 1410 case IP_IPSEC_REMOTE_ID: 1411 if (inp->inp_ipo != NULL) 1412 ipr = inp->inp_ipo->ipo_dstid; 1413 opt16val = IPSP_IDENTITY_NONE; 1414 break; 1415 case IP_IPSEC_LOCAL_CRED: 1416 if (inp->inp_ipo != NULL) 1417 ipr = inp->inp_ipo->ipo_local_cred; 1418 opt16val = IPSP_CRED_NONE; 1419 break; 1420 case IP_IPSEC_REMOTE_CRED: 1421 ipr = inp->inp_ipsec_remotecred; 1422 opt16val = IPSP_CRED_NONE; 1423 break; 1424 case IP_IPSEC_LOCAL_AUTH: 1425 if (inp->inp_ipo != NULL) 1426 ipr = inp->inp_ipo->ipo_local_auth; 1427 break; 1428 case IP_IPSEC_REMOTE_AUTH: 1429 ipr = inp->inp_ipsec_remoteauth; 1430 break; 1431 } 1432 if (ipr == NULL) 1433 *mtod(m, u_int16_t *) = opt16val; 1434 else { 1435 m->m_len += ipr->ref_len; 1436 *mtod(m, u_int16_t *) = ipr->ref_type; 1437 m_copyback(m, sizeof(u_int16_t), ipr->ref_len, 1438 ipr + 1); 1439 } 1440 #endif 1441 break; 1442 default: 1443 error = ENOPROTOOPT; 1444 break; 1445 } 1446 break; 1447 } 1448 return (error); 1449 } 1450 1451 /* 1452 * Set up IP options in pcb for insertion in output packets. 1453 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1454 * with destination address if source routed. 1455 */ 1456 int 1457 #ifdef notyet 1458 ip_pcbopts(optname, pcbopt, m) 1459 int optname; 1460 #else 1461 ip_pcbopts(pcbopt, m) 1462 #endif 1463 struct mbuf **pcbopt; 1464 struct mbuf *m; 1465 { 1466 int cnt, optlen; 1467 u_char *cp; 1468 u_char opt; 1469 1470 /* turn off any old options */ 1471 if (*pcbopt) 1472 (void)m_free(*pcbopt); 1473 *pcbopt = 0; 1474 if (m == (struct mbuf *)0 || m->m_len == 0) { 1475 /* 1476 * Only turning off any previous options. 1477 */ 1478 if (m) 1479 (void)m_free(m); 1480 return (0); 1481 } 1482 1483 if (m->m_len % sizeof(int32_t)) 1484 goto bad; 1485 1486 /* 1487 * IP first-hop destination address will be stored before 1488 * actual options; move other options back 1489 * and clear it when none present. 1490 */ 1491 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1492 goto bad; 1493 cnt = m->m_len; 1494 m->m_len += sizeof(struct in_addr); 1495 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1496 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1497 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1498 1499 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1500 opt = cp[IPOPT_OPTVAL]; 1501 if (opt == IPOPT_EOL) 1502 break; 1503 if (opt == IPOPT_NOP) 1504 optlen = 1; 1505 else { 1506 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1507 goto bad; 1508 optlen = cp[IPOPT_OLEN]; 1509 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1510 goto bad; 1511 } 1512 switch (opt) { 1513 1514 default: 1515 break; 1516 1517 case IPOPT_LSRR: 1518 case IPOPT_SSRR: 1519 /* 1520 * user process specifies route as: 1521 * ->A->B->C->D 1522 * D must be our final destination (but we can't 1523 * check that since we may not have connected yet). 1524 * A is first hop destination, which doesn't appear in 1525 * actual IP option, but is stored before the options. 1526 */ 1527 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1528 goto bad; 1529 m->m_len -= sizeof(struct in_addr); 1530 cnt -= sizeof(struct in_addr); 1531 optlen -= sizeof(struct in_addr); 1532 cp[IPOPT_OLEN] = optlen; 1533 /* 1534 * Move first hop before start of options. 1535 */ 1536 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1537 sizeof(struct in_addr)); 1538 /* 1539 * Then copy rest of options back 1540 * to close up the deleted entry. 1541 */ 1542 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1543 sizeof(struct in_addr)), 1544 (caddr_t)&cp[IPOPT_OFFSET+1], 1545 (unsigned)cnt + sizeof(struct in_addr)); 1546 break; 1547 } 1548 } 1549 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1550 goto bad; 1551 *pcbopt = m; 1552 return (0); 1553 1554 bad: 1555 (void)m_free(m); 1556 return (EINVAL); 1557 } 1558 1559 /* 1560 * Set the IP multicast options in response to user setsockopt(). 1561 */ 1562 int 1563 ip_setmoptions(optname, imop, m) 1564 int optname; 1565 struct ip_moptions **imop; 1566 struct mbuf *m; 1567 { 1568 int error = 0; 1569 u_char loop; 1570 int i; 1571 struct in_addr addr; 1572 struct ip_mreq *mreq; 1573 struct ifnet *ifp; 1574 struct ip_moptions *imo = *imop; 1575 struct route ro; 1576 struct sockaddr_in *dst; 1577 1578 if (imo == NULL) { 1579 /* 1580 * No multicast option buffer attached to the pcb; 1581 * allocate one and initialize to default values. 1582 */ 1583 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, 1584 M_WAITOK); 1585 1586 *imop = imo; 1587 imo->imo_multicast_ifp = NULL; 1588 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1589 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1590 imo->imo_num_memberships = 0; 1591 } 1592 1593 switch (optname) { 1594 1595 case IP_MULTICAST_IF: 1596 /* 1597 * Select the interface for outgoing multicast packets. 1598 */ 1599 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1600 error = EINVAL; 1601 break; 1602 } 1603 addr = *(mtod(m, struct in_addr *)); 1604 /* 1605 * INADDR_ANY is used to remove a previous selection. 1606 * When no interface is selected, a default one is 1607 * chosen every time a multicast packet is sent. 1608 */ 1609 if (addr.s_addr == INADDR_ANY) { 1610 imo->imo_multicast_ifp = NULL; 1611 break; 1612 } 1613 /* 1614 * The selected interface is identified by its local 1615 * IP address. Find the interface and confirm that 1616 * it supports multicasting. 1617 */ 1618 INADDR_TO_IFP(addr, ifp); 1619 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1620 error = EADDRNOTAVAIL; 1621 break; 1622 } 1623 imo->imo_multicast_ifp = ifp; 1624 break; 1625 1626 case IP_MULTICAST_TTL: 1627 /* 1628 * Set the IP time-to-live for outgoing multicast packets. 1629 */ 1630 if (m == NULL || m->m_len != 1) { 1631 error = EINVAL; 1632 break; 1633 } 1634 imo->imo_multicast_ttl = *(mtod(m, u_char *)); 1635 break; 1636 1637 case IP_MULTICAST_LOOP: 1638 /* 1639 * Set the loopback flag for outgoing multicast packets. 1640 * Must be zero or one. 1641 */ 1642 if (m == NULL || m->m_len != 1 || 1643 (loop = *(mtod(m, u_char *))) > 1) { 1644 error = EINVAL; 1645 break; 1646 } 1647 imo->imo_multicast_loop = loop; 1648 break; 1649 1650 case IP_ADD_MEMBERSHIP: 1651 /* 1652 * Add a multicast group membership. 1653 * Group must be a valid IP multicast address. 1654 */ 1655 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1656 error = EINVAL; 1657 break; 1658 } 1659 mreq = mtod(m, struct ip_mreq *); 1660 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1661 error = EINVAL; 1662 break; 1663 } 1664 /* 1665 * If no interface address was provided, use the interface of 1666 * the route to the given multicast address. 1667 */ 1668 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1669 ro.ro_rt = NULL; 1670 dst = satosin(&ro.ro_dst); 1671 dst->sin_len = sizeof(*dst); 1672 dst->sin_family = AF_INET; 1673 dst->sin_addr = mreq->imr_multiaddr; 1674 rtalloc(&ro); 1675 if (ro.ro_rt == NULL) { 1676 error = EADDRNOTAVAIL; 1677 break; 1678 } 1679 ifp = ro.ro_rt->rt_ifp; 1680 rtfree(ro.ro_rt); 1681 } else { 1682 INADDR_TO_IFP(mreq->imr_interface, ifp); 1683 } 1684 /* 1685 * See if we found an interface, and confirm that it 1686 * supports multicast. 1687 */ 1688 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1689 error = EADDRNOTAVAIL; 1690 break; 1691 } 1692 /* 1693 * See if the membership already exists or if all the 1694 * membership slots are full. 1695 */ 1696 for (i = 0; i < imo->imo_num_memberships; ++i) { 1697 if (imo->imo_membership[i]->inm_ifp == ifp && 1698 imo->imo_membership[i]->inm_addr.s_addr 1699 == mreq->imr_multiaddr.s_addr) 1700 break; 1701 } 1702 if (i < imo->imo_num_memberships) { 1703 error = EADDRINUSE; 1704 break; 1705 } 1706 if (i == IP_MAX_MEMBERSHIPS) { 1707 error = ETOOMANYREFS; 1708 break; 1709 } 1710 /* 1711 * Everything looks good; add a new record to the multicast 1712 * address list for the given interface. 1713 */ 1714 if ((imo->imo_membership[i] = 1715 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1716 error = ENOBUFS; 1717 break; 1718 } 1719 ++imo->imo_num_memberships; 1720 break; 1721 1722 case IP_DROP_MEMBERSHIP: 1723 /* 1724 * Drop a multicast group membership. 1725 * Group must be a valid IP multicast address. 1726 */ 1727 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1728 error = EINVAL; 1729 break; 1730 } 1731 mreq = mtod(m, struct ip_mreq *); 1732 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1733 error = EINVAL; 1734 break; 1735 } 1736 /* 1737 * If an interface address was specified, get a pointer 1738 * to its ifnet structure. 1739 */ 1740 if (mreq->imr_interface.s_addr == INADDR_ANY) 1741 ifp = NULL; 1742 else { 1743 INADDR_TO_IFP(mreq->imr_interface, ifp); 1744 if (ifp == NULL) { 1745 error = EADDRNOTAVAIL; 1746 break; 1747 } 1748 } 1749 /* 1750 * Find the membership in the membership array. 1751 */ 1752 for (i = 0; i < imo->imo_num_memberships; ++i) { 1753 if ((ifp == NULL || 1754 imo->imo_membership[i]->inm_ifp == ifp) && 1755 imo->imo_membership[i]->inm_addr.s_addr == 1756 mreq->imr_multiaddr.s_addr) 1757 break; 1758 } 1759 if (i == imo->imo_num_memberships) { 1760 error = EADDRNOTAVAIL; 1761 break; 1762 } 1763 /* 1764 * Give up the multicast address record to which the 1765 * membership points. 1766 */ 1767 in_delmulti(imo->imo_membership[i]); 1768 /* 1769 * Remove the gap in the membership array. 1770 */ 1771 for (++i; i < imo->imo_num_memberships; ++i) 1772 imo->imo_membership[i-1] = imo->imo_membership[i]; 1773 --imo->imo_num_memberships; 1774 break; 1775 1776 default: 1777 error = EOPNOTSUPP; 1778 break; 1779 } 1780 1781 /* 1782 * If all options have default values, no need to keep the mbuf. 1783 */ 1784 if (imo->imo_multicast_ifp == NULL && 1785 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1786 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1787 imo->imo_num_memberships == 0) { 1788 free(*imop, M_IPMOPTS); 1789 *imop = NULL; 1790 } 1791 1792 return (error); 1793 } 1794 1795 /* 1796 * Return the IP multicast options in response to user getsockopt(). 1797 */ 1798 int 1799 ip_getmoptions(optname, imo, mp) 1800 int optname; 1801 struct ip_moptions *imo; 1802 struct mbuf **mp; 1803 { 1804 u_char *ttl; 1805 u_char *loop; 1806 struct in_addr *addr; 1807 struct in_ifaddr *ia; 1808 1809 *mp = m_get(M_WAIT, MT_SOOPTS); 1810 1811 switch (optname) { 1812 1813 case IP_MULTICAST_IF: 1814 addr = mtod(*mp, struct in_addr *); 1815 (*mp)->m_len = sizeof(struct in_addr); 1816 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1817 addr->s_addr = INADDR_ANY; 1818 else { 1819 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1820 addr->s_addr = (ia == NULL) ? INADDR_ANY 1821 : ia->ia_addr.sin_addr.s_addr; 1822 } 1823 return (0); 1824 1825 case IP_MULTICAST_TTL: 1826 ttl = mtod(*mp, u_char *); 1827 (*mp)->m_len = 1; 1828 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1829 : imo->imo_multicast_ttl; 1830 return (0); 1831 1832 case IP_MULTICAST_LOOP: 1833 loop = mtod(*mp, u_char *); 1834 (*mp)->m_len = 1; 1835 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1836 : imo->imo_multicast_loop; 1837 return (0); 1838 1839 default: 1840 return (EOPNOTSUPP); 1841 } 1842 } 1843 1844 /* 1845 * Discard the IP multicast options. 1846 */ 1847 void 1848 ip_freemoptions(imo) 1849 struct ip_moptions *imo; 1850 { 1851 int i; 1852 1853 if (imo != NULL) { 1854 for (i = 0; i < imo->imo_num_memberships; ++i) 1855 in_delmulti(imo->imo_membership[i]); 1856 free(imo, M_IPMOPTS); 1857 } 1858 } 1859 1860 /* 1861 * Routine called from ip_output() to loop back a copy of an IP multicast 1862 * packet to the input queue of a specified interface. Note that this 1863 * calls the output routine of the loopback "driver", but with an interface 1864 * pointer that might NOT be &loif -- easier than replicating that code here. 1865 */ 1866 static void 1867 ip_mloopback(ifp, m, dst) 1868 struct ifnet *ifp; 1869 struct mbuf *m; 1870 struct sockaddr_in *dst; 1871 { 1872 struct ip *ip; 1873 struct mbuf *copym; 1874 1875 copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1876 if (copym != NULL) { 1877 /* 1878 * We don't bother to fragment if the IP length is greater 1879 * than the interface's MTU. Can this possibly matter? 1880 */ 1881 ip = mtod(copym, struct ip *); 1882 ip->ip_sum = 0; 1883 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1884 (void) looutput(ifp, copym, sintosa(dst), NULL); 1885 } 1886 } 1887 1888 /* 1889 * Process a delayed payload checksum calculation. 1890 */ 1891 void 1892 in_delayed_cksum(struct mbuf *m) 1893 { 1894 struct ip *ip; 1895 u_int16_t csum, offset; 1896 1897 ip = mtod(m, struct ip *); 1898 offset = ip->ip_hl << 2; 1899 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1900 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1901 csum = 0xffff; 1902 1903 switch (ip->ip_p) { 1904 case IPPROTO_TCP: 1905 offset += offsetof(struct tcphdr, th_sum); 1906 break; 1907 1908 case IPPROTO_UDP: 1909 offset += offsetof(struct udphdr, uh_sum); 1910 break; 1911 1912 default: 1913 return; 1914 } 1915 1916 if ((offset + sizeof(u_int16_t)) > m->m_len) 1917 m_copyback(m, offset, sizeof(csum), &csum); 1918 else 1919 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1920 } 1921