1 /* $OpenBSD: ip_output.c,v 1.134 2001/07/17 20:34:50 provos Exp $ */ 2 /* $NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 37 */ 38 39 #include "pf.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/mbuf.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/proc.h> 48 #include <sys/kernel.h> 49 50 #include <net/if.h> 51 #include <net/if_enc.h> 52 #include <net/route.h> 53 54 #include <netinet/in.h> 55 #include <netinet/in_systm.h> 56 #include <netinet/ip.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_var.h> 59 #include <netinet/ip_var.h> 60 #include <netinet/ip_icmp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/udp.h> 63 #include <netinet/tcp_timer.h> 64 #include <netinet/tcp_var.h> 65 #include <netinet/udp_var.h> 66 67 #if NPF > 0 68 #include <net/pfvar.h> 69 #endif 70 71 #ifdef vax 72 #include <machine/mtpr.h> 73 #endif 74 75 #ifdef IPSEC 76 #ifdef ENCDEBUG 77 #define DPRINTF(x) do { if (encdebug) printf x ; } while (0) 78 #else 79 #define DPRINTF(x) 80 #endif 81 82 extern u_int8_t get_sa_require __P((struct inpcb *)); 83 84 extern int ipsec_auth_default_level; 85 extern int ipsec_esp_trans_default_level; 86 extern int ipsec_esp_network_default_level; 87 extern int ipsec_ipcomp_default_level; 88 #endif /* IPSEC */ 89 90 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); 91 static void ip_mloopback 92 __P((struct ifnet *, struct mbuf *, struct sockaddr_in *)); 93 94 /* 95 * IP output. The packet in mbuf chain m contains a skeletal IP 96 * header (with len, off, ttl, proto, tos, src, dst). 97 * The mbuf chain containing the packet will be freed. 98 * The mbuf opt, if present, will not be freed. 99 */ 100 int 101 #if __STDC__ 102 ip_output(struct mbuf *m0, ...) 103 #else 104 ip_output(m0, va_alist) 105 struct mbuf *m0; 106 va_dcl 107 #endif 108 { 109 register struct ip *ip, *mhip; 110 register struct ifnet *ifp; 111 struct mbuf *m = m0; 112 register int hlen = sizeof (struct ip); 113 int len, off, error = 0; 114 struct route iproute; 115 struct sockaddr_in *dst; 116 struct in_ifaddr *ia; 117 struct mbuf *opt; 118 struct route *ro; 119 int flags; 120 struct ip_moptions *imo; 121 va_list ap; 122 u_int8_t sproto = 0, donerouting = 0; 123 #ifdef IPSEC 124 u_int32_t icmp_mtu = 0; 125 union sockaddr_union sdst; 126 u_int32_t sspi; 127 struct m_tag *mtag; 128 struct tdb_ident *tdbi; 129 130 struct inpcb *inp; 131 struct tdb *tdb; 132 int s; 133 #endif /* IPSEC */ 134 135 va_start(ap, m0); 136 opt = va_arg(ap, struct mbuf *); 137 ro = va_arg(ap, struct route *); 138 flags = va_arg(ap, int); 139 imo = va_arg(ap, struct ip_moptions *); 140 #ifdef IPSEC 141 inp = va_arg(ap, struct inpcb *); 142 if (inp && (inp->inp_flags & INP_IPV6) != 0) 143 panic("ip_output: IPv6 pcb is passed"); 144 #endif /* IPSEC */ 145 va_end(ap); 146 147 #ifdef DIAGNOSTIC 148 if ((m->m_flags & M_PKTHDR) == 0) 149 panic("ip_output no HDR"); 150 #endif 151 if (opt) { 152 m = ip_insertoptions(m, opt, &len); 153 hlen = len; 154 } 155 156 ip = mtod(m, struct ip *); 157 158 /* 159 * Fill in IP header. 160 */ 161 if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { 162 ip->ip_v = IPVERSION; 163 ip->ip_off &= IP_DF; 164 ip->ip_id = htons(ip_randomid()); 165 ip->ip_hl = hlen >> 2; 166 ipstat.ips_localout++; 167 } else { 168 hlen = ip->ip_hl << 2; 169 } 170 171 /* 172 * If we're missing the IP source address, do a route lookup. We'll 173 * remember this result, in case we don't need to do any IPsec 174 * processing on the packet. We need the source address so we can 175 * do an SPD lookup in IPsec; for most packets, the source address 176 * is set at a higher level protocol. ICMPs and other packets 177 * though (e.g., traceroute) have a source address of zeroes. 178 */ 179 if (ip->ip_src.s_addr == INADDR_ANY) { 180 donerouting = 1; 181 182 if (ro == 0) { 183 ro = &iproute; 184 bzero((caddr_t)ro, sizeof (*ro)); 185 } 186 187 dst = satosin(&ro->ro_dst); 188 189 /* 190 * If there is a cached route, check that it is to the same 191 * destination and is still up. If not, free it and try again. 192 */ 193 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 194 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 195 RTFREE(ro->ro_rt); 196 ro->ro_rt = (struct rtentry *)0; 197 } 198 199 if (ro->ro_rt == 0) { 200 dst->sin_family = AF_INET; 201 dst->sin_len = sizeof(*dst); 202 dst->sin_addr = ip->ip_dst; 203 } 204 205 /* 206 * If routing to interface only, short-circuit routing lookup. 207 */ 208 if (flags & IP_ROUTETOIF) { 209 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 210 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 211 ipstat.ips_noroute++; 212 error = ENETUNREACH; 213 goto bad; 214 } 215 216 ifp = ia->ia_ifp; 217 ip->ip_ttl = 1; 218 } else { 219 if (ro->ro_rt == 0) 220 rtalloc(ro); 221 222 if (ro->ro_rt == 0) { 223 ipstat.ips_noroute++; 224 error = EHOSTUNREACH; 225 goto bad; 226 } 227 228 ia = ifatoia(ro->ro_rt->rt_ifa); 229 ifp = ro->ro_rt->rt_ifp; 230 ro->ro_rt->rt_use++; 231 232 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 233 dst = satosin(ro->ro_rt->rt_gateway); 234 } 235 236 /* Set the source IP address */ 237 if (!IN_MULTICAST(ip->ip_dst.s_addr)) 238 ip->ip_src = ia->ia_addr.sin_addr; 239 } 240 241 #ifdef IPSEC 242 /* 243 * splnet is chosen over spltdb because we are not allowed to 244 * lower the level, and udp_output calls us in splnet(). 245 */ 246 s = splnet(); 247 248 /* Do we have any pending SAs to apply ? */ 249 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 250 if (mtag != NULL) { 251 #ifdef DIAGNOSTIC 252 if (mtag->m_tag_len != sizeof (struct tdb_ident)) 253 panic("ip_output: tag of length %d (should be %d", 254 mtag->m_tag_len, sizeof (struct tdb_ident)); 255 #endif 256 tdbi = (struct tdb_ident *)(mtag + 1); 257 tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto); 258 if (tdb == NULL) 259 error = -EINVAL; 260 m_tag_delete(m, mtag); 261 } 262 else 263 tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error, 264 IPSP_DIRECTION_OUT, NULL, inp); 265 266 if (tdb == NULL) { 267 splx(s); 268 269 if (error == 0) { 270 /* 271 * No IPsec processing required, we'll just send the 272 * packet out. 273 */ 274 sproto = 0; 275 276 /* Fall through to routing/multicast handling */ 277 } else { 278 /* 279 * -EINVAL is used to indicate that the packet should 280 * be silently dropped, typically because we've asked 281 * key management for an SA. 282 */ 283 if (error == -EINVAL) /* Should silently drop packet */ 284 error = 0; 285 286 m_freem(m); 287 goto done; 288 } 289 } else { 290 /* 291 * If the socket has set the bypass flags and SA 292 * destination matches the IP destination, skip 293 * IPsec. This allows IKE packets to travel through 294 * IPsec tunnels. 295 */ 296 if ((inp != NULL) && 297 (inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS) && 298 (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS) && 299 (inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_BYPASS) 300 && (inp->inp_seclevel[SL_IPCOMP] == IPSEC_LEVEL_BYPASS) 301 && (sdst.sa.sa_family == AF_INET) && 302 (sdst.sin.sin_addr.s_addr == ip->ip_dst.s_addr)) { 303 splx(s); 304 sproto = 0; /* mark as no-IPsec-needed */ 305 goto done_spd; 306 } 307 308 /* Loop detection */ 309 for (mtag = m_tag_first(m); mtag != NULL; 310 mtag = m_tag_next(m, mtag)) { 311 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 312 mtag->m_tag_id != 313 PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 314 continue; 315 tdbi = (struct tdb_ident *)(mtag + 1); 316 if (tdbi->spi == tdb->tdb_spi && 317 tdbi->proto == tdb->tdb_sproto && 318 !bcmp(&tdbi->dst, &tdb->tdb_dst, 319 sizeof(union sockaddr_union))) { 320 splx(s); 321 sproto = 0; /* mark as no-IPsec-needed */ 322 goto done_spd; 323 } 324 } 325 326 /* We need to do IPsec */ 327 bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); 328 sspi = tdb->tdb_spi; 329 sproto = tdb->tdb_sproto; 330 splx(s); 331 332 /* 333 * If it needs TCP/UDP hardware-checksumming, do the 334 * computation now. 335 */ 336 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 337 in_delayed_cksum(m); 338 m->m_pkthdr.csum &= 339 ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 340 } 341 342 /* If it's not a multicast packet, try to fast-path */ 343 if (!IN_MULTICAST(ip->ip_dst.s_addr)) { 344 goto sendit; 345 } 346 } 347 348 /* Fall through to the routing/multicast handling code */ 349 done_spd: 350 #endif /* IPSEC */ 351 352 if (donerouting == 0) { 353 if (ro == 0) { 354 ro = &iproute; 355 bzero((caddr_t)ro, sizeof (*ro)); 356 } 357 358 dst = satosin(&ro->ro_dst); 359 360 /* 361 * If there is a cached route, check that it is to the same 362 * destination and is still up. If not, free it and try again. 363 */ 364 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 365 dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { 366 RTFREE(ro->ro_rt); 367 ro->ro_rt = (struct rtentry *)0; 368 } 369 370 if (ro->ro_rt == 0) { 371 dst->sin_family = AF_INET; 372 dst->sin_len = sizeof(*dst); 373 dst->sin_addr = ip->ip_dst; 374 } 375 376 /* 377 * If routing to interface only, short-circuit routing lookup. 378 */ 379 if (flags & IP_ROUTETOIF) { 380 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && 381 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { 382 ipstat.ips_noroute++; 383 error = ENETUNREACH; 384 goto bad; 385 } 386 387 ifp = ia->ia_ifp; 388 ip->ip_ttl = 1; 389 } else { 390 if (ro->ro_rt == 0) 391 rtalloc(ro); 392 393 if (ro->ro_rt == 0) { 394 ipstat.ips_noroute++; 395 error = EHOSTUNREACH; 396 goto bad; 397 } 398 399 ia = ifatoia(ro->ro_rt->rt_ifa); 400 ifp = ro->ro_rt->rt_ifp; 401 ro->ro_rt->rt_use++; 402 403 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 404 dst = satosin(ro->ro_rt->rt_gateway); 405 } 406 407 /* Set the source IP address */ 408 if (ip->ip_src.s_addr == INADDR_ANY) 409 ip->ip_src = ia->ia_addr.sin_addr; 410 } 411 412 if (IN_MULTICAST(ip->ip_dst.s_addr) || 413 (ip->ip_dst.s_addr == INADDR_BROADCAST)) { 414 struct in_multi *inm; 415 416 m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ? 417 M_BCAST : M_MCAST; 418 419 /* 420 * IP destination address is multicast. Make sure "dst" 421 * still points to the address in "ro". (It may have been 422 * changed to point to a gateway address, above.) 423 */ 424 dst = satosin(&ro->ro_dst); 425 426 /* 427 * See if the caller provided any multicast options 428 */ 429 if (imo != NULL) { 430 ip->ip_ttl = imo->imo_multicast_ttl; 431 if (imo->imo_multicast_ifp != NULL) 432 ifp = imo->imo_multicast_ifp; 433 } else 434 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 435 436 /* 437 * Confirm that the outgoing interface supports multicast, 438 * but only if the packet actually is going out on that 439 * interface (i.e., no IPsec is applied). 440 */ 441 if ((((m->m_flags & M_MCAST) && 442 (ifp->if_flags & IFF_MULTICAST) == 0) || 443 ((m->m_flags & M_BCAST) && 444 (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0)) { 445 ipstat.ips_noroute++; 446 error = ENETUNREACH; 447 goto bad; 448 } 449 450 /* 451 * If source address not specified yet, use address 452 * of outgoing interface. 453 */ 454 if (ip->ip_src.s_addr == INADDR_ANY) { 455 register struct in_ifaddr *ia; 456 457 for (ia = in_ifaddr.tqh_first; 458 ia; 459 ia = ia->ia_list.tqe_next) 460 if (ia->ia_ifp == ifp) { 461 ip->ip_src = ia->ia_addr.sin_addr; 462 break; 463 } 464 } 465 466 IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); 467 if (inm != NULL && 468 (imo == NULL || imo->imo_multicast_loop)) { 469 /* 470 * If we belong to the destination multicast group 471 * on the outgoing interface, and the caller did not 472 * forbid loopback, loop back a copy. 473 */ 474 ip_mloopback(ifp, m, dst); 475 } 476 #ifdef MROUTING 477 else { 478 /* 479 * If we are acting as a multicast router, perform 480 * multicast forwarding as if the packet had just 481 * arrived on the interface to which we are about 482 * to send. The multicast forwarding function 483 * recursively calls this function, using the 484 * IP_FORWARDING flag to prevent infinite recursion. 485 * 486 * Multicasts that are looped back by ip_mloopback(), 487 * above, will be forwarded by the ip_input() routine, 488 * if necessary. 489 */ 490 extern struct socket *ip_mrouter; 491 492 if (ip_mrouter && (flags & IP_FORWARDING) == 0) { 493 if (ip_mforward(m, ifp) != 0) { 494 m_freem(m); 495 goto done; 496 } 497 } 498 } 499 #endif 500 /* 501 * Multicasts with a time-to-live of zero may be looped- 502 * back, above, but must not be transmitted on a network. 503 * Also, multicasts addressed to the loopback interface 504 * are not sent -- the above call to ip_mloopback() will 505 * loop back a copy if this host actually belongs to the 506 * destination group on the loopback interface. 507 */ 508 if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) { 509 m_freem(m); 510 goto done; 511 } 512 513 goto sendit; 514 } 515 516 /* 517 * Look for broadcast address and and verify user is allowed to send 518 * such a packet; if the packet is going in an IPsec tunnel, skip 519 * this check. 520 */ 521 if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp))) { 522 if ((ifp->if_flags & IFF_BROADCAST) == 0) { 523 error = EADDRNOTAVAIL; 524 goto bad; 525 } 526 if ((flags & IP_ALLOWBROADCAST) == 0) { 527 error = EACCES; 528 goto bad; 529 } 530 531 /* Don't allow broadcast messages to be fragmented */ 532 if ((u_int16_t)ip->ip_len > ifp->if_mtu) { 533 error = EMSGSIZE; 534 goto bad; 535 } 536 m->m_flags |= M_BCAST; 537 } else 538 m->m_flags &= ~M_BCAST; 539 540 sendit: 541 /* 542 * If we're doing Path MTU discovery, we need to set DF unless 543 * the route's MTU is locked. 544 */ 545 if ((flags & IP_MTUDISC) && ro && ro->ro_rt && 546 (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) 547 ip->ip_off |= IP_DF; 548 549 #ifdef IPSEC 550 /* 551 * Check if the packet needs encapsulation. 552 */ 553 if (sproto != 0) { 554 s = splnet(); 555 556 /* 557 * Packet filter 558 */ 559 #if NPF > 0 560 561 if (pf_test(PF_OUT, &encif[0].sc_if, &m) != PF_PASS) { 562 error = EHOSTUNREACH; 563 splx(s); 564 m_freem(m); 565 goto done; 566 } 567 ip = mtod(m, struct ip *); 568 hlen = ip->ip_hl << 2; 569 #endif 570 571 tdb = gettdb(sspi, &sdst, sproto); 572 if (tdb == NULL) { 573 error = EHOSTUNREACH; 574 splx(s); 575 m_freem(m); 576 goto done; 577 } 578 579 /* Latch to PCB */ 580 if (inp) 581 tdb_add_inp(tdb, inp, 0); 582 583 /* Check if we are allowed to fragment */ 584 if ((ip->ip_off & IP_DF) && tdb->tdb_mtu && 585 (u_int16_t)ip->ip_len > tdb->tdb_mtu && 586 tdb->tdb_mtutimeout > time.tv_sec) { 587 struct rtentry *rt; 588 589 icmp_mtu = tdb->tdb_mtu; 590 splx(s); 591 592 /* Find a host route to store the mtu in */ 593 if (ro != NULL) 594 rt = ro->ro_rt; 595 if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) { 596 struct sockaddr_in dst = { 597 sizeof(struct sockaddr_in), AF_INET}; 598 dst.sin_addr = ip->ip_dst; 599 rt = icmp_mtudisc_clone((struct sockaddr *)&dst); 600 } 601 if (rt != NULL) { 602 rt->rt_rmx.rmx_mtu = icmp_mtu; 603 if (ro && ro->ro_rt != NULL) { 604 RTFREE(ro->ro_rt); 605 ro->ro_rt = (struct rtentry *) 0; 606 rtalloc(ro); 607 } 608 } 609 error = EMSGSIZE; 610 goto bad; 611 } 612 613 /* Massage the IP header for use by the IPsec code */ 614 ip->ip_len = htons((u_short) ip->ip_len); 615 ip->ip_off = htons((u_short) ip->ip_off); 616 617 /* 618 * Clear these -- they'll be set in the recursive invocation 619 * as needed. 620 */ 621 m->m_flags &= ~(M_MCAST | M_BCAST); 622 623 /* Callee frees mbuf */ 624 error = ipsp_process_packet(m, tdb, AF_INET, 0); 625 splx(s); 626 return error; /* Nothing more to be done */ 627 } 628 629 /* 630 * If deferred crypto processing is needed, check that the 631 * interface supports it. 632 */ 633 if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL)) 634 != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) { 635 /* Notify IPsec to do its own crypto. */ 636 ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1)); 637 m_freem(m); 638 error = EHOSTUNREACH; 639 goto done; 640 } 641 #endif /* IPSEC */ 642 643 /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */ 644 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) { 645 if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || 646 ifp->if_bridge != NULL) { 647 in_delayed_cksum(m); 648 m->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */ 649 } 650 } else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) { 651 if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || 652 ifp->if_bridge != NULL) { 653 in_delayed_cksum(m); 654 m->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */ 655 } 656 } 657 658 /* 659 * Packet filter 660 */ 661 #if NPF > 0 662 if (pf_test(PF_OUT, ifp, &m) != PF_PASS) { 663 error = EHOSTUNREACH; 664 m_freem(m); 665 goto done; 666 } 667 ip = mtod(m, struct ip *); 668 hlen = ip->ip_hl << 2; 669 #endif 670 671 /* 672 * If small enough for interface, can just send directly. 673 */ 674 if ((u_int16_t)ip->ip_len <= ifp->if_mtu) { 675 ip->ip_len = htons((u_int16_t)ip->ip_len); 676 ip->ip_off = htons((u_int16_t)ip->ip_off); 677 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 678 ifp->if_bridge == NULL) { 679 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 680 ipstat.ips_outhwcsum++; 681 } else { 682 ip->ip_sum = 0; 683 ip->ip_sum = in_cksum(m, hlen); 684 } 685 /* Update relevant hardware checksum stats for TCP/UDP */ 686 if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) 687 tcpstat.tcps_outhwcsum++; 688 else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) 689 udpstat.udps_outhwcsum++; 690 error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt); 691 goto done; 692 } 693 694 /* 695 * Too large for interface; fragment if possible. 696 * Must be able to put at least 8 bytes per fragment. 697 */ 698 if (ip->ip_off & IP_DF) { 699 #ifdef IPSEC 700 icmp_mtu = ifp->if_mtu; 701 #endif 702 error = EMSGSIZE; 703 /* 704 * This case can happen if the user changed the MTU 705 * of an interface after enabling IP on it. Because 706 * most netifs don't keep track of routes pointing to 707 * them, there is no way for one to update all its 708 * routes when the MTU is changed. 709 */ 710 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) 711 && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) 712 && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 713 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 714 } 715 ipstat.ips_cantfrag++; 716 goto bad; 717 } 718 len = (ifp->if_mtu - hlen) &~ 7; 719 if (len < 8) { 720 error = EMSGSIZE; 721 goto bad; 722 } 723 724 /* 725 * If we are doing fragmentation, we can't defer TCP/UDP 726 * checksumming; compute the checksum and clear the flag. 727 */ 728 if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) { 729 in_delayed_cksum(m); 730 m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT); 731 } 732 733 { 734 int mhlen, firstlen = len; 735 struct mbuf **mnext = &m->m_nextpkt; 736 737 /* 738 * Loop through length of segment after first fragment, 739 * make new header and copy data of each part and link onto chain. 740 */ 741 m0 = m; 742 mhlen = sizeof (struct ip); 743 for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) { 744 MGETHDR(m, M_DONTWAIT, MT_HEADER); 745 if (m == 0) { 746 error = ENOBUFS; 747 ipstat.ips_odropped++; 748 goto sendorfree; 749 } 750 *mnext = m; 751 mnext = &m->m_nextpkt; 752 m->m_data += max_linkhdr; 753 mhip = mtod(m, struct ip *); 754 *mhip = *ip; 755 /* we must inherit MCAST and BCAST flags */ 756 m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST); 757 if (hlen > sizeof (struct ip)) { 758 mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); 759 mhip->ip_hl = mhlen >> 2; 760 } 761 m->m_len = mhlen; 762 mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); 763 if (ip->ip_off & IP_MF) 764 mhip->ip_off |= IP_MF; 765 if (off + len >= (u_int16_t)ip->ip_len) 766 len = (u_int16_t)ip->ip_len - off; 767 else 768 mhip->ip_off |= IP_MF; 769 mhip->ip_len = htons((u_int16_t)(len + mhlen)); 770 m->m_next = m_copy(m0, off, len); 771 if (m->m_next == 0) { 772 error = ENOBUFS; /* ??? */ 773 ipstat.ips_odropped++; 774 goto sendorfree; 775 } 776 m->m_pkthdr.len = mhlen + len; 777 m->m_pkthdr.rcvif = (struct ifnet *)0; 778 mhip->ip_off = htons((u_int16_t)mhip->ip_off); 779 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 780 ifp->if_bridge == NULL) { 781 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 782 ipstat.ips_outhwcsum++; 783 } else { 784 mhip->ip_sum = 0; 785 mhip->ip_sum = in_cksum(m, mhlen); 786 } 787 ipstat.ips_ofragments++; 788 } 789 /* 790 * Update first fragment by trimming what's been copied out 791 * and updating header, then send each fragment (in order). 792 */ 793 m = m0; 794 m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len); 795 m->m_pkthdr.len = hlen + firstlen; 796 ip->ip_len = htons((u_int16_t)m->m_pkthdr.len); 797 ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF)); 798 if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) && 799 ifp->if_bridge == NULL) { 800 m->m_pkthdr.csum |= M_IPV4_CSUM_OUT; 801 ipstat.ips_outhwcsum++; 802 } else { 803 ip->ip_sum = 0; 804 ip->ip_sum = in_cksum(m, hlen); 805 } 806 sendorfree: 807 for (m = m0; m; m = m0) { 808 m0 = m->m_nextpkt; 809 m->m_nextpkt = 0; 810 if (error == 0) 811 error = (*ifp->if_output)(ifp, m, sintosa(dst), 812 ro->ro_rt); 813 else 814 m_freem(m); 815 } 816 817 if (error == 0) 818 ipstat.ips_fragmented++; 819 } 820 done: 821 if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) 822 RTFREE(ro->ro_rt); 823 return (error); 824 bad: 825 #ifdef IPSEC 826 if (error == EMSGSIZE && icmp_mtu != 0) 827 ipsec_adjust_mtu(m, icmp_mtu); 828 #endif 829 m_freem(m0); 830 goto done; 831 } 832 833 /* 834 * Insert IP options into preformed packet. 835 * Adjust IP destination as required for IP source routing, 836 * as indicated by a non-zero in_addr at the start of the options. 837 */ 838 static struct mbuf * 839 ip_insertoptions(m, opt, phlen) 840 register struct mbuf *m; 841 struct mbuf *opt; 842 int *phlen; 843 { 844 register struct ipoption *p = mtod(opt, struct ipoption *); 845 struct mbuf *n; 846 register struct ip *ip = mtod(m, struct ip *); 847 unsigned optlen; 848 849 optlen = opt->m_len - sizeof(p->ipopt_dst); 850 if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET) 851 return (m); /* XXX should fail */ 852 if (p->ipopt_dst.s_addr) 853 ip->ip_dst = p->ipopt_dst; 854 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 855 MGETHDR(n, M_DONTWAIT, MT_HEADER); 856 if (n == 0) 857 return (m); 858 M_MOVE_HDR(n, m); 859 n->m_pkthdr.len += optlen; 860 m->m_len -= sizeof(struct ip); 861 m->m_data += sizeof(struct ip); 862 n->m_next = m; 863 m = n; 864 m->m_len = optlen + sizeof(struct ip); 865 m->m_data += max_linkhdr; 866 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 867 } else { 868 m->m_data -= optlen; 869 m->m_len += optlen; 870 m->m_pkthdr.len += optlen; 871 ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 872 } 873 ip = mtod(m, struct ip *); 874 bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen); 875 *phlen = sizeof(struct ip) + optlen; 876 ip->ip_len += optlen; 877 return (m); 878 } 879 880 /* 881 * Copy options from ip to jp, 882 * omitting those not copied during fragmentation. 883 */ 884 int 885 ip_optcopy(ip, jp) 886 struct ip *ip, *jp; 887 { 888 register u_char *cp, *dp; 889 int opt, optlen, cnt; 890 891 cp = (u_char *)(ip + 1); 892 dp = (u_char *)(jp + 1); 893 cnt = (ip->ip_hl << 2) - sizeof (struct ip); 894 for (; cnt > 0; cnt -= optlen, cp += optlen) { 895 opt = cp[0]; 896 if (opt == IPOPT_EOL) 897 break; 898 if (opt == IPOPT_NOP) { 899 /* Preserve for IP mcast tunnel's LSRR alignment. */ 900 *dp++ = IPOPT_NOP; 901 optlen = 1; 902 continue; 903 } 904 #ifdef DIAGNOSTIC 905 if (cnt < IPOPT_OLEN + sizeof(*cp)) 906 panic("malformed IPv4 option passed to ip_optcopy"); 907 #endif 908 optlen = cp[IPOPT_OLEN]; 909 #ifdef DIAGNOSTIC 910 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 911 panic("malformed IPv4 option passed to ip_optcopy"); 912 #endif 913 /* bogus lengths should have been caught by ip_dooptions */ 914 if (optlen > cnt) 915 optlen = cnt; 916 if (IPOPT_COPIED(opt)) { 917 bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen); 918 dp += optlen; 919 } 920 } 921 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 922 *dp++ = IPOPT_EOL; 923 return (optlen); 924 } 925 926 /* 927 * IP socket option processing. 928 */ 929 int 930 ip_ctloutput(op, so, level, optname, mp) 931 int op; 932 struct socket *so; 933 int level, optname; 934 struct mbuf **mp; 935 { 936 register struct inpcb *inp = sotoinpcb(so); 937 register struct mbuf *m = *mp; 938 register int optval = 0; 939 #ifdef IPSEC 940 struct proc *p = curproc; /* XXX */ 941 struct ipsec_ref *ipr; 942 u_int16_t opt16val; 943 #endif 944 int error = 0; 945 946 if (level != IPPROTO_IP) { 947 error = EINVAL; 948 if (op == PRCO_SETOPT && *mp) 949 (void) m_free(*mp); 950 } else switch (op) { 951 case PRCO_SETOPT: 952 switch (optname) { 953 case IP_OPTIONS: 954 #ifdef notyet 955 case IP_RETOPTS: 956 return (ip_pcbopts(optname, &inp->inp_options, m)); 957 #else 958 return (ip_pcbopts(&inp->inp_options, m)); 959 #endif 960 961 case IP_TOS: 962 case IP_TTL: 963 case IP_RECVOPTS: 964 case IP_RECVRETOPTS: 965 case IP_RECVDSTADDR: 966 if (m == NULL || m->m_len != sizeof(int)) 967 error = EINVAL; 968 else { 969 optval = *mtod(m, int *); 970 switch (optname) { 971 972 case IP_TOS: 973 inp->inp_ip.ip_tos = optval; 974 break; 975 976 case IP_TTL: 977 inp->inp_ip.ip_ttl = optval; 978 break; 979 #define OPTSET(bit) \ 980 if (optval) \ 981 inp->inp_flags |= bit; \ 982 else \ 983 inp->inp_flags &= ~bit; 984 985 case IP_RECVOPTS: 986 OPTSET(INP_RECVOPTS); 987 break; 988 989 case IP_RECVRETOPTS: 990 OPTSET(INP_RECVRETOPTS); 991 break; 992 993 case IP_RECVDSTADDR: 994 OPTSET(INP_RECVDSTADDR); 995 break; 996 } 997 } 998 break; 999 #undef OPTSET 1000 1001 case IP_MULTICAST_IF: 1002 case IP_MULTICAST_TTL: 1003 case IP_MULTICAST_LOOP: 1004 case IP_ADD_MEMBERSHIP: 1005 case IP_DROP_MEMBERSHIP: 1006 error = ip_setmoptions(optname, &inp->inp_moptions, m); 1007 break; 1008 1009 case IP_PORTRANGE: 1010 if (m == 0 || m->m_len != sizeof(int)) 1011 error = EINVAL; 1012 else { 1013 optval = *mtod(m, int *); 1014 1015 switch (optval) { 1016 1017 case IP_PORTRANGE_DEFAULT: 1018 inp->inp_flags &= ~(INP_LOWPORT); 1019 inp->inp_flags &= ~(INP_HIGHPORT); 1020 break; 1021 1022 case IP_PORTRANGE_HIGH: 1023 inp->inp_flags &= ~(INP_LOWPORT); 1024 inp->inp_flags |= INP_HIGHPORT; 1025 break; 1026 1027 case IP_PORTRANGE_LOW: 1028 inp->inp_flags &= ~(INP_HIGHPORT); 1029 inp->inp_flags |= INP_LOWPORT; 1030 break; 1031 1032 default: 1033 1034 error = EINVAL; 1035 break; 1036 } 1037 } 1038 break; 1039 case IP_AUTH_LEVEL: 1040 case IP_ESP_TRANS_LEVEL: 1041 case IP_ESP_NETWORK_LEVEL: 1042 #ifndef IPSEC 1043 error = EOPNOTSUPP; 1044 #else 1045 if (m == 0 || m->m_len != sizeof(int)) { 1046 error = EINVAL; 1047 break; 1048 } 1049 optval = *mtod(m, int *); 1050 1051 if (optval < IPSEC_LEVEL_BYPASS || 1052 optval > IPSEC_LEVEL_UNIQUE) { 1053 error = EINVAL; 1054 break; 1055 } 1056 1057 /* Unlink cached output TDB to force a re-search */ 1058 if (inp->inp_tdb_out) { 1059 int s = spltdb(); 1060 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1061 inp, inp_tdb_out_next); 1062 splx(s); 1063 } 1064 1065 if (inp->inp_tdb_in) { 1066 int s = spltdb(); 1067 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1068 inp, inp_tdb_in_next); 1069 splx(s); 1070 } 1071 1072 switch (optname) { 1073 case IP_AUTH_LEVEL: 1074 if (optval < ipsec_auth_default_level && 1075 suser(p->p_ucred, &p->p_acflag)) { 1076 error = EACCES; 1077 break; 1078 } 1079 inp->inp_seclevel[SL_AUTH] = optval; 1080 break; 1081 1082 case IP_ESP_TRANS_LEVEL: 1083 if (optval < ipsec_esp_trans_default_level && 1084 suser(p->p_ucred, &p->p_acflag)) { 1085 error = EACCES; 1086 break; 1087 } 1088 inp->inp_seclevel[SL_ESP_TRANS] = optval; 1089 break; 1090 1091 case IP_ESP_NETWORK_LEVEL: 1092 if (optval < ipsec_esp_network_default_level && 1093 suser(p->p_ucred, &p->p_acflag)) { 1094 error = EACCES; 1095 break; 1096 } 1097 inp->inp_seclevel[SL_ESP_NETWORK] = optval; 1098 break; 1099 case IP_IPCOMP_LEVEL: 1100 if (optval < ipsec_ipcomp_default_level && 1101 suser(p->p_ucred, &p->p_acflag)) { 1102 error = EACCES; 1103 break; 1104 } 1105 inp->inp_seclevel[SL_IPCOMP] = optval; 1106 break; 1107 } 1108 if (!error) 1109 inp->inp_secrequire = get_sa_require(inp); 1110 #endif 1111 break; 1112 1113 case IP_IPSEC_REMOTE_CRED: 1114 case IP_IPSEC_REMOTE_AUTH: 1115 /* Can't set the remote credential or key */ 1116 error = EOPNOTSUPP; 1117 break; 1118 1119 case IP_IPSEC_LOCAL_ID: 1120 case IP_IPSEC_REMOTE_ID: 1121 case IP_IPSEC_LOCAL_CRED: 1122 case IP_IPSEC_LOCAL_AUTH: 1123 #ifndef IPSEC 1124 error = EOPNOTSUPP; 1125 #else 1126 if (m->m_len < 2) { 1127 error = EINVAL; 1128 break; 1129 } 1130 1131 m_copydata(m, 0, 2, (caddr_t) &opt16val); 1132 1133 /* If the type is 0, then we cleanup and return */ 1134 if (opt16val == 0) { 1135 switch (optname) { 1136 case IP_IPSEC_LOCAL_ID: 1137 if (inp->inp_ipsec_localid != NULL) 1138 ipsp_reffree(inp->inp_ipsec_localid); 1139 inp->inp_ipsec_localid = NULL; 1140 break; 1141 1142 case IP_IPSEC_REMOTE_ID: 1143 if (inp->inp_ipsec_remoteid != NULL) 1144 ipsp_reffree(inp->inp_ipsec_remoteid); 1145 inp->inp_ipsec_remoteid = NULL; 1146 break; 1147 1148 case IP_IPSEC_LOCAL_CRED: 1149 if (inp->inp_ipsec_localcred != NULL) 1150 ipsp_reffree(inp->inp_ipsec_localcred); 1151 inp->inp_ipsec_localcred = NULL; 1152 break; 1153 1154 case IP_IPSEC_LOCAL_AUTH: 1155 if (inp->inp_ipsec_localauth != NULL) 1156 ipsp_reffree(inp->inp_ipsec_localauth); 1157 inp->inp_ipsec_localauth = NULL; 1158 break; 1159 } 1160 1161 error = 0; 1162 break; 1163 } 1164 1165 /* Can't have an empty payload */ 1166 if (m->m_len == 2) { 1167 error = EINVAL; 1168 break; 1169 } 1170 1171 MALLOC(ipr, struct ipsec_ref *, 1172 sizeof(struct ipsec_ref) + m->m_len - 2, 1173 M_CREDENTIALS, M_NOWAIT); 1174 if (ipr == NULL) { 1175 error = ENOBUFS; 1176 break; 1177 } 1178 ipr->ref_count = 1; 1179 ipr->ref_malloctype = M_CREDENTIALS; 1180 ipr->ref_len = m->m_len - 2; 1181 ipr->ref_type = opt16val; 1182 m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1)); 1183 1184 switch (optname) { 1185 case IP_IPSEC_LOCAL_ID: 1186 /* Check valid types and NUL-termination */ 1187 if (ipr->ref_type < IPSP_IDENTITY_PREFIX 1188 || ipr->ref_type > IPSP_IDENTITY_CONNECTION 1189 || ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1190 FREE(ipr, M_CREDENTIALS); 1191 error = EINVAL; 1192 } else { 1193 if (inp->inp_ipsec_localid != NULL) 1194 ipsp_reffree(inp->inp_ipsec_localid); 1195 inp->inp_ipsec_localid = ipr; 1196 } 1197 break; 1198 case IP_IPSEC_REMOTE_ID: 1199 /* Check valid types and NUL-termination */ 1200 if (ipr->ref_type < IPSP_IDENTITY_PREFIX 1201 || ipr->ref_type > IPSP_IDENTITY_CONNECTION 1202 || ((char *)(ipr + 1))[ipr->ref_len - 1]) { 1203 FREE(ipr, M_CREDENTIALS); 1204 error = EINVAL; 1205 } else { 1206 if (inp->inp_ipsec_remoteid != NULL) 1207 ipsp_reffree(inp->inp_ipsec_remoteid); 1208 inp->inp_ipsec_remoteid = ipr; 1209 } 1210 break; 1211 case IP_IPSEC_LOCAL_CRED: 1212 if (ipr->ref_type < IPSP_CRED_KEYNOTE || 1213 ipr->ref_type > IPSP_CRED_X509) { 1214 FREE(ipr, M_CREDENTIALS); 1215 error = EINVAL; 1216 } else { 1217 if (inp->inp_ipsec_localcred != NULL) 1218 ipsp_reffree(inp->inp_ipsec_localcred); 1219 inp->inp_ipsec_localcred = ipr; 1220 } 1221 break; 1222 case IP_IPSEC_LOCAL_AUTH: 1223 if (ipr->ref_type < IPSP_AUTH_PASSPHRASE || 1224 ipr->ref_type > IPSP_AUTH_RSA) { 1225 FREE(ipr, M_CREDENTIALS); 1226 error = EINVAL; 1227 } else { 1228 if (inp->inp_ipsec_localauth != NULL) 1229 ipsp_reffree(inp->inp_ipsec_localauth); 1230 inp->inp_ipsec_localauth = ipr; 1231 } 1232 break; 1233 } 1234 1235 /* Unlink cached output TDB to force a re-search */ 1236 if (inp->inp_tdb_out) { 1237 int s = spltdb(); 1238 TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out, 1239 inp, inp_tdb_out_next); 1240 splx(s); 1241 } 1242 1243 if (inp->inp_tdb_in) { 1244 int s = spltdb(); 1245 TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in, 1246 inp, inp_tdb_in_next); 1247 splx(s); 1248 } 1249 #endif 1250 break; 1251 default: 1252 error = ENOPROTOOPT; 1253 break; 1254 } 1255 if (m) 1256 (void)m_free(m); 1257 break; 1258 1259 case PRCO_GETOPT: 1260 switch (optname) { 1261 case IP_OPTIONS: 1262 case IP_RETOPTS: 1263 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1264 if (inp->inp_options) { 1265 m->m_len = inp->inp_options->m_len; 1266 bcopy(mtod(inp->inp_options, caddr_t), 1267 mtod(m, caddr_t), (unsigned)m->m_len); 1268 } else 1269 m->m_len = 0; 1270 break; 1271 1272 case IP_TOS: 1273 case IP_TTL: 1274 case IP_RECVOPTS: 1275 case IP_RECVRETOPTS: 1276 case IP_RECVDSTADDR: 1277 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1278 m->m_len = sizeof(int); 1279 switch (optname) { 1280 1281 case IP_TOS: 1282 optval = inp->inp_ip.ip_tos; 1283 break; 1284 1285 case IP_TTL: 1286 optval = inp->inp_ip.ip_ttl; 1287 break; 1288 1289 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1290 1291 case IP_RECVOPTS: 1292 optval = OPTBIT(INP_RECVOPTS); 1293 break; 1294 1295 case IP_RECVRETOPTS: 1296 optval = OPTBIT(INP_RECVRETOPTS); 1297 break; 1298 1299 case IP_RECVDSTADDR: 1300 optval = OPTBIT(INP_RECVDSTADDR); 1301 break; 1302 } 1303 *mtod(m, int *) = optval; 1304 break; 1305 1306 case IP_MULTICAST_IF: 1307 case IP_MULTICAST_TTL: 1308 case IP_MULTICAST_LOOP: 1309 case IP_ADD_MEMBERSHIP: 1310 case IP_DROP_MEMBERSHIP: 1311 error = ip_getmoptions(optname, inp->inp_moptions, mp); 1312 break; 1313 1314 case IP_PORTRANGE: 1315 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1316 m->m_len = sizeof(int); 1317 1318 if (inp->inp_flags & INP_HIGHPORT) 1319 optval = IP_PORTRANGE_HIGH; 1320 else if (inp->inp_flags & INP_LOWPORT) 1321 optval = IP_PORTRANGE_LOW; 1322 else 1323 optval = 0; 1324 1325 *mtod(m, int *) = optval; 1326 break; 1327 1328 case IP_AUTH_LEVEL: 1329 case IP_ESP_TRANS_LEVEL: 1330 case IP_ESP_NETWORK_LEVEL: 1331 case IP_IPCOMP_LEVEL: 1332 #ifndef IPSEC 1333 m->m_len = sizeof(int); 1334 *mtod(m, int *) = IPSEC_LEVEL_NONE; 1335 #else 1336 m->m_len = sizeof(int); 1337 switch (optname) { 1338 case IP_AUTH_LEVEL: 1339 optval = inp->inp_seclevel[SL_AUTH]; 1340 break; 1341 1342 case IP_ESP_TRANS_LEVEL: 1343 optval = inp->inp_seclevel[SL_ESP_TRANS]; 1344 break; 1345 1346 case IP_ESP_NETWORK_LEVEL: 1347 optval = inp->inp_seclevel[SL_ESP_NETWORK]; 1348 break; 1349 case IP_IPCOMP_LEVEL: 1350 optval = inp->inp_seclevel[SL_IPCOMP]; 1351 break; 1352 } 1353 *mtod(m, int *) = optval; 1354 #endif 1355 break; 1356 case IP_IPSEC_LOCAL_ID: 1357 case IP_IPSEC_REMOTE_ID: 1358 case IP_IPSEC_LOCAL_CRED: 1359 case IP_IPSEC_REMOTE_CRED: 1360 case IP_IPSEC_LOCAL_AUTH: 1361 case IP_IPSEC_REMOTE_AUTH: 1362 #ifndef IPSEC 1363 error = EOPNOTSUPP; 1364 #else 1365 *mp = m = m_get(M_WAIT, MT_SOOPTS); 1366 m->m_len = sizeof(u_int16_t); 1367 switch (optname) { 1368 case IP_IPSEC_LOCAL_ID: 1369 ipr = inp->inp_ipsec_localid; 1370 opt16val = IPSP_IDENTITY_NONE; 1371 break; 1372 case IP_IPSEC_REMOTE_ID: 1373 ipr = inp->inp_ipsec_remoteid; 1374 opt16val = IPSP_IDENTITY_NONE; 1375 break; 1376 case IP_IPSEC_LOCAL_CRED: 1377 ipr = inp->inp_ipsec_localcred; 1378 opt16val = IPSP_CRED_NONE; 1379 break; 1380 case IP_IPSEC_REMOTE_CRED: 1381 ipr = inp->inp_ipsec_remotecred; 1382 opt16val = IPSP_CRED_NONE; 1383 break; 1384 case IP_IPSEC_LOCAL_AUTH: 1385 ipr = inp->inp_ipsec_localauth; 1386 break; 1387 case IP_IPSEC_REMOTE_AUTH: 1388 ipr = inp->inp_ipsec_remoteauth; 1389 break; 1390 } 1391 if (ipr == NULL) 1392 *mtod(m, u_int16_t *) = opt16val; 1393 else { 1394 m->m_len += ipr->ref_len; 1395 *mtod(m, u_int16_t *) = ipr->ref_type; 1396 m_copyback(m, sizeof(u_int16_t), ipr->ref_len, 1397 (caddr_t)(ipr + 1)); 1398 } 1399 #endif 1400 break; 1401 default: 1402 error = ENOPROTOOPT; 1403 break; 1404 } 1405 break; 1406 } 1407 return (error); 1408 } 1409 1410 /* 1411 * Set up IP options in pcb for insertion in output packets. 1412 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1413 * with destination address if source routed. 1414 */ 1415 int 1416 #ifdef notyet 1417 ip_pcbopts(optname, pcbopt, m) 1418 int optname; 1419 #else 1420 ip_pcbopts(pcbopt, m) 1421 #endif 1422 struct mbuf **pcbopt; 1423 register struct mbuf *m; 1424 { 1425 register int cnt, optlen; 1426 register u_char *cp; 1427 u_char opt; 1428 1429 /* turn off any old options */ 1430 if (*pcbopt) 1431 (void)m_free(*pcbopt); 1432 *pcbopt = 0; 1433 if (m == (struct mbuf *)0 || m->m_len == 0) { 1434 /* 1435 * Only turning off any previous options. 1436 */ 1437 if (m) 1438 (void)m_free(m); 1439 return (0); 1440 } 1441 1442 #ifndef vax 1443 if (m->m_len % sizeof(int32_t)) 1444 goto bad; 1445 #endif 1446 /* 1447 * IP first-hop destination address will be stored before 1448 * actual options; move other options back 1449 * and clear it when none present. 1450 */ 1451 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1452 goto bad; 1453 cnt = m->m_len; 1454 m->m_len += sizeof(struct in_addr); 1455 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1456 ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); 1457 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1458 1459 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1460 opt = cp[IPOPT_OPTVAL]; 1461 if (opt == IPOPT_EOL) 1462 break; 1463 if (opt == IPOPT_NOP) 1464 optlen = 1; 1465 else { 1466 if (cnt < IPOPT_OLEN + sizeof(*cp)) 1467 goto bad; 1468 optlen = cp[IPOPT_OLEN]; 1469 if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) 1470 goto bad; 1471 } 1472 switch (opt) { 1473 1474 default: 1475 break; 1476 1477 case IPOPT_LSRR: 1478 case IPOPT_SSRR: 1479 /* 1480 * user process specifies route as: 1481 * ->A->B->C->D 1482 * D must be our final destination (but we can't 1483 * check that since we may not have connected yet). 1484 * A is first hop destination, which doesn't appear in 1485 * actual IP option, but is stored before the options. 1486 */ 1487 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1488 goto bad; 1489 m->m_len -= sizeof(struct in_addr); 1490 cnt -= sizeof(struct in_addr); 1491 optlen -= sizeof(struct in_addr); 1492 cp[IPOPT_OLEN] = optlen; 1493 /* 1494 * Move first hop before start of options. 1495 */ 1496 bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1497 sizeof(struct in_addr)); 1498 /* 1499 * Then copy rest of options back 1500 * to close up the deleted entry. 1501 */ 1502 ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + 1503 sizeof(struct in_addr)), 1504 (caddr_t)&cp[IPOPT_OFFSET+1], 1505 (unsigned)cnt + sizeof(struct in_addr)); 1506 break; 1507 } 1508 } 1509 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1510 goto bad; 1511 *pcbopt = m; 1512 return (0); 1513 1514 bad: 1515 (void)m_free(m); 1516 return (EINVAL); 1517 } 1518 1519 /* 1520 * Set the IP multicast options in response to user setsockopt(). 1521 */ 1522 int 1523 ip_setmoptions(optname, imop, m) 1524 int optname; 1525 struct ip_moptions **imop; 1526 struct mbuf *m; 1527 { 1528 register int error = 0; 1529 u_char loop; 1530 register int i; 1531 struct in_addr addr; 1532 register struct ip_mreq *mreq; 1533 register struct ifnet *ifp; 1534 register struct ip_moptions *imo = *imop; 1535 struct route ro; 1536 register struct sockaddr_in *dst; 1537 1538 if (imo == NULL) { 1539 /* 1540 * No multicast option buffer attached to the pcb; 1541 * allocate one and initialize to default values. 1542 */ 1543 imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, 1544 M_WAITOK); 1545 1546 *imop = imo; 1547 imo->imo_multicast_ifp = NULL; 1548 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1549 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1550 imo->imo_num_memberships = 0; 1551 } 1552 1553 switch (optname) { 1554 1555 case IP_MULTICAST_IF: 1556 /* 1557 * Select the interface for outgoing multicast packets. 1558 */ 1559 if (m == NULL || m->m_len != sizeof(struct in_addr)) { 1560 error = EINVAL; 1561 break; 1562 } 1563 addr = *(mtod(m, struct in_addr *)); 1564 /* 1565 * INADDR_ANY is used to remove a previous selection. 1566 * When no interface is selected, a default one is 1567 * chosen every time a multicast packet is sent. 1568 */ 1569 if (addr.s_addr == INADDR_ANY) { 1570 imo->imo_multicast_ifp = NULL; 1571 break; 1572 } 1573 /* 1574 * The selected interface is identified by its local 1575 * IP address. Find the interface and confirm that 1576 * it supports multicasting. 1577 */ 1578 INADDR_TO_IFP(addr, ifp); 1579 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1580 error = EADDRNOTAVAIL; 1581 break; 1582 } 1583 imo->imo_multicast_ifp = ifp; 1584 break; 1585 1586 case IP_MULTICAST_TTL: 1587 /* 1588 * Set the IP time-to-live for outgoing multicast packets. 1589 */ 1590 if (m == NULL || m->m_len != 1) { 1591 error = EINVAL; 1592 break; 1593 } 1594 imo->imo_multicast_ttl = *(mtod(m, u_char *)); 1595 break; 1596 1597 case IP_MULTICAST_LOOP: 1598 /* 1599 * Set the loopback flag for outgoing multicast packets. 1600 * Must be zero or one. 1601 */ 1602 if (m == NULL || m->m_len != 1 || 1603 (loop = *(mtod(m, u_char *))) > 1) { 1604 error = EINVAL; 1605 break; 1606 } 1607 imo->imo_multicast_loop = loop; 1608 break; 1609 1610 case IP_ADD_MEMBERSHIP: 1611 /* 1612 * Add a multicast group membership. 1613 * Group must be a valid IP multicast address. 1614 */ 1615 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1616 error = EINVAL; 1617 break; 1618 } 1619 mreq = mtod(m, struct ip_mreq *); 1620 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1621 error = EINVAL; 1622 break; 1623 } 1624 /* 1625 * If no interface address was provided, use the interface of 1626 * the route to the given multicast address. 1627 */ 1628 if (mreq->imr_interface.s_addr == INADDR_ANY) { 1629 ro.ro_rt = NULL; 1630 dst = satosin(&ro.ro_dst); 1631 dst->sin_len = sizeof(*dst); 1632 dst->sin_family = AF_INET; 1633 dst->sin_addr = mreq->imr_multiaddr; 1634 rtalloc(&ro); 1635 if (ro.ro_rt == NULL) { 1636 error = EADDRNOTAVAIL; 1637 break; 1638 } 1639 ifp = ro.ro_rt->rt_ifp; 1640 rtfree(ro.ro_rt); 1641 } else { 1642 INADDR_TO_IFP(mreq->imr_interface, ifp); 1643 } 1644 /* 1645 * See if we found an interface, and confirm that it 1646 * supports multicast. 1647 */ 1648 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { 1649 error = EADDRNOTAVAIL; 1650 break; 1651 } 1652 /* 1653 * See if the membership already exists or if all the 1654 * membership slots are full. 1655 */ 1656 for (i = 0; i < imo->imo_num_memberships; ++i) { 1657 if (imo->imo_membership[i]->inm_ifp == ifp && 1658 imo->imo_membership[i]->inm_addr.s_addr 1659 == mreq->imr_multiaddr.s_addr) 1660 break; 1661 } 1662 if (i < imo->imo_num_memberships) { 1663 error = EADDRINUSE; 1664 break; 1665 } 1666 if (i == IP_MAX_MEMBERSHIPS) { 1667 error = ETOOMANYREFS; 1668 break; 1669 } 1670 /* 1671 * Everything looks good; add a new record to the multicast 1672 * address list for the given interface. 1673 */ 1674 if ((imo->imo_membership[i] = 1675 in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { 1676 error = ENOBUFS; 1677 break; 1678 } 1679 ++imo->imo_num_memberships; 1680 break; 1681 1682 case IP_DROP_MEMBERSHIP: 1683 /* 1684 * Drop a multicast group membership. 1685 * Group must be a valid IP multicast address. 1686 */ 1687 if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { 1688 error = EINVAL; 1689 break; 1690 } 1691 mreq = mtod(m, struct ip_mreq *); 1692 if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) { 1693 error = EINVAL; 1694 break; 1695 } 1696 /* 1697 * If an interface address was specified, get a pointer 1698 * to its ifnet structure. 1699 */ 1700 if (mreq->imr_interface.s_addr == INADDR_ANY) 1701 ifp = NULL; 1702 else { 1703 INADDR_TO_IFP(mreq->imr_interface, ifp); 1704 if (ifp == NULL) { 1705 error = EADDRNOTAVAIL; 1706 break; 1707 } 1708 } 1709 /* 1710 * Find the membership in the membership array. 1711 */ 1712 for (i = 0; i < imo->imo_num_memberships; ++i) { 1713 if ((ifp == NULL || 1714 imo->imo_membership[i]->inm_ifp == ifp) && 1715 imo->imo_membership[i]->inm_addr.s_addr == 1716 mreq->imr_multiaddr.s_addr) 1717 break; 1718 } 1719 if (i == imo->imo_num_memberships) { 1720 error = EADDRNOTAVAIL; 1721 break; 1722 } 1723 /* 1724 * Give up the multicast address record to which the 1725 * membership points. 1726 */ 1727 in_delmulti(imo->imo_membership[i]); 1728 /* 1729 * Remove the gap in the membership array. 1730 */ 1731 for (++i; i < imo->imo_num_memberships; ++i) 1732 imo->imo_membership[i-1] = imo->imo_membership[i]; 1733 --imo->imo_num_memberships; 1734 break; 1735 1736 default: 1737 error = EOPNOTSUPP; 1738 break; 1739 } 1740 1741 /* 1742 * If all options have default values, no need to keep the mbuf. 1743 */ 1744 if (imo->imo_multicast_ifp == NULL && 1745 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 1746 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 1747 imo->imo_num_memberships == 0) { 1748 free(*imop, M_IPMOPTS); 1749 *imop = NULL; 1750 } 1751 1752 return (error); 1753 } 1754 1755 /* 1756 * Return the IP multicast options in response to user getsockopt(). 1757 */ 1758 int 1759 ip_getmoptions(optname, imo, mp) 1760 int optname; 1761 register struct ip_moptions *imo; 1762 register struct mbuf **mp; 1763 { 1764 u_char *ttl; 1765 u_char *loop; 1766 struct in_addr *addr; 1767 struct in_ifaddr *ia; 1768 1769 *mp = m_get(M_WAIT, MT_SOOPTS); 1770 1771 switch (optname) { 1772 1773 case IP_MULTICAST_IF: 1774 addr = mtod(*mp, struct in_addr *); 1775 (*mp)->m_len = sizeof(struct in_addr); 1776 if (imo == NULL || imo->imo_multicast_ifp == NULL) 1777 addr->s_addr = INADDR_ANY; 1778 else { 1779 IFP_TO_IA(imo->imo_multicast_ifp, ia); 1780 addr->s_addr = (ia == NULL) ? INADDR_ANY 1781 : ia->ia_addr.sin_addr.s_addr; 1782 } 1783 return (0); 1784 1785 case IP_MULTICAST_TTL: 1786 ttl = mtod(*mp, u_char *); 1787 (*mp)->m_len = 1; 1788 *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL 1789 : imo->imo_multicast_ttl; 1790 return (0); 1791 1792 case IP_MULTICAST_LOOP: 1793 loop = mtod(*mp, u_char *); 1794 (*mp)->m_len = 1; 1795 *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP 1796 : imo->imo_multicast_loop; 1797 return (0); 1798 1799 default: 1800 return (EOPNOTSUPP); 1801 } 1802 } 1803 1804 /* 1805 * Discard the IP multicast options. 1806 */ 1807 void 1808 ip_freemoptions(imo) 1809 register struct ip_moptions *imo; 1810 { 1811 register int i; 1812 1813 if (imo != NULL) { 1814 for (i = 0; i < imo->imo_num_memberships; ++i) 1815 in_delmulti(imo->imo_membership[i]); 1816 free(imo, M_IPMOPTS); 1817 } 1818 } 1819 1820 /* 1821 * Routine called from ip_output() to loop back a copy of an IP multicast 1822 * packet to the input queue of a specified interface. Note that this 1823 * calls the output routine of the loopback "driver", but with an interface 1824 * pointer that might NOT be &loif -- easier than replicating that code here. 1825 */ 1826 static void 1827 ip_mloopback(ifp, m, dst) 1828 struct ifnet *ifp; 1829 register struct mbuf *m; 1830 register struct sockaddr_in *dst; 1831 { 1832 register struct ip *ip; 1833 struct mbuf *copym; 1834 1835 copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1836 if (copym != NULL) { 1837 /* 1838 * We don't bother to fragment if the IP length is greater 1839 * than the interface's MTU. Can this possibly matter? 1840 */ 1841 ip = mtod(copym, struct ip *); 1842 ip->ip_len = htons((u_int16_t)ip->ip_len); 1843 ip->ip_off = htons((u_int16_t)ip->ip_off); 1844 ip->ip_sum = 0; 1845 ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); 1846 (void) looutput(ifp, copym, sintosa(dst), NULL); 1847 } 1848 } 1849 1850 /* 1851 * Process a delayed payload checksum calculation. 1852 */ 1853 void 1854 in_delayed_cksum(struct mbuf *m) 1855 { 1856 struct ip *ip; 1857 u_int16_t csum, offset; 1858 1859 ip = mtod(m, struct ip *); 1860 offset = ip->ip_hl << 2; 1861 csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset); 1862 if (csum == 0 && ip->ip_p == IPPROTO_UDP) 1863 csum = 0xffff; 1864 1865 switch (ip->ip_p) { 1866 case IPPROTO_TCP: 1867 offset += offsetof(struct tcphdr, th_sum); 1868 break; 1869 1870 case IPPROTO_UDP: 1871 offset += offsetof(struct udphdr, uh_sum); 1872 break; 1873 1874 default: 1875 return; 1876 } 1877 1878 if ((offset + sizeof(u_int16_t)) > m->m_len) 1879 m_copyback(m, offset, sizeof(csum), (caddr_t) &csum); 1880 else 1881 *(u_int16_t *)(mtod(m, caddr_t) + offset) = csum; 1882 } 1883