1 /* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 30 * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.37 2003/04/15 06:44:45 silby Exp $ 31 * $DragonFly: src/sys/netinet/ip_output.c,v 1.56 2008/09/07 08:15:25 sephe Exp $ 32 */ 33 34 #define _IP_VHL 35 36 #include "opt_ipfw.h" 37 #include "opt_ipdn.h" 38 #include "opt_ipdivert.h" 39 #include "opt_ipfilter.h" 40 #include "opt_ipsec.h" 41 #include "opt_mbuf_stress_test.h" 42 #include "opt_mpls.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/proc.h> 53 #include <sys/sysctl.h> 54 #include <sys/thread2.h> 55 #include <sys/in_cksum.h> 56 57 #include <net/if.h> 58 #include <net/netisr.h> 59 #include <net/pfil.h> 60 #include <net/route.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_systm.h> 64 #include <netinet/ip.h> 65 #include <netinet/in_pcb.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip_var.h> 68 #ifdef IPDIVERT 69 #include <netinet/ip_divert.h> 70 #endif 71 72 #include <netproto/mpls/mpls_var.h> 73 74 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); 75 76 #ifdef IPSEC 77 #include <netinet6/ipsec.h> 78 #include <netproto/key/key.h> 79 #ifdef IPSEC_DEBUG 80 #include <netproto/key/key_debug.h> 81 #else 82 #define KEYDEBUG(lev,arg) 83 #endif 84 #endif /*IPSEC*/ 85 86 #ifdef FAST_IPSEC 87 #include <netproto/ipsec/ipsec.h> 88 #include <netproto/ipsec/xform.h> 89 #include <netproto/ipsec/key.h> 90 #endif /*FAST_IPSEC*/ 91 92 #include <net/ipfw/ip_fw.h> 93 #include <net/dummynet/ip_dummynet.h> 94 95 #define print_ip(x, a, y) kprintf("%s %d.%d.%d.%d%s",\ 96 x, (ntohl(a.s_addr)>>24)&0xFF,\ 97 (ntohl(a.s_addr)>>16)&0xFF,\ 98 (ntohl(a.s_addr)>>8)&0xFF,\ 99 (ntohl(a.s_addr))&0xFF, y); 100 101 u_short ip_id; 102 103 #ifdef MBUF_STRESS_TEST 104 int mbuf_frag_size = 0; 105 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, 106 &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); 107 #endif 108 109 static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); 110 static struct ifnet *ip_multicast_if(struct in_addr *, int *); 111 static void ip_mloopback 112 (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); 113 static int ip_getmoptions 114 (struct sockopt *, struct ip_moptions *); 115 static int ip_pcbopts(int, struct mbuf **, struct mbuf *); 116 static int ip_setmoptions 117 (struct sockopt *, struct ip_moptions **); 118 119 int ip_optcopy(struct ip *, struct ip *); 120 121 122 extern struct protosw inetsw[]; 123 124 /* 125 * IP output. The packet in mbuf chain m contains a skeletal IP 126 * header (with len, off, ttl, proto, tos, src, dst). 127 * The mbuf chain containing the packet will be freed. 128 * The mbuf opt, if present, will not be freed. 129 */ 130 int 131 ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, 132 int flags, struct ip_moptions *imo, struct inpcb *inp) 133 { 134 struct ip *ip; 135 struct ifnet *ifp = NULL; /* keep compiler happy */ 136 struct mbuf *m; 137 int hlen = sizeof(struct ip); 138 int len, off, error = 0; 139 struct sockaddr_in *dst = NULL; /* keep compiler happy */ 140 struct in_ifaddr *ia = NULL; 141 int isbroadcast, sw_csum; 142 struct in_addr pkt_dst; 143 struct route iproute; 144 struct m_tag *mtag; 145 #ifdef IPSEC 146 struct secpolicy *sp = NULL; 147 struct socket *so = inp ? inp->inp_socket : NULL; 148 #endif 149 #ifdef FAST_IPSEC 150 struct secpolicy *sp = NULL; 151 struct tdb_ident *tdbi; 152 #endif /* FAST_IPSEC */ 153 struct sockaddr_in *next_hop = NULL; 154 int src_was_INADDR_ANY = 0; /* as the name says... */ 155 156 m = m0; 157 M_ASSERTPKTHDR(m); 158 159 if (ro == NULL) { 160 ro = &iproute; 161 bzero(ro, sizeof *ro); 162 } 163 164 if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED) { 165 /* Next hop */ 166 mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 167 KKASSERT(mtag != NULL); 168 next_hop = m_tag_data(mtag); 169 } 170 171 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 172 struct dn_pkt *dn_pkt; 173 174 /* Extract info from dummynet tag */ 175 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 176 KKASSERT(mtag != NULL); 177 dn_pkt = m_tag_data(mtag); 178 179 /* 180 * The packet was already tagged, so part of the 181 * processing was already done, and we need to go down. 182 * Get the calculated parameters from the tag. 183 */ 184 ifp = dn_pkt->ifp; 185 186 KKASSERT(ro == &iproute); 187 *ro = dn_pkt->ro; /* structure copy */ 188 189 dst = dn_pkt->dn_dst; 190 if (dst == (struct sockaddr_in *)&(dn_pkt->ro.ro_dst)) { 191 /* If 'dst' points into dummynet tag, adjust it */ 192 dst = (struct sockaddr_in *)&(ro->ro_dst); 193 } 194 195 ip = mtod(m, struct ip *); 196 hlen = IP_VHL_HL(ip->ip_vhl) << 2 ; 197 if (ro->ro_rt) 198 ia = ifatoia(ro->ro_rt->rt_ifa); 199 goto sendit; 200 } 201 202 if (opt) { 203 len = 0; 204 m = ip_insertoptions(m, opt, &len); 205 if (len != 0) 206 hlen = len; 207 } 208 ip = mtod(m, struct ip *); 209 pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst; 210 211 /* 212 * Fill in IP header. 213 */ 214 if (!(flags & (IP_FORWARDING|IP_RAWOUTPUT))) { 215 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); 216 ip->ip_off &= IP_DF; 217 ip->ip_id = ip_newid(); 218 ipstat.ips_localout++; 219 } else { 220 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 221 } 222 223 dst = (struct sockaddr_in *)&ro->ro_dst; 224 /* 225 * If there is a cached route, 226 * check that it is to the same destination 227 * and is still up. If not, free it and try again. 228 * The address family should also be checked in case of sharing the 229 * cache with IPv6. 230 */ 231 if (ro->ro_rt && 232 (!(ro->ro_rt->rt_flags & RTF_UP) || 233 dst->sin_family != AF_INET || 234 dst->sin_addr.s_addr != pkt_dst.s_addr)) { 235 rtfree(ro->ro_rt); 236 ro->ro_rt = (struct rtentry *)NULL; 237 } 238 if (ro->ro_rt == NULL) { 239 bzero(dst, sizeof *dst); 240 dst->sin_family = AF_INET; 241 dst->sin_len = sizeof *dst; 242 dst->sin_addr = pkt_dst; 243 } 244 /* 245 * If routing to interface only, 246 * short circuit routing lookup. 247 */ 248 if (flags & IP_ROUTETOIF) { 249 if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL && 250 (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) { 251 ipstat.ips_noroute++; 252 error = ENETUNREACH; 253 goto bad; 254 } 255 ifp = ia->ia_ifp; 256 ip->ip_ttl = 1; 257 isbroadcast = in_broadcast(dst->sin_addr, ifp); 258 } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) && 259 imo != NULL && imo->imo_multicast_ifp != NULL) { 260 /* 261 * Bypass the normal routing lookup for multicast 262 * packets if the interface is specified. 263 */ 264 ifp = imo->imo_multicast_ifp; 265 ia = IFP_TO_IA(ifp); 266 isbroadcast = 0; /* fool gcc */ 267 } else { 268 /* 269 * If this is the case, we probably don't want to allocate 270 * a protocol-cloned route since we didn't get one from the 271 * ULP. This lets TCP do its thing, while not burdening 272 * forwarding or ICMP with the overhead of cloning a route. 273 * Of course, we still want to do any cloning requested by 274 * the link layer, as this is probably required in all cases 275 * for correct operation (as it is for ARP). 276 */ 277 if (ro->ro_rt == NULL) 278 rtalloc_ign(ro, RTF_PRCLONING); 279 if (ro->ro_rt == NULL) { 280 ipstat.ips_noroute++; 281 error = EHOSTUNREACH; 282 goto bad; 283 } 284 ia = ifatoia(ro->ro_rt->rt_ifa); 285 ifp = ro->ro_rt->rt_ifp; 286 ro->ro_rt->rt_use++; 287 if (ro->ro_rt->rt_flags & RTF_GATEWAY) 288 dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; 289 if (ro->ro_rt->rt_flags & RTF_HOST) 290 isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); 291 else 292 isbroadcast = in_broadcast(dst->sin_addr, ifp); 293 } 294 if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { 295 struct in_multi *inm; 296 297 m->m_flags |= M_MCAST; 298 /* 299 * IP destination address is multicast. Make sure "dst" 300 * still points to the address in "ro". (It may have been 301 * changed to point to a gateway address, above.) 302 */ 303 dst = (struct sockaddr_in *)&ro->ro_dst; 304 /* 305 * See if the caller provided any multicast options 306 */ 307 if (imo != NULL) { 308 ip->ip_ttl = imo->imo_multicast_ttl; 309 if (imo->imo_multicast_vif != -1) 310 ip->ip_src.s_addr = 311 ip_mcast_src ? 312 ip_mcast_src(imo->imo_multicast_vif) : 313 INADDR_ANY; 314 } else 315 ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; 316 /* 317 * Confirm that the outgoing interface supports multicast. 318 */ 319 if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { 320 if (!(ifp->if_flags & IFF_MULTICAST)) { 321 ipstat.ips_noroute++; 322 error = ENETUNREACH; 323 goto bad; 324 } 325 } 326 /* 327 * If source address not specified yet, use address 328 * of outgoing interface. 329 */ 330 if (ip->ip_src.s_addr == INADDR_ANY) { 331 /* Interface may have no addresses. */ 332 if (ia != NULL) 333 ip->ip_src = IA_SIN(ia)->sin_addr; 334 } 335 336 IN_LOOKUP_MULTI(pkt_dst, ifp, inm); 337 if (inm != NULL && 338 (imo == NULL || imo->imo_multicast_loop)) { 339 /* 340 * If we belong to the destination multicast group 341 * on the outgoing interface, and the caller did not 342 * forbid loopback, loop back a copy. 343 */ 344 ip_mloopback(ifp, m, dst, hlen); 345 } 346 else { 347 /* 348 * If we are acting as a multicast router, perform 349 * multicast forwarding as if the packet had just 350 * arrived on the interface to which we are about 351 * to send. The multicast forwarding function 352 * recursively calls this function, using the 353 * IP_FORWARDING flag to prevent infinite recursion. 354 * 355 * Multicasts that are looped back by ip_mloopback(), 356 * above, will be forwarded by the ip_input() routine, 357 * if necessary. 358 */ 359 if (ip_mrouter && !(flags & IP_FORWARDING)) { 360 /* 361 * If rsvp daemon is not running, do not 362 * set ip_moptions. This ensures that the packet 363 * is multicast and not just sent down one link 364 * as prescribed by rsvpd. 365 */ 366 if (!rsvp_on) 367 imo = NULL; 368 if (ip_mforward && 369 ip_mforward(ip, ifp, m, imo) != 0) { 370 m_freem(m); 371 goto done; 372 } 373 } 374 } 375 376 /* 377 * Multicasts with a time-to-live of zero may be looped- 378 * back, above, but must not be transmitted on a network. 379 * Also, multicasts addressed to the loopback interface 380 * are not sent -- the above call to ip_mloopback() will 381 * loop back a copy if this host actually belongs to the 382 * destination group on the loopback interface. 383 */ 384 if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { 385 m_freem(m); 386 goto done; 387 } 388 389 goto sendit; 390 } else { 391 m->m_flags &= ~M_MCAST; 392 } 393 #ifndef notdef 394 /* 395 * If the source address is not specified yet, use the address 396 * of the outoing interface. In case, keep note we did that, so 397 * if the the firewall changes the next-hop causing the output 398 * interface to change, we can fix that. 399 */ 400 if (ip->ip_src.s_addr == INADDR_ANY) { 401 /* Interface may have no addresses. */ 402 if (ia != NULL) { 403 ip->ip_src = IA_SIN(ia)->sin_addr; 404 src_was_INADDR_ANY = 1; 405 } 406 } 407 #endif /* notdef */ 408 #ifdef ALTQ 409 /* 410 * Disable packet drop hack. 411 * Packetdrop should be done by queueing. 412 */ 413 #else /* !ALTQ */ 414 /* 415 * Verify that we have any chance at all of being able to queue 416 * the packet or packet fragments 417 */ 418 if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= 419 ifp->if_snd.ifq_maxlen) { 420 error = ENOBUFS; 421 ipstat.ips_odropped++; 422 goto bad; 423 } 424 #endif /* !ALTQ */ 425 426 /* 427 * Look for broadcast address and 428 * verify user is allowed to send 429 * such a packet. 430 */ 431 if (isbroadcast) { 432 if (!(ifp->if_flags & IFF_BROADCAST)) { 433 error = EADDRNOTAVAIL; 434 goto bad; 435 } 436 if (!(flags & IP_ALLOWBROADCAST)) { 437 error = EACCES; 438 goto bad; 439 } 440 /* don't allow broadcast messages to be fragmented */ 441 if (ip->ip_len > ifp->if_mtu) { 442 error = EMSGSIZE; 443 goto bad; 444 } 445 m->m_flags |= M_BCAST; 446 } else { 447 m->m_flags &= ~M_BCAST; 448 } 449 450 sendit: 451 #ifdef IPSEC 452 /* get SP for this packet */ 453 if (so == NULL) 454 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); 455 else 456 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); 457 458 if (sp == NULL) { 459 ipsecstat.out_inval++; 460 goto bad; 461 } 462 463 error = 0; 464 465 /* check policy */ 466 switch (sp->policy) { 467 case IPSEC_POLICY_DISCARD: 468 /* 469 * This packet is just discarded. 470 */ 471 ipsecstat.out_polvio++; 472 goto bad; 473 474 case IPSEC_POLICY_BYPASS: 475 case IPSEC_POLICY_NONE: 476 /* no need to do IPsec. */ 477 goto skip_ipsec; 478 479 case IPSEC_POLICY_IPSEC: 480 if (sp->req == NULL) { 481 /* acquire a policy */ 482 error = key_spdacquire(sp); 483 goto bad; 484 } 485 break; 486 487 case IPSEC_POLICY_ENTRUST: 488 default: 489 kprintf("ip_output: Invalid policy found. %d\n", sp->policy); 490 } 491 { 492 struct ipsec_output_state state; 493 bzero(&state, sizeof state); 494 state.m = m; 495 if (flags & IP_ROUTETOIF) { 496 state.ro = &iproute; 497 bzero(&iproute, sizeof iproute); 498 } else 499 state.ro = ro; 500 state.dst = (struct sockaddr *)dst; 501 502 ip->ip_sum = 0; 503 504 /* 505 * XXX 506 * delayed checksums are not currently compatible with IPsec 507 */ 508 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 509 in_delayed_cksum(m); 510 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 511 } 512 513 ip->ip_len = htons(ip->ip_len); 514 ip->ip_off = htons(ip->ip_off); 515 516 error = ipsec4_output(&state, sp, flags); 517 518 m = state.m; 519 if (flags & IP_ROUTETOIF) { 520 /* 521 * if we have tunnel mode SA, we may need to ignore 522 * IP_ROUTETOIF. 523 */ 524 if (state.ro != &iproute || state.ro->ro_rt != NULL) { 525 flags &= ~IP_ROUTETOIF; 526 ro = state.ro; 527 } 528 } else 529 ro = state.ro; 530 dst = (struct sockaddr_in *)state.dst; 531 if (error) { 532 /* mbuf is already reclaimed in ipsec4_output. */ 533 m0 = NULL; 534 switch (error) { 535 case EHOSTUNREACH: 536 case ENETUNREACH: 537 case EMSGSIZE: 538 case ENOBUFS: 539 case ENOMEM: 540 break; 541 default: 542 kprintf("ip4_output (ipsec): error code %d\n", error); 543 /*fall through*/ 544 case ENOENT: 545 /* don't show these error codes to the user */ 546 error = 0; 547 break; 548 } 549 goto bad; 550 } 551 } 552 553 /* be sure to update variables that are affected by ipsec4_output() */ 554 ip = mtod(m, struct ip *); 555 #ifdef _IP_VHL 556 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 557 #else 558 hlen = ip->ip_hl << 2; 559 #endif 560 if (ro->ro_rt == NULL) { 561 if (!(flags & IP_ROUTETOIF)) { 562 kprintf("ip_output: " 563 "can't update route after IPsec processing\n"); 564 error = EHOSTUNREACH; /*XXX*/ 565 goto bad; 566 } 567 } else { 568 ia = ifatoia(ro->ro_rt->rt_ifa); 569 ifp = ro->ro_rt->rt_ifp; 570 } 571 572 /* make it flipped, again. */ 573 ip->ip_len = ntohs(ip->ip_len); 574 ip->ip_off = ntohs(ip->ip_off); 575 skip_ipsec: 576 #endif /*IPSEC*/ 577 #ifdef FAST_IPSEC 578 /* 579 * Check the security policy (SP) for the packet and, if 580 * required, do IPsec-related processing. There are two 581 * cases here; the first time a packet is sent through 582 * it will be untagged and handled by ipsec4_checkpolicy. 583 * If the packet is resubmitted to ip_output (e.g. after 584 * AH, ESP, etc. processing), there will be a tag to bypass 585 * the lookup and related policy checking. 586 */ 587 mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); 588 crit_enter(); 589 if (mtag != NULL) { 590 tdbi = (struct tdb_ident *)m_tag_data(mtag); 591 sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND); 592 if (sp == NULL) 593 error = -EINVAL; /* force silent drop */ 594 m_tag_delete(m, mtag); 595 } else { 596 sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags, 597 &error, inp); 598 } 599 /* 600 * There are four return cases: 601 * sp != NULL apply IPsec policy 602 * sp == NULL, error == 0 no IPsec handling needed 603 * sp == NULL, error == -EINVAL discard packet w/o error 604 * sp == NULL, error != 0 discard packet, report error 605 */ 606 if (sp != NULL) { 607 /* Loop detection, check if ipsec processing already done */ 608 KASSERT(sp->req != NULL, ("ip_output: no ipsec request")); 609 for (mtag = m_tag_first(m); mtag != NULL; 610 mtag = m_tag_next(m, mtag)) { 611 if (mtag->m_tag_cookie != MTAG_ABI_COMPAT) 612 continue; 613 if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && 614 mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) 615 continue; 616 /* 617 * Check if policy has an SA associated with it. 618 * This can happen when an SP has yet to acquire 619 * an SA; e.g. on first reference. If it occurs, 620 * then we let ipsec4_process_packet do its thing. 621 */ 622 if (sp->req->sav == NULL) 623 break; 624 tdbi = (struct tdb_ident *)m_tag_data(mtag); 625 if (tdbi->spi == sp->req->sav->spi && 626 tdbi->proto == sp->req->sav->sah->saidx.proto && 627 bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst, 628 sizeof(union sockaddr_union)) == 0) { 629 /* 630 * No IPsec processing is needed, free 631 * reference to SP. 632 * 633 * NB: null pointer to avoid free at 634 * done: below. 635 */ 636 KEY_FREESP(&sp), sp = NULL; 637 crit_exit(); 638 goto spd_done; 639 } 640 } 641 642 /* 643 * Do delayed checksums now because we send before 644 * this is done in the normal processing path. 645 */ 646 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 647 in_delayed_cksum(m); 648 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 649 } 650 651 ip->ip_len = htons(ip->ip_len); 652 ip->ip_off = htons(ip->ip_off); 653 654 /* NB: callee frees mbuf */ 655 error = ipsec4_process_packet(m, sp->req, flags, 0); 656 /* 657 * Preserve KAME behaviour: ENOENT can be returned 658 * when an SA acquire is in progress. Don't propagate 659 * this to user-level; it confuses applications. 660 * 661 * XXX this will go away when the SADB is redone. 662 */ 663 if (error == ENOENT) 664 error = 0; 665 crit_exit(); 666 goto done; 667 } else { 668 crit_exit(); 669 670 if (error != 0) { 671 /* 672 * Hack: -EINVAL is used to signal that a packet 673 * should be silently discarded. This is typically 674 * because we asked key management for an SA and 675 * it was delayed (e.g. kicked up to IKE). 676 */ 677 if (error == -EINVAL) 678 error = 0; 679 goto bad; 680 } else { 681 /* No IPsec processing for this packet. */ 682 } 683 #ifdef notyet 684 /* 685 * If deferred crypto processing is needed, check that 686 * the interface supports it. 687 */ 688 mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL); 689 if (mtag != NULL && !(ifp->if_capenable & IFCAP_IPSEC)) { 690 /* notify IPsec to do its own crypto */ 691 ipsp_skipcrypto_unmark((struct tdb_ident *)m_tag_data(mtag)); 692 error = EHOSTUNREACH; 693 goto bad; 694 } 695 #endif 696 } 697 spd_done: 698 #endif /* FAST_IPSEC */ 699 /* 700 * IpHack's section. 701 * - Xlate: translate packet's addr/port (NAT). 702 * - Firewall: deny/allow/etc. 703 * - Wrap: fake packet's addr/port <unimpl.> 704 * - Encapsulate: put it in another IP and send out. <unimp.> 705 */ 706 707 /* 708 * Run through list of hooks for output packets. 709 */ 710 if (pfil_has_hooks(&inet_pfil_hook)) { 711 error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT); 712 if (error != 0 || m == NULL) 713 goto done; 714 ip = mtod(m, struct ip *); 715 } 716 717 /* 718 * Check with the firewall... 719 * but not if we are already being fwd'd from a firewall. 720 */ 721 if (fw_enable && IPFW_LOADED && !next_hop) { 722 struct sockaddr_in *old = dst; 723 struct ip_fw_args args; 724 725 if (m->m_pkthdr.fw_flags & DUMMYNET_MBUF_TAGGED) { 726 /* Extract info from dummynet tag */ 727 mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); 728 KKASSERT(mtag != NULL); 729 args.rule = 730 ((struct dn_pkt *)m_tag_data(mtag))->dn_priv; 731 KKASSERT(args.rule != NULL); 732 733 m_tag_delete(m, mtag); 734 m->m_pkthdr.fw_flags &= ~DUMMYNET_MBUF_TAGGED; 735 } else { 736 args.rule = NULL; 737 } 738 739 args.eh = NULL; 740 args.m = m; 741 args.oif = ifp; 742 off = ip_fw_chk_ptr(&args); 743 m = args.m; 744 745 if (m == NULL) { 746 error = EACCES; 747 goto done; 748 } 749 ip = mtod(m, struct ip *); 750 751 if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED) { 752 mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); 753 KKASSERT(mtag != NULL); 754 next_hop = m_tag_data(mtag); 755 dst = next_hop; 756 } 757 758 /* 759 * On return we must do the following: 760 * (off & IP_FW_PORT_DENY_FLAG) -> drop the pkt (new interface) 761 * 1<=off<= 0xffff -> DIVERT 762 * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe 763 * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet 764 * dst != old -> IPFIREWALL_FORWARD 765 * off==0, dst==old -> accept 766 * If some of the above modules are not compiled in, then 767 * we should't have to check the corresponding condition 768 * (because the ipfw control socket should not accept 769 * unsupported rules), but better play safe and drop 770 * packets in case of doubt. 771 */ 772 if (off & IP_FW_PORT_DENY_FLAG) { 773 m_freem(m); 774 error = EACCES; 775 goto done; 776 } 777 if (off == 0 && dst == old) /* common case */ 778 goto pass; 779 if (off & IP_FW_PORT_DYNT_FLAG) { 780 /* 781 * pass the pkt to dummynet. Need to include 782 * pipe number, m, ifp, ro, dst because these are 783 * not recomputed in the next pass. 784 * All other parameters have been already used and 785 * so they are not needed anymore. 786 * XXX note: if the ifp or ro entry are deleted 787 * while a pkt is in dummynet, we are in trouble! 788 */ 789 args.ro = ro; 790 args.dst = dst; 791 args.flags = flags; 792 793 error = 0; 794 ip_fw_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, &args); 795 goto done; 796 } 797 #ifdef IPDIVERT 798 if (off != 0 && !(off & IP_FW_PORT_DYNT_FLAG)) { 799 struct mbuf *clone = NULL; 800 801 /* Clone packet if we're doing a 'tee' */ 802 if ((off & IP_FW_PORT_TEE_FLAG)) 803 clone = m_dup(m, MB_DONTWAIT); 804 805 /* 806 * XXX 807 * delayed checksums are not currently compatible 808 * with divert sockets. 809 */ 810 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 811 in_delayed_cksum(m); 812 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 813 } 814 815 /* Restore packet header fields to original values */ 816 ip->ip_len = htons(ip->ip_len); 817 ip->ip_off = htons(ip->ip_off); 818 819 /* Deliver packet to divert input routine */ 820 divert_packet(m, 0); 821 822 /* If 'tee', continue with original packet */ 823 if (clone != NULL) { 824 m = clone; 825 ip = mtod(m, struct ip *); 826 goto pass; 827 } 828 goto done; 829 } 830 #endif 831 832 /* IPFIREWALL_FORWARD */ 833 /* 834 * Check dst to make sure it is directly reachable on the 835 * interface we previously thought it was. 836 * If it isn't (which may be likely in some situations) we have 837 * to re-route it (ie, find a route for the next-hop and the 838 * associated interface) and set them here. This is nested 839 * forwarding which in most cases is undesirable, except where 840 * such control is nigh impossible. So we do it here. 841 * And I'm babbling. 842 */ 843 if (off == 0 && old != dst) { /* FORWARD, dst has changed */ 844 #if 0 845 /* 846 * XXX To improve readability, this block should be 847 * changed into a function call as below: 848 */ 849 error = ip_ipforward(&m, &dst, &ifp); 850 if (error) 851 goto bad; 852 if (m == NULL) /* ip_input consumed the mbuf */ 853 goto done; 854 #else 855 struct in_ifaddr *ia; 856 struct in_ifaddr_container *iac; 857 858 /* 859 * XXX sro_fwd below is static, and a pointer 860 * to it gets passed to routines downstream. 861 * This could have surprisingly bad results in 862 * practice, because its content is overwritten 863 * by subsequent packets. 864 */ 865 /* There must be a better way to do this next line... */ 866 static struct route sro_fwd; 867 struct route *ro_fwd = &sro_fwd; 868 869 #if 0 870 print_ip("IPFIREWALL_FORWARD: New dst ip: ", 871 dst->sin_addr, "\n"); 872 #endif 873 874 /* 875 * We need to figure out if we have been forwarded 876 * to a local socket. If so, then we should somehow 877 * "loop back" to ip_input, and get directed to the 878 * PCB as if we had received this packet. This is 879 * because it may be dificult to identify the packets 880 * you want to forward until they are being output 881 * and have selected an interface. (e.g. locally 882 * initiated packets) If we used the loopback inteface, 883 * we would not be able to control what happens 884 * as the packet runs through ip_input() as 885 * it is done through a ISR. 886 */ 887 ia = NULL; 888 LIST_FOREACH(iac, INADDR_HASH(dst->sin_addr.s_addr), 889 ia_hash) { 890 /* 891 * If the addr to forward to is one 892 * of ours, we pretend to 893 * be the destination for this packet. 894 */ 895 if (IA_SIN(iac->ia)->sin_addr.s_addr == 896 dst->sin_addr.s_addr) { 897 ia = iac->ia; 898 break; 899 } 900 } 901 if (ia != NULL) { /* tell ip_input "dont filter" */ 902 if (m->m_pkthdr.rcvif == NULL) 903 m->m_pkthdr.rcvif = ifunit("lo0"); 904 if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 905 m->m_pkthdr.csum_flags |= 906 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 907 m->m_pkthdr.csum_data = 0xffff; 908 } 909 m->m_pkthdr.csum_flags |= 910 CSUM_IP_CHECKED | CSUM_IP_VALID; 911 ip->ip_len = htons(ip->ip_len); 912 ip->ip_off = htons(ip->ip_off); 913 ip_input(m); 914 goto done; 915 } 916 /* Some of the logic for this was nicked from above. 917 * 918 * This rewrites the cached route in a local PCB. 919 * Is this what we want to do? 920 */ 921 bcopy(dst, &ro_fwd->ro_dst, sizeof *dst); 922 ro_fwd->ro_rt = NULL; 923 924 rtalloc_ign(ro_fwd, RTF_PRCLONING); 925 if (ro_fwd->ro_rt == NULL) { 926 ipstat.ips_noroute++; 927 error = EHOSTUNREACH; 928 goto bad; 929 } 930 931 ia = ifatoia(ro_fwd->ro_rt->rt_ifa); 932 ifp = ro_fwd->ro_rt->rt_ifp; 933 ro_fwd->ro_rt->rt_use++; 934 if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) 935 dst = (struct sockaddr_in *) 936 ro_fwd->ro_rt->rt_gateway; 937 if (ro_fwd->ro_rt->rt_flags & RTF_HOST) 938 isbroadcast = 939 (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); 940 else 941 isbroadcast = in_broadcast(dst->sin_addr, ifp); 942 if (ro->ro_rt != NULL) 943 rtfree(ro->ro_rt); 944 ro->ro_rt = ro_fwd->ro_rt; 945 dst = (struct sockaddr_in *)&ro_fwd->ro_dst; 946 947 #endif /* ... block to be put into a function */ 948 /* 949 * If we added a default src ip earlier, 950 * which would have been gotten from the-then 951 * interface, do it again, from the new one. 952 */ 953 if (src_was_INADDR_ANY) 954 ip->ip_src = IA_SIN(ia)->sin_addr; 955 goto pass ; 956 } 957 958 /* 959 * if we get here, none of the above matches, and 960 * we have to drop the pkt 961 */ 962 m_freem(m); 963 error = EACCES; /* not sure this is the right error msg */ 964 goto done; 965 } 966 967 pass: 968 /* 127/8 must not appear on wire - RFC1122. */ 969 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 970 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 971 if (!(ifp->if_flags & IFF_LOOPBACK)) { 972 ipstat.ips_badaddr++; 973 error = EADDRNOTAVAIL; 974 goto bad; 975 } 976 } 977 978 m->m_pkthdr.csum_flags |= CSUM_IP; 979 sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist; 980 if (sw_csum & CSUM_DELAY_DATA) { 981 in_delayed_cksum(m); 982 sw_csum &= ~CSUM_DELAY_DATA; 983 } 984 m->m_pkthdr.csum_flags &= ifp->if_hwassist; 985 986 /* 987 * If small enough for interface, or the interface will take 988 * care of the fragmentation for us, can just send directly. 989 */ 990 if (ip->ip_len <= ifp->if_mtu || ((ifp->if_hwassist & CSUM_FRAGMENT) && 991 !(ip->ip_off & IP_DF))) { 992 ip->ip_len = htons(ip->ip_len); 993 ip->ip_off = htons(ip->ip_off); 994 ip->ip_sum = 0; 995 if (sw_csum & CSUM_DELAY_IP) { 996 if (ip->ip_vhl == IP_VHL_BORING) { 997 ip->ip_sum = in_cksum_hdr(ip); 998 } else { 999 ip->ip_sum = in_cksum(m, hlen); 1000 } 1001 } 1002 1003 /* Record statistics for this interface address. */ 1004 if (!(flags & IP_FORWARDING) && ia) { 1005 ia->ia_ifa.if_opackets++; 1006 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1007 } 1008 1009 #ifdef IPSEC 1010 /* clean ipsec history once it goes out of the node */ 1011 ipsec_delaux(m); 1012 #endif 1013 1014 #ifdef MBUF_STRESS_TEST 1015 if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size) { 1016 struct mbuf *m1, *m2; 1017 int length, tmp; 1018 1019 tmp = length = m->m_pkthdr.len; 1020 1021 while ((length -= mbuf_frag_size) >= 1) { 1022 m1 = m_split(m, length, MB_DONTWAIT); 1023 if (m1 == NULL) 1024 break; 1025 m2 = m; 1026 while (m2->m_next != NULL) 1027 m2 = m2->m_next; 1028 m2->m_next = m1; 1029 } 1030 m->m_pkthdr.len = tmp; 1031 } 1032 #endif 1033 1034 #ifdef MPLS 1035 if (!mpls_output_process(m, ro->ro_rt)) 1036 goto done; 1037 #endif 1038 error = ifp->if_output(ifp, m, (struct sockaddr *)dst, 1039 ro->ro_rt); 1040 goto done; 1041 } 1042 1043 if (ip->ip_off & IP_DF) { 1044 error = EMSGSIZE; 1045 /* 1046 * This case can happen if the user changed the MTU 1047 * of an interface after enabling IP on it. Because 1048 * most netifs don't keep track of routes pointing to 1049 * them, there is no way for one to update all its 1050 * routes when the MTU is changed. 1051 */ 1052 if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && 1053 !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && 1054 (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { 1055 ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; 1056 } 1057 ipstat.ips_cantfrag++; 1058 goto bad; 1059 } 1060 1061 /* 1062 * Too large for interface; fragment if possible. If successful, 1063 * on return, m will point to a list of packets to be sent. 1064 */ 1065 error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); 1066 if (error) 1067 goto bad; 1068 for (; m; m = m0) { 1069 m0 = m->m_nextpkt; 1070 m->m_nextpkt = NULL; 1071 #ifdef IPSEC 1072 /* clean ipsec history once it goes out of the node */ 1073 ipsec_delaux(m); 1074 #endif 1075 if (error == 0) { 1076 /* Record statistics for this interface address. */ 1077 if (ia != NULL) { 1078 ia->ia_ifa.if_opackets++; 1079 ia->ia_ifa.if_obytes += m->m_pkthdr.len; 1080 } 1081 #ifdef MPLS 1082 if (!mpls_output_process(m, ro->ro_rt)) 1083 continue; 1084 #endif 1085 error = ifp->if_output(ifp, m, (struct sockaddr *)dst, 1086 ro->ro_rt); 1087 } else { 1088 m_freem(m); 1089 } 1090 } 1091 1092 if (error == 0) 1093 ipstat.ips_fragmented++; 1094 1095 done: 1096 if (ro == &iproute && ro->ro_rt != NULL) { 1097 RTFREE(ro->ro_rt); 1098 ro->ro_rt = NULL; 1099 } 1100 #ifdef IPSEC 1101 if (sp != NULL) { 1102 KEYDEBUG(KEYDEBUG_IPSEC_STAMP, 1103 kprintf("DP ip_output call free SP:%p\n", sp)); 1104 key_freesp(sp); 1105 } 1106 #endif 1107 #ifdef FAST_IPSEC 1108 if (sp != NULL) 1109 KEY_FREESP(&sp); 1110 #endif 1111 return (error); 1112 bad: 1113 m_freem(m); 1114 goto done; 1115 } 1116 1117 /* 1118 * Create a chain of fragments which fit the given mtu. m_frag points to the 1119 * mbuf to be fragmented; on return it points to the chain with the fragments. 1120 * Return 0 if no error. If error, m_frag may contain a partially built 1121 * chain of fragments that should be freed by the caller. 1122 * 1123 * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) 1124 * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). 1125 */ 1126 int 1127 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, 1128 u_long if_hwassist_flags, int sw_csum) 1129 { 1130 int error = 0; 1131 int hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1132 int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ 1133 int off; 1134 struct mbuf *m0 = *m_frag; /* the original packet */ 1135 int firstlen; 1136 struct mbuf **mnext; 1137 int nfrags; 1138 1139 if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ 1140 ipstat.ips_cantfrag++; 1141 return EMSGSIZE; 1142 } 1143 1144 /* 1145 * Must be able to put at least 8 bytes per fragment. 1146 */ 1147 if (len < 8) 1148 return EMSGSIZE; 1149 1150 /* 1151 * If the interface will not calculate checksums on 1152 * fragmented packets, then do it here. 1153 */ 1154 if ((m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) && 1155 !(if_hwassist_flags & CSUM_IP_FRAGS)) { 1156 in_delayed_cksum(m0); 1157 m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 1158 } 1159 1160 if (len > PAGE_SIZE) { 1161 /* 1162 * Fragment large datagrams such that each segment 1163 * contains a multiple of PAGE_SIZE amount of data, 1164 * plus headers. This enables a receiver to perform 1165 * page-flipping zero-copy optimizations. 1166 * 1167 * XXX When does this help given that sender and receiver 1168 * could have different page sizes, and also mtu could 1169 * be less than the receiver's page size ? 1170 */ 1171 int newlen; 1172 struct mbuf *m; 1173 1174 for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) 1175 off += m->m_len; 1176 1177 /* 1178 * firstlen (off - hlen) must be aligned on an 1179 * 8-byte boundary 1180 */ 1181 if (off < hlen) 1182 goto smart_frag_failure; 1183 off = ((off - hlen) & ~7) + hlen; 1184 newlen = (~PAGE_MASK) & mtu; 1185 if ((newlen + sizeof(struct ip)) > mtu) { 1186 /* we failed, go back the default */ 1187 smart_frag_failure: 1188 newlen = len; 1189 off = hlen + len; 1190 } 1191 len = newlen; 1192 1193 } else { 1194 off = hlen + len; 1195 } 1196 1197 firstlen = off - hlen; 1198 mnext = &m0->m_nextpkt; /* pointer to next packet */ 1199 1200 /* 1201 * Loop through length of segment after first fragment, 1202 * make new header and copy data of each part and link onto chain. 1203 * Here, m0 is the original packet, m is the fragment being created. 1204 * The fragments are linked off the m_nextpkt of the original 1205 * packet, which after processing serves as the first fragment. 1206 */ 1207 for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { 1208 struct ip *mhip; /* ip header on the fragment */ 1209 struct mbuf *m; 1210 int mhlen = sizeof(struct ip); 1211 1212 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1213 if (m == NULL) { 1214 error = ENOBUFS; 1215 ipstat.ips_odropped++; 1216 goto done; 1217 } 1218 m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; 1219 /* 1220 * In the first mbuf, leave room for the link header, then 1221 * copy the original IP header including options. The payload 1222 * goes into an additional mbuf chain returned by m_copy(). 1223 */ 1224 m->m_data += max_linkhdr; 1225 mhip = mtod(m, struct ip *); 1226 *mhip = *ip; 1227 if (hlen > sizeof(struct ip)) { 1228 mhlen = ip_optcopy(ip, mhip) + sizeof(struct ip); 1229 mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2); 1230 } 1231 m->m_len = mhlen; 1232 /* XXX do we need to add ip->ip_off below ? */ 1233 mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; 1234 if (off + len >= ip->ip_len) { /* last fragment */ 1235 len = ip->ip_len - off; 1236 m->m_flags |= M_LASTFRAG; 1237 } else 1238 mhip->ip_off |= IP_MF; 1239 mhip->ip_len = htons((u_short)(len + mhlen)); 1240 m->m_next = m_copy(m0, off, len); 1241 if (m->m_next == NULL) { /* copy failed */ 1242 m_free(m); 1243 error = ENOBUFS; /* ??? */ 1244 ipstat.ips_odropped++; 1245 goto done; 1246 } 1247 m->m_pkthdr.len = mhlen + len; 1248 m->m_pkthdr.rcvif = (struct ifnet *)NULL; 1249 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; 1250 mhip->ip_off = htons(mhip->ip_off); 1251 mhip->ip_sum = 0; 1252 if (sw_csum & CSUM_DELAY_IP) 1253 mhip->ip_sum = in_cksum(m, mhlen); 1254 *mnext = m; 1255 mnext = &m->m_nextpkt; 1256 } 1257 ipstat.ips_ofragments += nfrags; 1258 1259 /* set first marker for fragment chain */ 1260 m0->m_flags |= M_FIRSTFRAG | M_FRAG; 1261 m0->m_pkthdr.csum_data = nfrags; 1262 1263 /* 1264 * Update first fragment by trimming what's been copied out 1265 * and updating header. 1266 */ 1267 m_adj(m0, hlen + firstlen - ip->ip_len); 1268 m0->m_pkthdr.len = hlen + firstlen; 1269 ip->ip_len = htons((u_short)m0->m_pkthdr.len); 1270 ip->ip_off |= IP_MF; 1271 ip->ip_off = htons(ip->ip_off); 1272 ip->ip_sum = 0; 1273 if (sw_csum & CSUM_DELAY_IP) 1274 ip->ip_sum = in_cksum(m0, hlen); 1275 1276 done: 1277 *m_frag = m0; 1278 return error; 1279 } 1280 1281 void 1282 in_delayed_cksum(struct mbuf *m) 1283 { 1284 struct ip *ip; 1285 u_short csum, offset; 1286 1287 ip = mtod(m, struct ip *); 1288 offset = IP_VHL_HL(ip->ip_vhl) << 2 ; 1289 csum = in_cksum_skip(m, ip->ip_len, offset); 1290 if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) 1291 csum = 0xffff; 1292 offset += m->m_pkthdr.csum_data; /* checksum offset */ 1293 1294 if (offset + sizeof(u_short) > m->m_len) { 1295 kprintf("delayed m_pullup, m->len: %d off: %d p: %d\n", 1296 m->m_len, offset, ip->ip_p); 1297 /* 1298 * XXX 1299 * this shouldn't happen, but if it does, the 1300 * correct behavior may be to insert the checksum 1301 * in the existing chain instead of rearranging it. 1302 */ 1303 m = m_pullup(m, offset + sizeof(u_short)); 1304 } 1305 *(u_short *)(m->m_data + offset) = csum; 1306 } 1307 1308 /* 1309 * Insert IP options into preformed packet. 1310 * Adjust IP destination as required for IP source routing, 1311 * as indicated by a non-zero in_addr at the start of the options. 1312 * 1313 * XXX This routine assumes that the packet has no options in place. 1314 */ 1315 static struct mbuf * 1316 ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) 1317 { 1318 struct ipoption *p = mtod(opt, struct ipoption *); 1319 struct mbuf *n; 1320 struct ip *ip = mtod(m, struct ip *); 1321 unsigned optlen; 1322 1323 optlen = opt->m_len - sizeof p->ipopt_dst; 1324 if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { 1325 *phlen = 0; 1326 return (m); /* XXX should fail */ 1327 } 1328 if (p->ipopt_dst.s_addr) 1329 ip->ip_dst = p->ipopt_dst; 1330 if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { 1331 MGETHDR(n, MB_DONTWAIT, MT_HEADER); 1332 if (n == NULL) { 1333 *phlen = 0; 1334 return (m); 1335 } 1336 n->m_pkthdr.rcvif = (struct ifnet *)NULL; 1337 n->m_pkthdr.len = m->m_pkthdr.len + optlen; 1338 m->m_len -= sizeof(struct ip); 1339 m->m_data += sizeof(struct ip); 1340 n->m_next = m; 1341 m = n; 1342 m->m_len = optlen + sizeof(struct ip); 1343 m->m_data += max_linkhdr; 1344 memcpy(mtod(m, void *), ip, sizeof(struct ip)); 1345 } else { 1346 m->m_data -= optlen; 1347 m->m_len += optlen; 1348 m->m_pkthdr.len += optlen; 1349 ovbcopy(ip, mtod(m, caddr_t), sizeof(struct ip)); 1350 } 1351 ip = mtod(m, struct ip *); 1352 bcopy(p->ipopt_list, ip + 1, optlen); 1353 *phlen = sizeof(struct ip) + optlen; 1354 ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); 1355 ip->ip_len += optlen; 1356 return (m); 1357 } 1358 1359 /* 1360 * Copy options from ip to jp, 1361 * omitting those not copied during fragmentation. 1362 */ 1363 int 1364 ip_optcopy(struct ip *ip, struct ip *jp) 1365 { 1366 u_char *cp, *dp; 1367 int opt, optlen, cnt; 1368 1369 cp = (u_char *)(ip + 1); 1370 dp = (u_char *)(jp + 1); 1371 cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip); 1372 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1373 opt = cp[0]; 1374 if (opt == IPOPT_EOL) 1375 break; 1376 if (opt == IPOPT_NOP) { 1377 /* Preserve for IP mcast tunnel's LSRR alignment. */ 1378 *dp++ = IPOPT_NOP; 1379 optlen = 1; 1380 continue; 1381 } 1382 1383 KASSERT(cnt >= IPOPT_OLEN + sizeof *cp, 1384 ("ip_optcopy: malformed ipv4 option")); 1385 optlen = cp[IPOPT_OLEN]; 1386 KASSERT(optlen >= IPOPT_OLEN + sizeof *cp && optlen <= cnt, 1387 ("ip_optcopy: malformed ipv4 option")); 1388 1389 /* bogus lengths should have been caught by ip_dooptions */ 1390 if (optlen > cnt) 1391 optlen = cnt; 1392 if (IPOPT_COPIED(opt)) { 1393 bcopy(cp, dp, optlen); 1394 dp += optlen; 1395 } 1396 } 1397 for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) 1398 *dp++ = IPOPT_EOL; 1399 return (optlen); 1400 } 1401 1402 /* 1403 * IP socket option processing. 1404 */ 1405 int 1406 ip_ctloutput(struct socket *so, struct sockopt *sopt) 1407 { 1408 struct inpcb *inp = so->so_pcb; 1409 int error, optval; 1410 1411 error = optval = 0; 1412 if (sopt->sopt_level != IPPROTO_IP) { 1413 return (EINVAL); 1414 } 1415 1416 switch (sopt->sopt_dir) { 1417 case SOPT_SET: 1418 switch (sopt->sopt_name) { 1419 case IP_OPTIONS: 1420 #ifdef notyet 1421 case IP_RETOPTS: 1422 #endif 1423 { 1424 struct mbuf *m; 1425 if (sopt->sopt_valsize > MLEN) { 1426 error = EMSGSIZE; 1427 break; 1428 } 1429 MGET(m, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_HEADER); 1430 if (m == NULL) { 1431 error = ENOBUFS; 1432 break; 1433 } 1434 m->m_len = sopt->sopt_valsize; 1435 error = soopt_to_kbuf(sopt, mtod(m, void *), m->m_len, 1436 m->m_len); 1437 return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, 1438 m)); 1439 } 1440 1441 case IP_TOS: 1442 case IP_TTL: 1443 case IP_MINTTL: 1444 case IP_RECVOPTS: 1445 case IP_RECVRETOPTS: 1446 case IP_RECVDSTADDR: 1447 case IP_RECVIF: 1448 case IP_RECVTTL: 1449 case IP_FAITH: 1450 error = soopt_to_kbuf(sopt, &optval, sizeof optval, 1451 sizeof optval); 1452 if (error) 1453 break; 1454 switch (sopt->sopt_name) { 1455 case IP_TOS: 1456 inp->inp_ip_tos = optval; 1457 break; 1458 1459 case IP_TTL: 1460 inp->inp_ip_ttl = optval; 1461 break; 1462 case IP_MINTTL: 1463 if (optval > 0 && optval <= MAXTTL) 1464 inp->inp_ip_minttl = optval; 1465 else 1466 error = EINVAL; 1467 break; 1468 #define OPTSET(bit) \ 1469 if (optval) \ 1470 inp->inp_flags |= bit; \ 1471 else \ 1472 inp->inp_flags &= ~bit; 1473 1474 case IP_RECVOPTS: 1475 OPTSET(INP_RECVOPTS); 1476 break; 1477 1478 case IP_RECVRETOPTS: 1479 OPTSET(INP_RECVRETOPTS); 1480 break; 1481 1482 case IP_RECVDSTADDR: 1483 OPTSET(INP_RECVDSTADDR); 1484 break; 1485 1486 case IP_RECVIF: 1487 OPTSET(INP_RECVIF); 1488 break; 1489 1490 case IP_RECVTTL: 1491 OPTSET(INP_RECVTTL); 1492 break; 1493 1494 case IP_FAITH: 1495 OPTSET(INP_FAITH); 1496 break; 1497 } 1498 break; 1499 #undef OPTSET 1500 1501 case IP_MULTICAST_IF: 1502 case IP_MULTICAST_VIF: 1503 case IP_MULTICAST_TTL: 1504 case IP_MULTICAST_LOOP: 1505 case IP_ADD_MEMBERSHIP: 1506 case IP_DROP_MEMBERSHIP: 1507 error = ip_setmoptions(sopt, &inp->inp_moptions); 1508 break; 1509 1510 case IP_PORTRANGE: 1511 error = soopt_to_kbuf(sopt, &optval, sizeof optval, 1512 sizeof optval); 1513 if (error) 1514 break; 1515 1516 switch (optval) { 1517 case IP_PORTRANGE_DEFAULT: 1518 inp->inp_flags &= ~(INP_LOWPORT); 1519 inp->inp_flags &= ~(INP_HIGHPORT); 1520 break; 1521 1522 case IP_PORTRANGE_HIGH: 1523 inp->inp_flags &= ~(INP_LOWPORT); 1524 inp->inp_flags |= INP_HIGHPORT; 1525 break; 1526 1527 case IP_PORTRANGE_LOW: 1528 inp->inp_flags &= ~(INP_HIGHPORT); 1529 inp->inp_flags |= INP_LOWPORT; 1530 break; 1531 1532 default: 1533 error = EINVAL; 1534 break; 1535 } 1536 break; 1537 1538 #if defined(IPSEC) || defined(FAST_IPSEC) 1539 case IP_IPSEC_POLICY: 1540 { 1541 caddr_t req; 1542 size_t len = 0; 1543 int priv; 1544 struct mbuf *m; 1545 int optname; 1546 1547 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ 1548 break; 1549 soopt_to_mbuf(sopt, m); 1550 priv = (sopt->sopt_td != NULL && 1551 suser(sopt->sopt_td) != 0) ? 0 : 1; 1552 req = mtod(m, caddr_t); 1553 len = m->m_len; 1554 optname = sopt->sopt_name; 1555 error = ipsec4_set_policy(inp, optname, req, len, priv); 1556 m_freem(m); 1557 break; 1558 } 1559 #endif /*IPSEC*/ 1560 1561 default: 1562 error = ENOPROTOOPT; 1563 break; 1564 } 1565 break; 1566 1567 case SOPT_GET: 1568 switch (sopt->sopt_name) { 1569 case IP_OPTIONS: 1570 case IP_RETOPTS: 1571 if (inp->inp_options) 1572 soopt_from_kbuf(sopt, mtod(inp->inp_options, 1573 char *), 1574 inp->inp_options->m_len); 1575 else 1576 sopt->sopt_valsize = 0; 1577 break; 1578 1579 case IP_TOS: 1580 case IP_TTL: 1581 case IP_MINTTL: 1582 case IP_RECVOPTS: 1583 case IP_RECVRETOPTS: 1584 case IP_RECVDSTADDR: 1585 case IP_RECVTTL: 1586 case IP_RECVIF: 1587 case IP_PORTRANGE: 1588 case IP_FAITH: 1589 switch (sopt->sopt_name) { 1590 1591 case IP_TOS: 1592 optval = inp->inp_ip_tos; 1593 break; 1594 1595 case IP_TTL: 1596 optval = inp->inp_ip_ttl; 1597 break; 1598 case IP_MINTTL: 1599 optval = inp->inp_ip_minttl; 1600 break; 1601 1602 #define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0) 1603 1604 case IP_RECVOPTS: 1605 optval = OPTBIT(INP_RECVOPTS); 1606 break; 1607 1608 case IP_RECVRETOPTS: 1609 optval = OPTBIT(INP_RECVRETOPTS); 1610 break; 1611 1612 case IP_RECVDSTADDR: 1613 optval = OPTBIT(INP_RECVDSTADDR); 1614 break; 1615 1616 case IP_RECVTTL: 1617 optval = OPTBIT(INP_RECVTTL); 1618 break; 1619 1620 case IP_RECVIF: 1621 optval = OPTBIT(INP_RECVIF); 1622 break; 1623 1624 case IP_PORTRANGE: 1625 if (inp->inp_flags & INP_HIGHPORT) 1626 optval = IP_PORTRANGE_HIGH; 1627 else if (inp->inp_flags & INP_LOWPORT) 1628 optval = IP_PORTRANGE_LOW; 1629 else 1630 optval = 0; 1631 break; 1632 1633 case IP_FAITH: 1634 optval = OPTBIT(INP_FAITH); 1635 break; 1636 } 1637 soopt_from_kbuf(sopt, &optval, sizeof optval); 1638 break; 1639 1640 case IP_MULTICAST_IF: 1641 case IP_MULTICAST_VIF: 1642 case IP_MULTICAST_TTL: 1643 case IP_MULTICAST_LOOP: 1644 case IP_ADD_MEMBERSHIP: 1645 case IP_DROP_MEMBERSHIP: 1646 error = ip_getmoptions(sopt, inp->inp_moptions); 1647 break; 1648 1649 #if defined(IPSEC) || defined(FAST_IPSEC) 1650 case IP_IPSEC_POLICY: 1651 { 1652 struct mbuf *m = NULL; 1653 caddr_t req = NULL; 1654 size_t len = 0; 1655 1656 if (m != NULL) { 1657 req = mtod(m, caddr_t); 1658 len = m->m_len; 1659 } 1660 error = ipsec4_get_policy(so->so_pcb, req, len, &m); 1661 if (error == 0) 1662 error = soopt_from_mbuf(sopt, m); /* XXX */ 1663 if (error == 0) 1664 m_freem(m); 1665 break; 1666 } 1667 #endif /*IPSEC*/ 1668 1669 default: 1670 error = ENOPROTOOPT; 1671 break; 1672 } 1673 break; 1674 } 1675 return (error); 1676 } 1677 1678 /* 1679 * Set up IP options in pcb for insertion in output packets. 1680 * Store in mbuf with pointer in pcbopt, adding pseudo-option 1681 * with destination address if source routed. 1682 */ 1683 static int 1684 ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m) 1685 { 1686 int cnt, optlen; 1687 u_char *cp; 1688 u_char opt; 1689 1690 /* turn off any old options */ 1691 if (*pcbopt) 1692 m_free(*pcbopt); 1693 *pcbopt = 0; 1694 if (m == NULL || m->m_len == 0) { 1695 /* 1696 * Only turning off any previous options. 1697 */ 1698 if (m != NULL) 1699 m_free(m); 1700 return (0); 1701 } 1702 1703 if (m->m_len % sizeof(int32_t)) 1704 goto bad; 1705 /* 1706 * IP first-hop destination address will be stored before 1707 * actual options; move other options back 1708 * and clear it when none present. 1709 */ 1710 if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) 1711 goto bad; 1712 cnt = m->m_len; 1713 m->m_len += sizeof(struct in_addr); 1714 cp = mtod(m, u_char *) + sizeof(struct in_addr); 1715 ovbcopy(mtod(m, caddr_t), cp, cnt); 1716 bzero(mtod(m, caddr_t), sizeof(struct in_addr)); 1717 1718 for (; cnt > 0; cnt -= optlen, cp += optlen) { 1719 opt = cp[IPOPT_OPTVAL]; 1720 if (opt == IPOPT_EOL) 1721 break; 1722 if (opt == IPOPT_NOP) 1723 optlen = 1; 1724 else { 1725 if (cnt < IPOPT_OLEN + sizeof *cp) 1726 goto bad; 1727 optlen = cp[IPOPT_OLEN]; 1728 if (optlen < IPOPT_OLEN + sizeof *cp || optlen > cnt) 1729 goto bad; 1730 } 1731 switch (opt) { 1732 1733 default: 1734 break; 1735 1736 case IPOPT_LSRR: 1737 case IPOPT_SSRR: 1738 /* 1739 * user process specifies route as: 1740 * ->A->B->C->D 1741 * D must be our final destination (but we can't 1742 * check that since we may not have connected yet). 1743 * A is first hop destination, which doesn't appear in 1744 * actual IP option, but is stored before the options. 1745 */ 1746 if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) 1747 goto bad; 1748 m->m_len -= sizeof(struct in_addr); 1749 cnt -= sizeof(struct in_addr); 1750 optlen -= sizeof(struct in_addr); 1751 cp[IPOPT_OLEN] = optlen; 1752 /* 1753 * Move first hop before start of options. 1754 */ 1755 bcopy(&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), 1756 sizeof(struct in_addr)); 1757 /* 1758 * Then copy rest of options back 1759 * to close up the deleted entry. 1760 */ 1761 ovbcopy(&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr), 1762 &cp[IPOPT_OFFSET+1], 1763 cnt - (IPOPT_MINOFF - 1)); 1764 break; 1765 } 1766 } 1767 if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) 1768 goto bad; 1769 *pcbopt = m; 1770 return (0); 1771 1772 bad: 1773 m_free(m); 1774 return (EINVAL); 1775 } 1776 1777 /* 1778 * XXX 1779 * The whole multicast option thing needs to be re-thought. 1780 * Several of these options are equally applicable to non-multicast 1781 * transmission, and one (IP_MULTICAST_TTL) totally duplicates a 1782 * standard option (IP_TTL). 1783 */ 1784 1785 /* 1786 * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. 1787 */ 1788 static struct ifnet * 1789 ip_multicast_if(struct in_addr *a, int *ifindexp) 1790 { 1791 int ifindex; 1792 struct ifnet *ifp; 1793 1794 if (ifindexp) 1795 *ifindexp = 0; 1796 if (ntohl(a->s_addr) >> 24 == 0) { 1797 ifindex = ntohl(a->s_addr) & 0xffffff; 1798 if (ifindex < 0 || if_index < ifindex) 1799 return NULL; 1800 ifp = ifindex2ifnet[ifindex]; 1801 if (ifindexp) 1802 *ifindexp = ifindex; 1803 } else { 1804 ifp = INADDR_TO_IFP(a); 1805 } 1806 return ifp; 1807 } 1808 1809 /* 1810 * Set the IP multicast options in response to user setsockopt(). 1811 */ 1812 static int 1813 ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop) 1814 { 1815 int error = 0; 1816 int i; 1817 struct in_addr addr; 1818 struct ip_mreq mreq; 1819 struct ifnet *ifp; 1820 struct ip_moptions *imo = *imop; 1821 int ifindex; 1822 1823 if (imo == NULL) { 1824 /* 1825 * No multicast option buffer attached to the pcb; 1826 * allocate one and initialize to default values. 1827 */ 1828 imo = kmalloc(sizeof *imo, M_IPMOPTS, M_WAITOK); 1829 1830 *imop = imo; 1831 imo->imo_multicast_ifp = NULL; 1832 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1833 imo->imo_multicast_vif = -1; 1834 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1835 imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1836 imo->imo_num_memberships = 0; 1837 } 1838 switch (sopt->sopt_name) { 1839 /* store an index number for the vif you wanna use in the send */ 1840 case IP_MULTICAST_VIF: 1841 if (legal_vif_num == 0) { 1842 error = EOPNOTSUPP; 1843 break; 1844 } 1845 error = soopt_to_kbuf(sopt, &i, sizeof i, sizeof i); 1846 if (error) 1847 break; 1848 if (!legal_vif_num(i) && (i != -1)) { 1849 error = EINVAL; 1850 break; 1851 } 1852 imo->imo_multicast_vif = i; 1853 break; 1854 1855 case IP_MULTICAST_IF: 1856 /* 1857 * Select the interface for outgoing multicast packets. 1858 */ 1859 error = soopt_to_kbuf(sopt, &addr, sizeof addr, sizeof addr); 1860 if (error) 1861 break; 1862 1863 /* 1864 * INADDR_ANY is used to remove a previous selection. 1865 * When no interface is selected, a default one is 1866 * chosen every time a multicast packet is sent. 1867 */ 1868 if (addr.s_addr == INADDR_ANY) { 1869 imo->imo_multicast_ifp = NULL; 1870 break; 1871 } 1872 /* 1873 * The selected interface is identified by its local 1874 * IP address. Find the interface and confirm that 1875 * it supports multicasting. 1876 */ 1877 crit_enter(); 1878 ifp = ip_multicast_if(&addr, &ifindex); 1879 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) { 1880 crit_exit(); 1881 error = EADDRNOTAVAIL; 1882 break; 1883 } 1884 imo->imo_multicast_ifp = ifp; 1885 if (ifindex) 1886 imo->imo_multicast_addr = addr; 1887 else 1888 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1889 crit_exit(); 1890 break; 1891 1892 case IP_MULTICAST_TTL: 1893 /* 1894 * Set the IP time-to-live for outgoing multicast packets. 1895 * The original multicast API required a char argument, 1896 * which is inconsistent with the rest of the socket API. 1897 * We allow either a char or an int. 1898 */ 1899 if (sopt->sopt_valsize == 1) { 1900 u_char ttl; 1901 error = soopt_to_kbuf(sopt, &ttl, 1, 1); 1902 if (error) 1903 break; 1904 imo->imo_multicast_ttl = ttl; 1905 } else { 1906 u_int ttl; 1907 error = soopt_to_kbuf(sopt, &ttl, sizeof ttl, sizeof ttl); 1908 if (error) 1909 break; 1910 if (ttl > 255) 1911 error = EINVAL; 1912 else 1913 imo->imo_multicast_ttl = ttl; 1914 } 1915 break; 1916 1917 case IP_MULTICAST_LOOP: 1918 /* 1919 * Set the loopback flag for outgoing multicast packets. 1920 * Must be zero or one. The original multicast API required a 1921 * char argument, which is inconsistent with the rest 1922 * of the socket API. We allow either a char or an int. 1923 */ 1924 if (sopt->sopt_valsize == 1) { 1925 u_char loop; 1926 1927 error = soopt_to_kbuf(sopt, &loop, 1, 1); 1928 if (error) 1929 break; 1930 imo->imo_multicast_loop = !!loop; 1931 } else { 1932 u_int loop; 1933 1934 error = soopt_to_kbuf(sopt, &loop, sizeof loop, 1935 sizeof loop); 1936 if (error) 1937 break; 1938 imo->imo_multicast_loop = !!loop; 1939 } 1940 break; 1941 1942 case IP_ADD_MEMBERSHIP: 1943 /* 1944 * Add a multicast group membership. 1945 * Group must be a valid IP multicast address. 1946 */ 1947 error = soopt_to_kbuf(sopt, &mreq, sizeof mreq, sizeof mreq); 1948 if (error) 1949 break; 1950 1951 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 1952 error = EINVAL; 1953 break; 1954 } 1955 crit_enter(); 1956 /* 1957 * If no interface address was provided, use the interface of 1958 * the route to the given multicast address. 1959 */ 1960 if (mreq.imr_interface.s_addr == INADDR_ANY) { 1961 struct sockaddr_in dst; 1962 struct rtentry *rt; 1963 1964 bzero(&dst, sizeof(struct sockaddr_in)); 1965 dst.sin_len = sizeof(struct sockaddr_in); 1966 dst.sin_family = AF_INET; 1967 dst.sin_addr = mreq.imr_multiaddr; 1968 rt = rtlookup((struct sockaddr *)&dst); 1969 if (rt == NULL) { 1970 error = EADDRNOTAVAIL; 1971 crit_exit(); 1972 break; 1973 } 1974 --rt->rt_refcnt; 1975 ifp = rt->rt_ifp; 1976 } else { 1977 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 1978 } 1979 1980 /* 1981 * See if we found an interface, and confirm that it 1982 * supports multicast. 1983 */ 1984 if (ifp == NULL || !(ifp->if_flags & IFF_MULTICAST)) { 1985 error = EADDRNOTAVAIL; 1986 crit_exit(); 1987 break; 1988 } 1989 /* 1990 * See if the membership already exists or if all the 1991 * membership slots are full. 1992 */ 1993 for (i = 0; i < imo->imo_num_memberships; ++i) { 1994 if (imo->imo_membership[i]->inm_ifp == ifp && 1995 imo->imo_membership[i]->inm_addr.s_addr 1996 == mreq.imr_multiaddr.s_addr) 1997 break; 1998 } 1999 if (i < imo->imo_num_memberships) { 2000 error = EADDRINUSE; 2001 crit_exit(); 2002 break; 2003 } 2004 if (i == IP_MAX_MEMBERSHIPS) { 2005 error = ETOOMANYREFS; 2006 crit_exit(); 2007 break; 2008 } 2009 /* 2010 * Everything looks good; add a new record to the multicast 2011 * address list for the given interface. 2012 */ 2013 if ((imo->imo_membership[i] = 2014 in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { 2015 error = ENOBUFS; 2016 crit_exit(); 2017 break; 2018 } 2019 ++imo->imo_num_memberships; 2020 crit_exit(); 2021 break; 2022 2023 case IP_DROP_MEMBERSHIP: 2024 /* 2025 * Drop a multicast group membership. 2026 * Group must be a valid IP multicast address. 2027 */ 2028 error = soopt_to_kbuf(sopt, &mreq, sizeof mreq, sizeof mreq); 2029 if (error) 2030 break; 2031 2032 if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { 2033 error = EINVAL; 2034 break; 2035 } 2036 2037 crit_enter(); 2038 /* 2039 * If an interface address was specified, get a pointer 2040 * to its ifnet structure. 2041 */ 2042 if (mreq.imr_interface.s_addr == INADDR_ANY) 2043 ifp = NULL; 2044 else { 2045 ifp = ip_multicast_if(&mreq.imr_interface, NULL); 2046 if (ifp == NULL) { 2047 error = EADDRNOTAVAIL; 2048 crit_exit(); 2049 break; 2050 } 2051 } 2052 /* 2053 * Find the membership in the membership array. 2054 */ 2055 for (i = 0; i < imo->imo_num_memberships; ++i) { 2056 if ((ifp == NULL || 2057 imo->imo_membership[i]->inm_ifp == ifp) && 2058 imo->imo_membership[i]->inm_addr.s_addr == 2059 mreq.imr_multiaddr.s_addr) 2060 break; 2061 } 2062 if (i == imo->imo_num_memberships) { 2063 error = EADDRNOTAVAIL; 2064 crit_exit(); 2065 break; 2066 } 2067 /* 2068 * Give up the multicast address record to which the 2069 * membership points. 2070 */ 2071 in_delmulti(imo->imo_membership[i]); 2072 /* 2073 * Remove the gap in the membership array. 2074 */ 2075 for (++i; i < imo->imo_num_memberships; ++i) 2076 imo->imo_membership[i-1] = imo->imo_membership[i]; 2077 --imo->imo_num_memberships; 2078 crit_exit(); 2079 break; 2080 2081 default: 2082 error = EOPNOTSUPP; 2083 break; 2084 } 2085 2086 /* 2087 * If all options have default values, no need to keep the mbuf. 2088 */ 2089 if (imo->imo_multicast_ifp == NULL && 2090 imo->imo_multicast_vif == -1 && 2091 imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && 2092 imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && 2093 imo->imo_num_memberships == 0) { 2094 kfree(*imop, M_IPMOPTS); 2095 *imop = NULL; 2096 } 2097 2098 return (error); 2099 } 2100 2101 /* 2102 * Return the IP multicast options in response to user getsockopt(). 2103 */ 2104 static int 2105 ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo) 2106 { 2107 struct in_addr addr; 2108 struct in_ifaddr *ia; 2109 int error, optval; 2110 u_char coptval; 2111 2112 error = 0; 2113 switch (sopt->sopt_name) { 2114 case IP_MULTICAST_VIF: 2115 if (imo != NULL) 2116 optval = imo->imo_multicast_vif; 2117 else 2118 optval = -1; 2119 soopt_from_kbuf(sopt, &optval, sizeof optval); 2120 break; 2121 2122 case IP_MULTICAST_IF: 2123 if (imo == NULL || imo->imo_multicast_ifp == NULL) 2124 addr.s_addr = INADDR_ANY; 2125 else if (imo->imo_multicast_addr.s_addr) { 2126 /* return the value user has set */ 2127 addr = imo->imo_multicast_addr; 2128 } else { 2129 ia = IFP_TO_IA(imo->imo_multicast_ifp); 2130 addr.s_addr = (ia == NULL) ? INADDR_ANY 2131 : IA_SIN(ia)->sin_addr.s_addr; 2132 } 2133 soopt_from_kbuf(sopt, &addr, sizeof addr); 2134 break; 2135 2136 case IP_MULTICAST_TTL: 2137 if (imo == NULL) 2138 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 2139 else 2140 optval = coptval = imo->imo_multicast_ttl; 2141 if (sopt->sopt_valsize == 1) 2142 soopt_from_kbuf(sopt, &coptval, 1); 2143 else 2144 soopt_from_kbuf(sopt, &optval, sizeof optval); 2145 break; 2146 2147 case IP_MULTICAST_LOOP: 2148 if (imo == NULL) 2149 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 2150 else 2151 optval = coptval = imo->imo_multicast_loop; 2152 if (sopt->sopt_valsize == 1) 2153 soopt_from_kbuf(sopt, &coptval, 1); 2154 else 2155 soopt_from_kbuf(sopt, &optval, sizeof optval); 2156 break; 2157 2158 default: 2159 error = ENOPROTOOPT; 2160 break; 2161 } 2162 return (error); 2163 } 2164 2165 /* 2166 * Discard the IP multicast options. 2167 */ 2168 void 2169 ip_freemoptions(struct ip_moptions *imo) 2170 { 2171 int i; 2172 2173 if (imo != NULL) { 2174 for (i = 0; i < imo->imo_num_memberships; ++i) 2175 in_delmulti(imo->imo_membership[i]); 2176 kfree(imo, M_IPMOPTS); 2177 } 2178 } 2179 2180 /* 2181 * Routine called from ip_output() to loop back a copy of an IP multicast 2182 * packet to the input queue of a specified interface. Note that this 2183 * calls the output routine of the loopback "driver", but with an interface 2184 * pointer that might NOT be a loopback interface -- evil, but easier than 2185 * replicating that code here. 2186 */ 2187 static void 2188 ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst, 2189 int hlen) 2190 { 2191 struct ip *ip; 2192 struct mbuf *copym; 2193 2194 copym = m_copypacket(m, MB_DONTWAIT); 2195 if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) 2196 copym = m_pullup(copym, hlen); 2197 if (copym != NULL) { 2198 /* 2199 * if the checksum hasn't been computed, mark it as valid 2200 */ 2201 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { 2202 in_delayed_cksum(copym); 2203 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; 2204 copym->m_pkthdr.csum_flags |= 2205 CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 2206 copym->m_pkthdr.csum_data = 0xffff; 2207 } 2208 /* 2209 * We don't bother to fragment if the IP length is greater 2210 * than the interface's MTU. Can this possibly matter? 2211 */ 2212 ip = mtod(copym, struct ip *); 2213 ip->ip_len = htons(ip->ip_len); 2214 ip->ip_off = htons(ip->ip_off); 2215 ip->ip_sum = 0; 2216 if (ip->ip_vhl == IP_VHL_BORING) { 2217 ip->ip_sum = in_cksum_hdr(ip); 2218 } else { 2219 ip->ip_sum = in_cksum(copym, hlen); 2220 } 2221 /* 2222 * NB: 2223 * It's not clear whether there are any lingering 2224 * reentrancy problems in other areas which might 2225 * be exposed by using ip_input directly (in 2226 * particular, everything which modifies the packet 2227 * in-place). Yet another option is using the 2228 * protosw directly to deliver the looped back 2229 * packet. For the moment, we'll err on the side 2230 * of safety by using if_simloop(). 2231 */ 2232 #if 1 /* XXX */ 2233 if (dst->sin_family != AF_INET) { 2234 kprintf("ip_mloopback: bad address family %d\n", 2235 dst->sin_family); 2236 dst->sin_family = AF_INET; 2237 } 2238 #endif 2239 2240 #ifdef notdef 2241 copym->m_pkthdr.rcvif = ifp; 2242 ip_input(copym); 2243 #else 2244 if_simloop(ifp, copym, dst->sin_family, 0); 2245 #endif 2246 } 2247 } 2248