1*11042SErik.Nordmark@Sun.COM /* 2*11042SErik.Nordmark@Sun.COM * CDDL HEADER START 3*11042SErik.Nordmark@Sun.COM * 4*11042SErik.Nordmark@Sun.COM * The contents of this file are subject to the terms of the 5*11042SErik.Nordmark@Sun.COM * Common Development and Distribution License (the "License"). 6*11042SErik.Nordmark@Sun.COM * You may not use this file except in compliance with the License. 7*11042SErik.Nordmark@Sun.COM * 8*11042SErik.Nordmark@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*11042SErik.Nordmark@Sun.COM * or http://www.opensolaris.org/os/licensing. 10*11042SErik.Nordmark@Sun.COM * See the License for the specific language governing permissions 11*11042SErik.Nordmark@Sun.COM * and limitations under the License. 12*11042SErik.Nordmark@Sun.COM * 13*11042SErik.Nordmark@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 14*11042SErik.Nordmark@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*11042SErik.Nordmark@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 16*11042SErik.Nordmark@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 17*11042SErik.Nordmark@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 18*11042SErik.Nordmark@Sun.COM * 19*11042SErik.Nordmark@Sun.COM * CDDL HEADER END 20*11042SErik.Nordmark@Sun.COM */ 21*11042SErik.Nordmark@Sun.COM 22*11042SErik.Nordmark@Sun.COM /* 23*11042SErik.Nordmark@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24*11042SErik.Nordmark@Sun.COM * Use is subject to license terms. 25*11042SErik.Nordmark@Sun.COM */ 26*11042SErik.Nordmark@Sun.COM /* Copyright (c) 1990 Mentat Inc. */ 27*11042SErik.Nordmark@Sun.COM 28*11042SErik.Nordmark@Sun.COM #include <sys/types.h> 29*11042SErik.Nordmark@Sun.COM #include <sys/stream.h> 30*11042SErik.Nordmark@Sun.COM #include <sys/strsubr.h> 31*11042SErik.Nordmark@Sun.COM #include <sys/dlpi.h> 32*11042SErik.Nordmark@Sun.COM #include <sys/strsun.h> 33*11042SErik.Nordmark@Sun.COM #include <sys/zone.h> 34*11042SErik.Nordmark@Sun.COM #include <sys/ddi.h> 35*11042SErik.Nordmark@Sun.COM #include <sys/sunddi.h> 36*11042SErik.Nordmark@Sun.COM #include <sys/cmn_err.h> 37*11042SErik.Nordmark@Sun.COM #include <sys/debug.h> 38*11042SErik.Nordmark@Sun.COM #include <sys/atomic.h> 39*11042SErik.Nordmark@Sun.COM 40*11042SErik.Nordmark@Sun.COM #include <sys/systm.h> 41*11042SErik.Nordmark@Sun.COM #include <sys/param.h> 42*11042SErik.Nordmark@Sun.COM #include <sys/kmem.h> 43*11042SErik.Nordmark@Sun.COM #include <sys/sdt.h> 44*11042SErik.Nordmark@Sun.COM #include <sys/socket.h> 45*11042SErik.Nordmark@Sun.COM #include <sys/mac.h> 46*11042SErik.Nordmark@Sun.COM #include <net/if.h> 47*11042SErik.Nordmark@Sun.COM #include <net/if_arp.h> 48*11042SErik.Nordmark@Sun.COM #include <net/route.h> 49*11042SErik.Nordmark@Sun.COM #include <sys/sockio.h> 50*11042SErik.Nordmark@Sun.COM #include <netinet/in.h> 51*11042SErik.Nordmark@Sun.COM #include <net/if_dl.h> 52*11042SErik.Nordmark@Sun.COM 53*11042SErik.Nordmark@Sun.COM #include <inet/common.h> 54*11042SErik.Nordmark@Sun.COM #include <inet/mi.h> 55*11042SErik.Nordmark@Sun.COM #include <inet/mib2.h> 56*11042SErik.Nordmark@Sun.COM #include <inet/nd.h> 57*11042SErik.Nordmark@Sun.COM #include <inet/arp.h> 58*11042SErik.Nordmark@Sun.COM #include <inet/snmpcom.h> 59*11042SErik.Nordmark@Sun.COM #include <inet/kstatcom.h> 60*11042SErik.Nordmark@Sun.COM 61*11042SErik.Nordmark@Sun.COM #include <netinet/igmp_var.h> 62*11042SErik.Nordmark@Sun.COM #include <netinet/ip6.h> 63*11042SErik.Nordmark@Sun.COM #include <netinet/icmp6.h> 64*11042SErik.Nordmark@Sun.COM #include <netinet/sctp.h> 65*11042SErik.Nordmark@Sun.COM 66*11042SErik.Nordmark@Sun.COM #include <inet/ip.h> 67*11042SErik.Nordmark@Sun.COM #include <inet/ip_impl.h> 68*11042SErik.Nordmark@Sun.COM #include <inet/ip6.h> 69*11042SErik.Nordmark@Sun.COM #include <inet/ip6_asp.h> 70*11042SErik.Nordmark@Sun.COM #include <inet/tcp.h> 71*11042SErik.Nordmark@Sun.COM #include <inet/ip_multi.h> 72*11042SErik.Nordmark@Sun.COM #include <inet/ip_if.h> 73*11042SErik.Nordmark@Sun.COM #include <inet/ip_ire.h> 74*11042SErik.Nordmark@Sun.COM #include <inet/ip_ftable.h> 75*11042SErik.Nordmark@Sun.COM #include <inet/ip_rts.h> 76*11042SErik.Nordmark@Sun.COM #include <inet/optcom.h> 77*11042SErik.Nordmark@Sun.COM #include <inet/ip_ndp.h> 78*11042SErik.Nordmark@Sun.COM #include <inet/ip_listutils.h> 79*11042SErik.Nordmark@Sun.COM #include <netinet/igmp.h> 80*11042SErik.Nordmark@Sun.COM #include <netinet/ip_mroute.h> 81*11042SErik.Nordmark@Sun.COM #include <inet/ipp_common.h> 82*11042SErik.Nordmark@Sun.COM 83*11042SErik.Nordmark@Sun.COM #include <net/pfkeyv2.h> 84*11042SErik.Nordmark@Sun.COM #include <inet/sadb.h> 85*11042SErik.Nordmark@Sun.COM #include <inet/ipsec_impl.h> 86*11042SErik.Nordmark@Sun.COM #include <inet/ipdrop.h> 87*11042SErik.Nordmark@Sun.COM #include <inet/ip_netinfo.h> 88*11042SErik.Nordmark@Sun.COM 89*11042SErik.Nordmark@Sun.COM #include <sys/pattr.h> 90*11042SErik.Nordmark@Sun.COM #include <inet/ipclassifier.h> 91*11042SErik.Nordmark@Sun.COM #include <inet/sctp_ip.h> 92*11042SErik.Nordmark@Sun.COM #include <inet/sctp/sctp_impl.h> 93*11042SErik.Nordmark@Sun.COM #include <inet/udp_impl.h> 94*11042SErik.Nordmark@Sun.COM #include <sys/sunddi.h> 95*11042SErik.Nordmark@Sun.COM 96*11042SErik.Nordmark@Sun.COM #include <sys/tsol/label.h> 97*11042SErik.Nordmark@Sun.COM #include <sys/tsol/tnet.h> 98*11042SErik.Nordmark@Sun.COM 99*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 100*11042SErik.Nordmark@Sun.COM extern boolean_t skip_sctp_cksum; 101*11042SErik.Nordmark@Sun.COM #endif 102*11042SErik.Nordmark@Sun.COM 103*11042SErik.Nordmark@Sun.COM static int ip_verify_nce(mblk_t *, ip_xmit_attr_t *); 104*11042SErik.Nordmark@Sun.COM static int ip_verify_dce(mblk_t *, ip_xmit_attr_t *); 105*11042SErik.Nordmark@Sun.COM static boolean_t ip_verify_lso(ill_t *, ip_xmit_attr_t *); 106*11042SErik.Nordmark@Sun.COM static boolean_t ip_verify_zcopy(ill_t *, ip_xmit_attr_t *); 107*11042SErik.Nordmark@Sun.COM static void ip_output_simple_broadcast(ip_xmit_attr_t *, mblk_t *); 108*11042SErik.Nordmark@Sun.COM 109*11042SErik.Nordmark@Sun.COM /* 110*11042SErik.Nordmark@Sun.COM * There are two types of output functions for IP used for different 111*11042SErik.Nordmark@Sun.COM * purposes: 112*11042SErik.Nordmark@Sun.COM * - ip_output_simple() is when sending ICMP errors, TCP resets, etc when there 113*11042SErik.Nordmark@Sun.COM * is no context in the form of a conn_t. However, there is a 114*11042SErik.Nordmark@Sun.COM * ip_xmit_attr_t that the callers use to influence interface selection 115*11042SErik.Nordmark@Sun.COM * (needed for ICMP echo as well as IPv6 link-locals) and IPsec. 116*11042SErik.Nordmark@Sun.COM * 117*11042SErik.Nordmark@Sun.COM * - conn_ip_output() is used when sending packets with a conn_t and 118*11042SErik.Nordmark@Sun.COM * ip_set_destination has been called to cache information. In that case 119*11042SErik.Nordmark@Sun.COM * various socket options are recorded in the ip_xmit_attr_t and should 120*11042SErik.Nordmark@Sun.COM * be taken into account. 121*11042SErik.Nordmark@Sun.COM */ 122*11042SErik.Nordmark@Sun.COM 123*11042SErik.Nordmark@Sun.COM /* 124*11042SErik.Nordmark@Sun.COM * The caller *must* have called conn_connect() or ip_attr_connect() 125*11042SErik.Nordmark@Sun.COM * before calling conn_ip_output(). The caller needs to redo that each time 126*11042SErik.Nordmark@Sun.COM * the destination IP address or port changes, as well as each time there is 127*11042SErik.Nordmark@Sun.COM * a change to any socket option that would modify how packets are routed out 128*11042SErik.Nordmark@Sun.COM * of the box (e.g., SO_DONTROUTE, IP_NEXTHOP, IP_BOUND_IF). 129*11042SErik.Nordmark@Sun.COM * 130*11042SErik.Nordmark@Sun.COM * The ULP caller has to serialize the use of a single ip_xmit_attr_t. 131*11042SErik.Nordmark@Sun.COM * We assert for that here. 132*11042SErik.Nordmark@Sun.COM */ 133*11042SErik.Nordmark@Sun.COM int 134*11042SErik.Nordmark@Sun.COM conn_ip_output(mblk_t *mp, ip_xmit_attr_t *ixa) 135*11042SErik.Nordmark@Sun.COM { 136*11042SErik.Nordmark@Sun.COM iaflags_t ixaflags = ixa->ixa_flags; 137*11042SErik.Nordmark@Sun.COM ire_t *ire; 138*11042SErik.Nordmark@Sun.COM nce_t *nce; 139*11042SErik.Nordmark@Sun.COM dce_t *dce; 140*11042SErik.Nordmark@Sun.COM ill_t *ill; 141*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 142*11042SErik.Nordmark@Sun.COM int error; 143*11042SErik.Nordmark@Sun.COM 144*11042SErik.Nordmark@Sun.COM /* We defer ipIfStatsHCOutRequests until an error or we have an ill */ 145*11042SErik.Nordmark@Sun.COM 146*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_ire != NULL); 147*11042SErik.Nordmark@Sun.COM /* Note there is no ixa_nce when reject and blackhole routes */ 148*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_dce != NULL); /* Could be default dce */ 149*11042SErik.Nordmark@Sun.COM 150*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 151*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_curthread == NULL); 152*11042SErik.Nordmark@Sun.COM ixa->ixa_curthread = curthread; 153*11042SErik.Nordmark@Sun.COM #endif 154*11042SErik.Nordmark@Sun.COM 155*11042SErik.Nordmark@Sun.COM /* 156*11042SErik.Nordmark@Sun.COM * Even on labeled systems we can have a NULL ixa_tsl e.g., 157*11042SErik.Nordmark@Sun.COM * for IGMP/MLD traffic. 158*11042SErik.Nordmark@Sun.COM */ 159*11042SErik.Nordmark@Sun.COM 160*11042SErik.Nordmark@Sun.COM ire = ixa->ixa_ire; 161*11042SErik.Nordmark@Sun.COM 162*11042SErik.Nordmark@Sun.COM /* 163*11042SErik.Nordmark@Sun.COM * If the ULP says the (old) IRE resulted in reachability we 164*11042SErik.Nordmark@Sun.COM * record this before determine whether to use a new IRE. 165*11042SErik.Nordmark@Sun.COM * No locking for performance reasons. 166*11042SErik.Nordmark@Sun.COM */ 167*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_REACH_CONF) 168*11042SErik.Nordmark@Sun.COM ire->ire_badcnt = 0; 169*11042SErik.Nordmark@Sun.COM 170*11042SErik.Nordmark@Sun.COM /* 171*11042SErik.Nordmark@Sun.COM * Has routing changed since we cached the results of the lookup? 172*11042SErik.Nordmark@Sun.COM * 173*11042SErik.Nordmark@Sun.COM * This check captures all of: 174*11042SErik.Nordmark@Sun.COM * - the cached ire being deleted (by means of the special 175*11042SErik.Nordmark@Sun.COM * IRE_GENERATION_CONDEMNED) 176*11042SErik.Nordmark@Sun.COM * - A potentially better ire being added (ire_generation being 177*11042SErik.Nordmark@Sun.COM * increased) 178*11042SErik.Nordmark@Sun.COM * - A deletion of the nexthop ire that was used when we did the 179*11042SErik.Nordmark@Sun.COM * lookup. 180*11042SErik.Nordmark@Sun.COM * - An addition of a potentially better nexthop ire. 181*11042SErik.Nordmark@Sun.COM * The last two are handled by walking and increasing the generation 182*11042SErik.Nordmark@Sun.COM * number on all dependant IREs in ire_flush_cache(). 183*11042SErik.Nordmark@Sun.COM * 184*11042SErik.Nordmark@Sun.COM * The check also handles all cases of RTF_REJECT and RTF_BLACKHOLE 185*11042SErik.Nordmark@Sun.COM * since we ensure that each time we set ixa_ire to such an IRE we 186*11042SErik.Nordmark@Sun.COM * make sure the ixa_ire_generation does not match (by using 187*11042SErik.Nordmark@Sun.COM * IRE_GENERATION_VERIFY). 188*11042SErik.Nordmark@Sun.COM */ 189*11042SErik.Nordmark@Sun.COM if (ire->ire_generation != ixa->ixa_ire_generation) { 190*11042SErik.Nordmark@Sun.COM error = ip_verify_ire(mp, ixa); 191*11042SErik.Nordmark@Sun.COM if (error != 0) { 192*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - verify ire", 193*11042SErik.Nordmark@Sun.COM mp, NULL); 194*11042SErik.Nordmark@Sun.COM goto drop; 195*11042SErik.Nordmark@Sun.COM } 196*11042SErik.Nordmark@Sun.COM ire = ixa->ixa_ire; 197*11042SErik.Nordmark@Sun.COM ASSERT(ire != NULL); 198*11042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 199*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 200*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_curthread == curthread); 201*11042SErik.Nordmark@Sun.COM ixa->ixa_curthread = NULL; 202*11042SErik.Nordmark@Sun.COM #endif 203*11042SErik.Nordmark@Sun.COM ire->ire_ob_pkt_count++; 204*11042SErik.Nordmark@Sun.COM /* ixa_dce might be condemned; use default one */ 205*11042SErik.Nordmark@Sun.COM return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa, 206*11042SErik.Nordmark@Sun.COM &ipst->ips_dce_default->dce_ident)); 207*11042SErik.Nordmark@Sun.COM } 208*11042SErik.Nordmark@Sun.COM /* 209*11042SErik.Nordmark@Sun.COM * If the ncec changed then ip_verify_ire already set 210*11042SErik.Nordmark@Sun.COM * ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 211*11042SErik.Nordmark@Sun.COM * so we can recheck the interface mtu. 212*11042SErik.Nordmark@Sun.COM */ 213*11042SErik.Nordmark@Sun.COM 214*11042SErik.Nordmark@Sun.COM /* 215*11042SErik.Nordmark@Sun.COM * Note that ire->ire_generation could already have changed. 216*11042SErik.Nordmark@Sun.COM * We catch that next time we send a packet. 217*11042SErik.Nordmark@Sun.COM */ 218*11042SErik.Nordmark@Sun.COM } 219*11042SErik.Nordmark@Sun.COM 220*11042SErik.Nordmark@Sun.COM /* 221*11042SErik.Nordmark@Sun.COM * No need to lock access to ixa_nce since the ip_xmit_attr usage 222*11042SErik.Nordmark@Sun.COM * is single threaded. 223*11042SErik.Nordmark@Sun.COM */ 224*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_nce != NULL); 225*11042SErik.Nordmark@Sun.COM nce = ixa->ixa_nce; 226*11042SErik.Nordmark@Sun.COM if (nce->nce_is_condemned) { 227*11042SErik.Nordmark@Sun.COM error = ip_verify_nce(mp, ixa); 228*11042SErik.Nordmark@Sun.COM /* 229*11042SErik.Nordmark@Sun.COM * In case ZEROCOPY capability become not available, we 230*11042SErik.Nordmark@Sun.COM * copy the message and free the original one. We might 231*11042SErik.Nordmark@Sun.COM * be copying more data than needed but it doesn't hurt 232*11042SErik.Nordmark@Sun.COM * since such change rarely happens. 233*11042SErik.Nordmark@Sun.COM */ 234*11042SErik.Nordmark@Sun.COM switch (error) { 235*11042SErik.Nordmark@Sun.COM case 0: 236*11042SErik.Nordmark@Sun.COM break; 237*11042SErik.Nordmark@Sun.COM case ENOTSUP: { /* ZEROCOPY */ 238*11042SErik.Nordmark@Sun.COM mblk_t *nmp; 239*11042SErik.Nordmark@Sun.COM 240*11042SErik.Nordmark@Sun.COM if ((nmp = copymsg(mp)) != NULL) { 241*11042SErik.Nordmark@Sun.COM freemsg(mp); 242*11042SErik.Nordmark@Sun.COM mp = nmp; 243*11042SErik.Nordmark@Sun.COM 244*11042SErik.Nordmark@Sun.COM break; 245*11042SErik.Nordmark@Sun.COM } 246*11042SErik.Nordmark@Sun.COM /* FALLTHROUGH */ 247*11042SErik.Nordmark@Sun.COM } 248*11042SErik.Nordmark@Sun.COM default: 249*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - verify nce", 250*11042SErik.Nordmark@Sun.COM mp, NULL); 251*11042SErik.Nordmark@Sun.COM goto drop; 252*11042SErik.Nordmark@Sun.COM } 253*11042SErik.Nordmark@Sun.COM ire = ixa->ixa_ire; 254*11042SErik.Nordmark@Sun.COM ASSERT(ire != NULL); 255*11042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 256*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 257*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_curthread == curthread); 258*11042SErik.Nordmark@Sun.COM ixa->ixa_curthread = NULL; 259*11042SErik.Nordmark@Sun.COM #endif 260*11042SErik.Nordmark@Sun.COM ire->ire_ob_pkt_count++; 261*11042SErik.Nordmark@Sun.COM /* ixa_dce might be condemned; use default one */ 262*11042SErik.Nordmark@Sun.COM return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, 263*11042SErik.Nordmark@Sun.COM ixa, &ipst->ips_dce_default->dce_ident)); 264*11042SErik.Nordmark@Sun.COM } 265*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_nce != NULL); 266*11042SErik.Nordmark@Sun.COM nce = ixa->ixa_nce; 267*11042SErik.Nordmark@Sun.COM 268*11042SErik.Nordmark@Sun.COM /* 269*11042SErik.Nordmark@Sun.COM * Note that some other event could already have made 270*11042SErik.Nordmark@Sun.COM * the new nce condemned. We catch that next time we 271*11042SErik.Nordmark@Sun.COM * try to send a packet. 272*11042SErik.Nordmark@Sun.COM */ 273*11042SErik.Nordmark@Sun.COM } 274*11042SErik.Nordmark@Sun.COM /* 275*11042SErik.Nordmark@Sun.COM * If there is no per-destination dce_t then we have a reference to 276*11042SErik.Nordmark@Sun.COM * the default dce_t (which merely contains the dce_ipid). 277*11042SErik.Nordmark@Sun.COM * The generation check captures both the introduction of a 278*11042SErik.Nordmark@Sun.COM * per-destination dce_t (e.g., due to ICMP packet too big) and 279*11042SErik.Nordmark@Sun.COM * any change to the per-destination dce (including it becoming 280*11042SErik.Nordmark@Sun.COM * condemned by use of the special DCE_GENERATION_CONDEMNED). 281*11042SErik.Nordmark@Sun.COM */ 282*11042SErik.Nordmark@Sun.COM dce = ixa->ixa_dce; 283*11042SErik.Nordmark@Sun.COM 284*11042SErik.Nordmark@Sun.COM /* 285*11042SErik.Nordmark@Sun.COM * To avoid a periodic timer to increase the path MTU we 286*11042SErik.Nordmark@Sun.COM * look at dce_last_change_time each time we send a packet. 287*11042SErik.Nordmark@Sun.COM */ 288*11042SErik.Nordmark@Sun.COM if ((dce->dce_flags & DCEF_PMTU) && 289*11042SErik.Nordmark@Sun.COM (TICK_TO_SEC(lbolt64) - dce->dce_last_change_time > 290*11042SErik.Nordmark@Sun.COM ipst->ips_ip_pathmtu_interval)) { 291*11042SErik.Nordmark@Sun.COM /* 292*11042SErik.Nordmark@Sun.COM * Older than 20 minutes. Drop the path MTU information. 293*11042SErik.Nordmark@Sun.COM * Since the path MTU changes as a result of this, twiddle 294*11042SErik.Nordmark@Sun.COM * ixa_dce_generation to make us go through the dce 295*11042SErik.Nordmark@Sun.COM * verification code in conn_ip_output. 296*11042SErik.Nordmark@Sun.COM */ 297*11042SErik.Nordmark@Sun.COM mutex_enter(&dce->dce_lock); 298*11042SErik.Nordmark@Sun.COM dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU); 299*11042SErik.Nordmark@Sun.COM dce->dce_last_change_time = TICK_TO_SEC(lbolt64); 300*11042SErik.Nordmark@Sun.COM mutex_exit(&dce->dce_lock); 301*11042SErik.Nordmark@Sun.COM dce_increment_generation(dce); 302*11042SErik.Nordmark@Sun.COM } 303*11042SErik.Nordmark@Sun.COM 304*11042SErik.Nordmark@Sun.COM if (dce->dce_generation != ixa->ixa_dce_generation) { 305*11042SErik.Nordmark@Sun.COM error = ip_verify_dce(mp, ixa); 306*11042SErik.Nordmark@Sun.COM if (error != 0) { 307*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - verify dce", 308*11042SErik.Nordmark@Sun.COM mp, NULL); 309*11042SErik.Nordmark@Sun.COM goto drop; 310*11042SErik.Nordmark@Sun.COM } 311*11042SErik.Nordmark@Sun.COM dce = ixa->ixa_dce; 312*11042SErik.Nordmark@Sun.COM 313*11042SErik.Nordmark@Sun.COM /* 314*11042SErik.Nordmark@Sun.COM * Note that some other event could already have made the 315*11042SErik.Nordmark@Sun.COM * new dce's generation number change. 316*11042SErik.Nordmark@Sun.COM * We catch that next time we try to send a packet. 317*11042SErik.Nordmark@Sun.COM */ 318*11042SErik.Nordmark@Sun.COM } 319*11042SErik.Nordmark@Sun.COM 320*11042SErik.Nordmark@Sun.COM ill = nce->nce_ill; 321*11042SErik.Nordmark@Sun.COM 322*11042SErik.Nordmark@Sun.COM /* 323*11042SErik.Nordmark@Sun.COM * An initial ixa_fragsize was set in ip_set_destination 324*11042SErik.Nordmark@Sun.COM * and we update it if any routing changes above. 325*11042SErik.Nordmark@Sun.COM * A change to ill_mtu with ifconfig will increase all dce_generation 326*11042SErik.Nordmark@Sun.COM * so that we will detect that with the generation check. 327*11042SErik.Nordmark@Sun.COM */ 328*11042SErik.Nordmark@Sun.COM 329*11042SErik.Nordmark@Sun.COM /* 330*11042SErik.Nordmark@Sun.COM * Caller needs to make sure IXAF_VERIFY_SRC is not set if 331*11042SErik.Nordmark@Sun.COM * conn_unspec_src. 332*11042SErik.Nordmark@Sun.COM */ 333*11042SErik.Nordmark@Sun.COM if ((ixaflags & IXAF_VERIFY_SOURCE) && 334*11042SErik.Nordmark@Sun.COM ixa->ixa_src_generation != ipst->ips_src_generation) { 335*11042SErik.Nordmark@Sun.COM /* Check if the IP source is still assigned to the host. */ 336*11042SErik.Nordmark@Sun.COM uint_t gen; 337*11042SErik.Nordmark@Sun.COM 338*11042SErik.Nordmark@Sun.COM if (!ip_verify_src(mp, ixa, &gen)) { 339*11042SErik.Nordmark@Sun.COM /* Don't send a packet with a source that isn't ours */ 340*11042SErik.Nordmark@Sun.COM error = EADDRNOTAVAIL; 341*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - invalid src", 342*11042SErik.Nordmark@Sun.COM mp, NULL); 343*11042SErik.Nordmark@Sun.COM goto drop; 344*11042SErik.Nordmark@Sun.COM } 345*11042SErik.Nordmark@Sun.COM /* The source is still valid - update the generation number */ 346*11042SErik.Nordmark@Sun.COM ixa->ixa_src_generation = gen; 347*11042SErik.Nordmark@Sun.COM } 348*11042SErik.Nordmark@Sun.COM 349*11042SErik.Nordmark@Sun.COM /* 350*11042SErik.Nordmark@Sun.COM * We don't have an IRE when we fragment, hence ire_ob_pkt_count 351*11042SErik.Nordmark@Sun.COM * can only count the use prior to fragmentation. However the MIB 352*11042SErik.Nordmark@Sun.COM * counters on the ill will be incremented in post fragmentation. 353*11042SErik.Nordmark@Sun.COM */ 354*11042SErik.Nordmark@Sun.COM ire->ire_ob_pkt_count++; 355*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 356*11042SErik.Nordmark@Sun.COM 357*11042SErik.Nordmark@Sun.COM /* 358*11042SErik.Nordmark@Sun.COM * Based on ire_type and ire_flags call one of: 359*11042SErik.Nordmark@Sun.COM * ire_send_local_v* - for IRE_LOCAL and IRE_LOOPBACK 360*11042SErik.Nordmark@Sun.COM * ire_send_multirt_v* - if RTF_MULTIRT 361*11042SErik.Nordmark@Sun.COM * ire_send_noroute_v* - if RTF_REJECT or RTF_BLACHOLE 362*11042SErik.Nordmark@Sun.COM * ire_send_multicast_v* - for IRE_MULTICAST 363*11042SErik.Nordmark@Sun.COM * ire_send_broadcast_v4 - for IRE_BROADCAST 364*11042SErik.Nordmark@Sun.COM * ire_send_wire_v* - for the rest. 365*11042SErik.Nordmark@Sun.COM */ 366*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 367*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_curthread == curthread); 368*11042SErik.Nordmark@Sun.COM ixa->ixa_curthread = NULL; 369*11042SErik.Nordmark@Sun.COM #endif 370*11042SErik.Nordmark@Sun.COM return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa, &dce->dce_ident)); 371*11042SErik.Nordmark@Sun.COM 372*11042SErik.Nordmark@Sun.COM drop: 373*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IS_IPV4) { 374*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 375*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 376*11042SErik.Nordmark@Sun.COM } else { 377*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsHCOutRequests); 378*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 379*11042SErik.Nordmark@Sun.COM } 380*11042SErik.Nordmark@Sun.COM freemsg(mp); 381*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 382*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_curthread == curthread); 383*11042SErik.Nordmark@Sun.COM ixa->ixa_curthread = NULL; 384*11042SErik.Nordmark@Sun.COM #endif 385*11042SErik.Nordmark@Sun.COM return (error); 386*11042SErik.Nordmark@Sun.COM } 387*11042SErik.Nordmark@Sun.COM 388*11042SErik.Nordmark@Sun.COM /* 389*11042SErik.Nordmark@Sun.COM * Handle both IPv4 and IPv6. Sets the generation number 390*11042SErik.Nordmark@Sun.COM * to allow the caller to know when to call us again. 391*11042SErik.Nordmark@Sun.COM * Returns true if the source address in the packet is a valid source. 392*11042SErik.Nordmark@Sun.COM * We handle callers which try to send with a zero address (since we only 393*11042SErik.Nordmark@Sun.COM * get here if UNSPEC_SRC is not set). 394*11042SErik.Nordmark@Sun.COM */ 395*11042SErik.Nordmark@Sun.COM boolean_t 396*11042SErik.Nordmark@Sun.COM ip_verify_src(mblk_t *mp, ip_xmit_attr_t *ixa, uint_t *generationp) 397*11042SErik.Nordmark@Sun.COM { 398*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 399*11042SErik.Nordmark@Sun.COM 400*11042SErik.Nordmark@Sun.COM /* 401*11042SErik.Nordmark@Sun.COM * Need to grab the generation number before we check to 402*11042SErik.Nordmark@Sun.COM * avoid a race with a change to the set of local addresses. 403*11042SErik.Nordmark@Sun.COM * No lock needed since the thread which updates the set of local 404*11042SErik.Nordmark@Sun.COM * addresses use ipif/ill locks and exit those (hence a store memory 405*11042SErik.Nordmark@Sun.COM * barrier) before doing the atomic increase of ips_src_generation. 406*11042SErik.Nordmark@Sun.COM */ 407*11042SErik.Nordmark@Sun.COM if (generationp != NULL) 408*11042SErik.Nordmark@Sun.COM *generationp = ipst->ips_src_generation; 409*11042SErik.Nordmark@Sun.COM 410*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_IS_IPV4) { 411*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)mp->b_rptr; 412*11042SErik.Nordmark@Sun.COM 413*11042SErik.Nordmark@Sun.COM if (ipha->ipha_src == INADDR_ANY) 414*11042SErik.Nordmark@Sun.COM return (B_FALSE); 415*11042SErik.Nordmark@Sun.COM 416*11042SErik.Nordmark@Sun.COM return (ip_laddr_verify_v4(ipha->ipha_src, ixa->ixa_zoneid, 417*11042SErik.Nordmark@Sun.COM ipst, B_FALSE) != IPVL_BAD); 418*11042SErik.Nordmark@Sun.COM } else { 419*11042SErik.Nordmark@Sun.COM ip6_t *ip6h = (ip6_t *)mp->b_rptr; 420*11042SErik.Nordmark@Sun.COM uint_t scopeid; 421*11042SErik.Nordmark@Sun.COM 422*11042SErik.Nordmark@Sun.COM if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) 423*11042SErik.Nordmark@Sun.COM return (B_FALSE); 424*11042SErik.Nordmark@Sun.COM 425*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_SCOPEID_SET) 426*11042SErik.Nordmark@Sun.COM scopeid = ixa->ixa_scopeid; 427*11042SErik.Nordmark@Sun.COM else 428*11042SErik.Nordmark@Sun.COM scopeid = 0; 429*11042SErik.Nordmark@Sun.COM 430*11042SErik.Nordmark@Sun.COM return (ip_laddr_verify_v6(&ip6h->ip6_src, ixa->ixa_zoneid, 431*11042SErik.Nordmark@Sun.COM ipst, B_FALSE, scopeid) != IPVL_BAD); 432*11042SErik.Nordmark@Sun.COM } 433*11042SErik.Nordmark@Sun.COM } 434*11042SErik.Nordmark@Sun.COM 435*11042SErik.Nordmark@Sun.COM /* 436*11042SErik.Nordmark@Sun.COM * Handle both IPv4 and IPv6. Reverify/recalculate the IRE to use. 437*11042SErik.Nordmark@Sun.COM */ 438*11042SErik.Nordmark@Sun.COM int 439*11042SErik.Nordmark@Sun.COM ip_verify_ire(mblk_t *mp, ip_xmit_attr_t *ixa) 440*11042SErik.Nordmark@Sun.COM { 441*11042SErik.Nordmark@Sun.COM uint_t gen; 442*11042SErik.Nordmark@Sun.COM ire_t *ire; 443*11042SErik.Nordmark@Sun.COM nce_t *nce; 444*11042SErik.Nordmark@Sun.COM int error; 445*11042SErik.Nordmark@Sun.COM boolean_t multirt = B_FALSE; 446*11042SErik.Nordmark@Sun.COM 447*11042SErik.Nordmark@Sun.COM /* 448*11042SErik.Nordmark@Sun.COM * Redo ip_select_route. 449*11042SErik.Nordmark@Sun.COM * Need to grab generation number as part of the lookup to 450*11042SErik.Nordmark@Sun.COM * avoid race. 451*11042SErik.Nordmark@Sun.COM */ 452*11042SErik.Nordmark@Sun.COM error = 0; 453*11042SErik.Nordmark@Sun.COM ire = ip_select_route_pkt(mp, ixa, &gen, &error, &multirt); 454*11042SErik.Nordmark@Sun.COM ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 455*11042SErik.Nordmark@Sun.COM if (error != 0) { 456*11042SErik.Nordmark@Sun.COM ire_refrele(ire); 457*11042SErik.Nordmark@Sun.COM return (error); 458*11042SErik.Nordmark@Sun.COM } 459*11042SErik.Nordmark@Sun.COM 460*11042SErik.Nordmark@Sun.COM if (ixa->ixa_ire != NULL) 461*11042SErik.Nordmark@Sun.COM ire_refrele_notr(ixa->ixa_ire); 462*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 463*11042SErik.Nordmark@Sun.COM ire_refhold_notr(ire); 464*11042SErik.Nordmark@Sun.COM ire_refrele(ire); 465*11042SErik.Nordmark@Sun.COM #endif 466*11042SErik.Nordmark@Sun.COM ixa->ixa_ire = ire; 467*11042SErik.Nordmark@Sun.COM ixa->ixa_ire_generation = gen; 468*11042SErik.Nordmark@Sun.COM if (multirt) { 469*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_IS_IPV4) 470*11042SErik.Nordmark@Sun.COM ixa->ixa_postfragfn = ip_postfrag_multirt_v4; 471*11042SErik.Nordmark@Sun.COM else 472*11042SErik.Nordmark@Sun.COM ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 473*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 474*11042SErik.Nordmark@Sun.COM } else { 475*11042SErik.Nordmark@Sun.COM ixa->ixa_postfragfn = ire->ire_postfragfn; 476*11042SErik.Nordmark@Sun.COM ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 477*11042SErik.Nordmark@Sun.COM } 478*11042SErik.Nordmark@Sun.COM 479*11042SErik.Nordmark@Sun.COM /* 480*11042SErik.Nordmark@Sun.COM * Don't look for an nce for reject or blackhole. 481*11042SErik.Nordmark@Sun.COM * They have ire_generation set to IRE_GENERATION_VERIFY which 482*11042SErik.Nordmark@Sun.COM * makes conn_ip_output avoid references to ixa_nce. 483*11042SErik.Nordmark@Sun.COM */ 484*11042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 485*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_ire_generation == IRE_GENERATION_VERIFY); 486*11042SErik.Nordmark@Sun.COM ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 487*11042SErik.Nordmark@Sun.COM return (0); 488*11042SErik.Nordmark@Sun.COM } 489*11042SErik.Nordmark@Sun.COM 490*11042SErik.Nordmark@Sun.COM /* The NCE could now be different */ 491*11042SErik.Nordmark@Sun.COM nce = ire_to_nce_pkt(ire, mp); 492*11042SErik.Nordmark@Sun.COM if (nce == NULL) { 493*11042SErik.Nordmark@Sun.COM /* 494*11042SErik.Nordmark@Sun.COM * Allocation failure. Make sure we redo ire/nce selection 495*11042SErik.Nordmark@Sun.COM * next time we send. 496*11042SErik.Nordmark@Sun.COM */ 497*11042SErik.Nordmark@Sun.COM ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 498*11042SErik.Nordmark@Sun.COM ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 499*11042SErik.Nordmark@Sun.COM return (ENOBUFS); 500*11042SErik.Nordmark@Sun.COM } 501*11042SErik.Nordmark@Sun.COM if (nce == ixa->ixa_nce) { 502*11042SErik.Nordmark@Sun.COM /* No change */ 503*11042SErik.Nordmark@Sun.COM nce_refrele(nce); 504*11042SErik.Nordmark@Sun.COM return (0); 505*11042SErik.Nordmark@Sun.COM } 506*11042SErik.Nordmark@Sun.COM 507*11042SErik.Nordmark@Sun.COM /* 508*11042SErik.Nordmark@Sun.COM * Since the path MTU might change as a result of this 509*11042SErik.Nordmark@Sun.COM * route change, we twiddle ixa_dce_generation to 510*11042SErik.Nordmark@Sun.COM * make conn_ip_output go through the ip_verify_dce code. 511*11042SErik.Nordmark@Sun.COM */ 512*11042SErik.Nordmark@Sun.COM ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 513*11042SErik.Nordmark@Sun.COM 514*11042SErik.Nordmark@Sun.COM if (ixa->ixa_nce != NULL) 515*11042SErik.Nordmark@Sun.COM nce_refrele(ixa->ixa_nce); 516*11042SErik.Nordmark@Sun.COM ixa->ixa_nce = nce; 517*11042SErik.Nordmark@Sun.COM return (0); 518*11042SErik.Nordmark@Sun.COM } 519*11042SErik.Nordmark@Sun.COM 520*11042SErik.Nordmark@Sun.COM /* 521*11042SErik.Nordmark@Sun.COM * Handle both IPv4 and IPv6. Reverify/recalculate the NCE to use. 522*11042SErik.Nordmark@Sun.COM */ 523*11042SErik.Nordmark@Sun.COM static int 524*11042SErik.Nordmark@Sun.COM ip_verify_nce(mblk_t *mp, ip_xmit_attr_t *ixa) 525*11042SErik.Nordmark@Sun.COM { 526*11042SErik.Nordmark@Sun.COM ire_t *ire = ixa->ixa_ire; 527*11042SErik.Nordmark@Sun.COM nce_t *nce; 528*11042SErik.Nordmark@Sun.COM int error = 0; 529*11042SErik.Nordmark@Sun.COM ipha_t *ipha = NULL; 530*11042SErik.Nordmark@Sun.COM ip6_t *ip6h = NULL; 531*11042SErik.Nordmark@Sun.COM 532*11042SErik.Nordmark@Sun.COM if (ire->ire_ipversion == IPV4_VERSION) 533*11042SErik.Nordmark@Sun.COM ipha = (ipha_t *)mp->b_rptr; 534*11042SErik.Nordmark@Sun.COM else 535*11042SErik.Nordmark@Sun.COM ip6h = (ip6_t *)mp->b_rptr; 536*11042SErik.Nordmark@Sun.COM 537*11042SErik.Nordmark@Sun.COM nce = ire_handle_condemned_nce(ixa->ixa_nce, ire, ipha, ip6h, B_TRUE); 538*11042SErik.Nordmark@Sun.COM if (nce == NULL) { 539*11042SErik.Nordmark@Sun.COM /* Try to find a better ire */ 540*11042SErik.Nordmark@Sun.COM return (ip_verify_ire(mp, ixa)); 541*11042SErik.Nordmark@Sun.COM } 542*11042SErik.Nordmark@Sun.COM 543*11042SErik.Nordmark@Sun.COM /* 544*11042SErik.Nordmark@Sun.COM * The hardware offloading capabilities, for example LSO, of the 545*11042SErik.Nordmark@Sun.COM * interface might have changed, so do sanity verification here. 546*11042SErik.Nordmark@Sun.COM */ 547*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_VERIFY_LSO) { 548*11042SErik.Nordmark@Sun.COM if (!ip_verify_lso(nce->nce_ill, ixa)) { 549*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_notify != NULL); 550*11042SErik.Nordmark@Sun.COM ixa->ixa_notify(ixa->ixa_notify_cookie, ixa, 551*11042SErik.Nordmark@Sun.COM IXAN_LSO, 0); 552*11042SErik.Nordmark@Sun.COM error = ENOTSUP; 553*11042SErik.Nordmark@Sun.COM } 554*11042SErik.Nordmark@Sun.COM } 555*11042SErik.Nordmark@Sun.COM 556*11042SErik.Nordmark@Sun.COM /* 557*11042SErik.Nordmark@Sun.COM * Verify ZEROCOPY capability of underlying ill. Notify the ULP with 558*11042SErik.Nordmark@Sun.COM * any ZEROCOPY changes. In case ZEROCOPY capability is not available 559*11042SErik.Nordmark@Sun.COM * any more, return error so that conn_ip_output() can take care of 560*11042SErik.Nordmark@Sun.COM * the ZEROCOPY message properly. It's safe to continue send the 561*11042SErik.Nordmark@Sun.COM * message when ZEROCOPY newly become available. 562*11042SErik.Nordmark@Sun.COM */ 563*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_VERIFY_ZCOPY) { 564*11042SErik.Nordmark@Sun.COM if (!ip_verify_zcopy(nce->nce_ill, ixa)) { 565*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_notify != NULL); 566*11042SErik.Nordmark@Sun.COM ixa->ixa_notify(ixa->ixa_notify_cookie, ixa, 567*11042SErik.Nordmark@Sun.COM IXAN_ZCOPY, 0); 568*11042SErik.Nordmark@Sun.COM if ((ixa->ixa_flags & IXAF_ZCOPY_CAPAB) == 0) 569*11042SErik.Nordmark@Sun.COM error = ENOTSUP; 570*11042SErik.Nordmark@Sun.COM } 571*11042SErik.Nordmark@Sun.COM } 572*11042SErik.Nordmark@Sun.COM 573*11042SErik.Nordmark@Sun.COM /* 574*11042SErik.Nordmark@Sun.COM * Since the path MTU might change as a result of this 575*11042SErik.Nordmark@Sun.COM * change, we twiddle ixa_dce_generation to 576*11042SErik.Nordmark@Sun.COM * make conn_ip_output go through the ip_verify_dce code. 577*11042SErik.Nordmark@Sun.COM */ 578*11042SErik.Nordmark@Sun.COM ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 579*11042SErik.Nordmark@Sun.COM 580*11042SErik.Nordmark@Sun.COM nce_refrele(ixa->ixa_nce); 581*11042SErik.Nordmark@Sun.COM ixa->ixa_nce = nce; 582*11042SErik.Nordmark@Sun.COM return (error); 583*11042SErik.Nordmark@Sun.COM } 584*11042SErik.Nordmark@Sun.COM 585*11042SErik.Nordmark@Sun.COM /* 586*11042SErik.Nordmark@Sun.COM * Handle both IPv4 and IPv6. Reverify/recalculate the DCE to use. 587*11042SErik.Nordmark@Sun.COM */ 588*11042SErik.Nordmark@Sun.COM static int 589*11042SErik.Nordmark@Sun.COM ip_verify_dce(mblk_t *mp, ip_xmit_attr_t *ixa) 590*11042SErik.Nordmark@Sun.COM { 591*11042SErik.Nordmark@Sun.COM dce_t *dce; 592*11042SErik.Nordmark@Sun.COM uint_t gen; 593*11042SErik.Nordmark@Sun.COM uint_t pmtu; 594*11042SErik.Nordmark@Sun.COM 595*11042SErik.Nordmark@Sun.COM dce = dce_lookup_pkt(mp, ixa, &gen); 596*11042SErik.Nordmark@Sun.COM ASSERT(dce != NULL); 597*11042SErik.Nordmark@Sun.COM 598*11042SErik.Nordmark@Sun.COM dce_refrele_notr(ixa->ixa_dce); 599*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 600*11042SErik.Nordmark@Sun.COM dce_refhold_notr(dce); 601*11042SErik.Nordmark@Sun.COM dce_refrele(dce); 602*11042SErik.Nordmark@Sun.COM #endif 603*11042SErik.Nordmark@Sun.COM ixa->ixa_dce = dce; 604*11042SErik.Nordmark@Sun.COM ixa->ixa_dce_generation = gen; 605*11042SErik.Nordmark@Sun.COM 606*11042SErik.Nordmark@Sun.COM /* Extract the (path) mtu from the dce, ncec_ill etc */ 607*11042SErik.Nordmark@Sun.COM pmtu = ip_get_pmtu(ixa); 608*11042SErik.Nordmark@Sun.COM 609*11042SErik.Nordmark@Sun.COM /* 610*11042SErik.Nordmark@Sun.COM * Tell ULP about PMTU changes - increase or decrease - by returning 611*11042SErik.Nordmark@Sun.COM * an error if IXAF_VERIFY_PMTU is set. In such case, ULP should update 612*11042SErik.Nordmark@Sun.COM * both ixa_pmtu and ixa_fragsize appropriately. 613*11042SErik.Nordmark@Sun.COM * 614*11042SErik.Nordmark@Sun.COM * If ULP doesn't set that flag then we need to update ixa_fragsize 615*11042SErik.Nordmark@Sun.COM * since routing could have changed the ill after after ixa_fragsize 616*11042SErik.Nordmark@Sun.COM * was set previously in the conn_ip_output path or in 617*11042SErik.Nordmark@Sun.COM * ip_set_destination. 618*11042SErik.Nordmark@Sun.COM * 619*11042SErik.Nordmark@Sun.COM * In case of LSO, ixa_fragsize might be greater than ixa_pmtu. 620*11042SErik.Nordmark@Sun.COM * 621*11042SErik.Nordmark@Sun.COM * In the case of a path MTU increase we send the packet after the 622*11042SErik.Nordmark@Sun.COM * notify to the ULP. 623*11042SErik.Nordmark@Sun.COM */ 624*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_VERIFY_PMTU) { 625*11042SErik.Nordmark@Sun.COM if (ixa->ixa_pmtu != pmtu) { 626*11042SErik.Nordmark@Sun.COM uint_t oldmtu = ixa->ixa_pmtu; 627*11042SErik.Nordmark@Sun.COM 628*11042SErik.Nordmark@Sun.COM DTRACE_PROBE2(verify_pmtu, uint32_t, pmtu, 629*11042SErik.Nordmark@Sun.COM uint32_t, ixa->ixa_pmtu); 630*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_notify != NULL); 631*11042SErik.Nordmark@Sun.COM ixa->ixa_notify(ixa->ixa_notify_cookie, ixa, 632*11042SErik.Nordmark@Sun.COM IXAN_PMTU, pmtu); 633*11042SErik.Nordmark@Sun.COM if (pmtu < oldmtu) 634*11042SErik.Nordmark@Sun.COM return (EMSGSIZE); 635*11042SErik.Nordmark@Sun.COM } 636*11042SErik.Nordmark@Sun.COM } else { 637*11042SErik.Nordmark@Sun.COM ixa->ixa_fragsize = pmtu; 638*11042SErik.Nordmark@Sun.COM } 639*11042SErik.Nordmark@Sun.COM return (0); 640*11042SErik.Nordmark@Sun.COM } 641*11042SErik.Nordmark@Sun.COM 642*11042SErik.Nordmark@Sun.COM /* 643*11042SErik.Nordmark@Sun.COM * Verify LSO usability. Keep the return value simple to indicate whether 644*11042SErik.Nordmark@Sun.COM * the LSO capability has changed. Handle both IPv4 and IPv6. 645*11042SErik.Nordmark@Sun.COM */ 646*11042SErik.Nordmark@Sun.COM static boolean_t 647*11042SErik.Nordmark@Sun.COM ip_verify_lso(ill_t *ill, ip_xmit_attr_t *ixa) 648*11042SErik.Nordmark@Sun.COM { 649*11042SErik.Nordmark@Sun.COM ill_lso_capab_t *lsoc = &ixa->ixa_lso_capab; 650*11042SErik.Nordmark@Sun.COM ill_lso_capab_t *new_lsoc = ill->ill_lso_capab; 651*11042SErik.Nordmark@Sun.COM 652*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_LSO_CAPAB) { 653*11042SErik.Nordmark@Sun.COM /* 654*11042SErik.Nordmark@Sun.COM * Not unsable any more. 655*11042SErik.Nordmark@Sun.COM */ 656*11042SErik.Nordmark@Sun.COM if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) || 657*11042SErik.Nordmark@Sun.COM (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) || 658*11042SErik.Nordmark@Sun.COM (ixa->ixa_ire->ire_flags & RTF_MULTIRT) || 659*11042SErik.Nordmark@Sun.COM ((ixa->ixa_flags & IXAF_IS_IPV4) ? 660*11042SErik.Nordmark@Sun.COM !ILL_LSO_TCP_IPV4_USABLE(ill) : 661*11042SErik.Nordmark@Sun.COM !ILL_LSO_TCP_IPV6_USABLE(ill))) { 662*11042SErik.Nordmark@Sun.COM ixa->ixa_flags &= ~IXAF_LSO_CAPAB; 663*11042SErik.Nordmark@Sun.COM 664*11042SErik.Nordmark@Sun.COM return (B_FALSE); 665*11042SErik.Nordmark@Sun.COM } 666*11042SErik.Nordmark@Sun.COM 667*11042SErik.Nordmark@Sun.COM /* 668*11042SErik.Nordmark@Sun.COM * Capability has changed, refresh the copy in ixa. 669*11042SErik.Nordmark@Sun.COM */ 670*11042SErik.Nordmark@Sun.COM if (lsoc->ill_lso_max != new_lsoc->ill_lso_max) { 671*11042SErik.Nordmark@Sun.COM *lsoc = *new_lsoc; 672*11042SErik.Nordmark@Sun.COM 673*11042SErik.Nordmark@Sun.COM return (B_FALSE); 674*11042SErik.Nordmark@Sun.COM } 675*11042SErik.Nordmark@Sun.COM } else { /* Was not usable */ 676*11042SErik.Nordmark@Sun.COM if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) && 677*11042SErik.Nordmark@Sun.COM !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && 678*11042SErik.Nordmark@Sun.COM !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) && 679*11042SErik.Nordmark@Sun.COM ((ixa->ixa_flags & IXAF_IS_IPV4) ? 680*11042SErik.Nordmark@Sun.COM ILL_LSO_TCP_IPV4_USABLE(ill) : 681*11042SErik.Nordmark@Sun.COM ILL_LSO_TCP_IPV6_USABLE(ill))) { 682*11042SErik.Nordmark@Sun.COM *lsoc = *new_lsoc; 683*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_LSO_CAPAB; 684*11042SErik.Nordmark@Sun.COM 685*11042SErik.Nordmark@Sun.COM return (B_FALSE); 686*11042SErik.Nordmark@Sun.COM } 687*11042SErik.Nordmark@Sun.COM } 688*11042SErik.Nordmark@Sun.COM 689*11042SErik.Nordmark@Sun.COM return (B_TRUE); 690*11042SErik.Nordmark@Sun.COM } 691*11042SErik.Nordmark@Sun.COM 692*11042SErik.Nordmark@Sun.COM /* 693*11042SErik.Nordmark@Sun.COM * Verify ZEROCOPY usability. Keep the return value simple to indicate whether 694*11042SErik.Nordmark@Sun.COM * the ZEROCOPY capability has changed. Handle both IPv4 and IPv6. 695*11042SErik.Nordmark@Sun.COM */ 696*11042SErik.Nordmark@Sun.COM static boolean_t 697*11042SErik.Nordmark@Sun.COM ip_verify_zcopy(ill_t *ill, ip_xmit_attr_t *ixa) 698*11042SErik.Nordmark@Sun.COM { 699*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_ZCOPY_CAPAB) { 700*11042SErik.Nordmark@Sun.COM /* 701*11042SErik.Nordmark@Sun.COM * Not unsable any more. 702*11042SErik.Nordmark@Sun.COM */ 703*11042SErik.Nordmark@Sun.COM if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) || 704*11042SErik.Nordmark@Sun.COM (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) || 705*11042SErik.Nordmark@Sun.COM (ixa->ixa_ire->ire_flags & RTF_MULTIRT) || 706*11042SErik.Nordmark@Sun.COM !ILL_ZCOPY_USABLE(ill)) { 707*11042SErik.Nordmark@Sun.COM ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB; 708*11042SErik.Nordmark@Sun.COM 709*11042SErik.Nordmark@Sun.COM return (B_FALSE); 710*11042SErik.Nordmark@Sun.COM } 711*11042SErik.Nordmark@Sun.COM } else { /* Was not usable */ 712*11042SErik.Nordmark@Sun.COM if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) && 713*11042SErik.Nordmark@Sun.COM !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && 714*11042SErik.Nordmark@Sun.COM !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) && 715*11042SErik.Nordmark@Sun.COM ILL_ZCOPY_USABLE(ill)) { 716*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_ZCOPY_CAPAB; 717*11042SErik.Nordmark@Sun.COM 718*11042SErik.Nordmark@Sun.COM return (B_FALSE); 719*11042SErik.Nordmark@Sun.COM } 720*11042SErik.Nordmark@Sun.COM } 721*11042SErik.Nordmark@Sun.COM 722*11042SErik.Nordmark@Sun.COM return (B_TRUE); 723*11042SErik.Nordmark@Sun.COM } 724*11042SErik.Nordmark@Sun.COM 725*11042SErik.Nordmark@Sun.COM 726*11042SErik.Nordmark@Sun.COM /* 727*11042SErik.Nordmark@Sun.COM * When there is no conn_t context, this will send a packet. 728*11042SErik.Nordmark@Sun.COM * The caller must *not* have called conn_connect() or ip_attr_connect() 729*11042SErik.Nordmark@Sun.COM * before calling ip_output_simple(). 730*11042SErik.Nordmark@Sun.COM * Handles IPv4 and IPv6. Returns zero or an errno such as ENETUNREACH. 731*11042SErik.Nordmark@Sun.COM * Honors IXAF_SET_SOURCE. 732*11042SErik.Nordmark@Sun.COM * 733*11042SErik.Nordmark@Sun.COM * We acquire the ire and after calling ire_sendfn we release 734*11042SErik.Nordmark@Sun.COM * the hold on the ire. Ditto for the nce and dce. 735*11042SErik.Nordmark@Sun.COM * 736*11042SErik.Nordmark@Sun.COM * This assumes that the caller has set the following in ip_xmit_attr_t: 737*11042SErik.Nordmark@Sun.COM * ixa_tsl, ixa_zoneid, and ixa_ipst must always be set. 738*11042SErik.Nordmark@Sun.COM * If ixa_ifindex is non-zero it means send out that ill. (If it is 739*11042SErik.Nordmark@Sun.COM * an upper IPMP ill we load balance across the group; if a lower we send 740*11042SErik.Nordmark@Sun.COM * on that lower ill without load balancing.) 741*11042SErik.Nordmark@Sun.COM * IXAF_IS_IPV4 must be set correctly. 742*11042SErik.Nordmark@Sun.COM * If IXAF_IPSEC_SECURE is set then the ixa_ipsec_* fields must be set. 743*11042SErik.Nordmark@Sun.COM * If IXAF_NO_IPSEC is set we'd skip IPsec policy lookup. 744*11042SErik.Nordmark@Sun.COM * If neither of those two are set we do an IPsec policy lookup. 745*11042SErik.Nordmark@Sun.COM * 746*11042SErik.Nordmark@Sun.COM * We handle setting things like 747*11042SErik.Nordmark@Sun.COM * ixa_pktlen 748*11042SErik.Nordmark@Sun.COM * ixa_ip_hdr_length 749*11042SErik.Nordmark@Sun.COM * ixa->ixa_protocol 750*11042SErik.Nordmark@Sun.COM * 751*11042SErik.Nordmark@Sun.COM * The caller may set ixa_xmit_hint, which is used for ECMP selection and 752*11042SErik.Nordmark@Sun.COM * transmit ring selecting in GLD. 753*11042SErik.Nordmark@Sun.COM * 754*11042SErik.Nordmark@Sun.COM * The caller must do an ixa_cleanup() to release any IPsec references 755*11042SErik.Nordmark@Sun.COM * after we return. 756*11042SErik.Nordmark@Sun.COM */ 757*11042SErik.Nordmark@Sun.COM int 758*11042SErik.Nordmark@Sun.COM ip_output_simple(mblk_t *mp, ip_xmit_attr_t *ixa) 759*11042SErik.Nordmark@Sun.COM { 760*11042SErik.Nordmark@Sun.COM ts_label_t *effective_tsl = NULL; 761*11042SErik.Nordmark@Sun.COM int err; 762*11042SErik.Nordmark@Sun.COM 763*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_ipst != NULL); 764*11042SErik.Nordmark@Sun.COM 765*11042SErik.Nordmark@Sun.COM if (is_system_labeled()) { 766*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 767*11042SErik.Nordmark@Sun.COM 768*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_IS_IPV4) { 769*11042SErik.Nordmark@Sun.COM err = tsol_check_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid, 770*11042SErik.Nordmark@Sun.COM &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst, 771*11042SErik.Nordmark@Sun.COM &effective_tsl); 772*11042SErik.Nordmark@Sun.COM } else { 773*11042SErik.Nordmark@Sun.COM err = tsol_check_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid, 774*11042SErik.Nordmark@Sun.COM &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst, 775*11042SErik.Nordmark@Sun.COM &effective_tsl); 776*11042SErik.Nordmark@Sun.COM } 777*11042SErik.Nordmark@Sun.COM if (err != 0) { 778*11042SErik.Nordmark@Sun.COM ip2dbg(("tsol_check: label check failed (%d)\n", err)); 779*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 780*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 781*11042SErik.Nordmark@Sun.COM ip_drop_output("tsol_check_label", mp, NULL); 782*11042SErik.Nordmark@Sun.COM freemsg(mp); 783*11042SErik.Nordmark@Sun.COM return (err); 784*11042SErik.Nordmark@Sun.COM } 785*11042SErik.Nordmark@Sun.COM if (effective_tsl != NULL) { 786*11042SErik.Nordmark@Sun.COM /* Update the label */ 787*11042SErik.Nordmark@Sun.COM ip_xmit_attr_replace_tsl(ixa, effective_tsl); 788*11042SErik.Nordmark@Sun.COM } 789*11042SErik.Nordmark@Sun.COM } 790*11042SErik.Nordmark@Sun.COM 791*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_IS_IPV4) 792*11042SErik.Nordmark@Sun.COM return (ip_output_simple_v4(mp, ixa)); 793*11042SErik.Nordmark@Sun.COM else 794*11042SErik.Nordmark@Sun.COM return (ip_output_simple_v6(mp, ixa)); 795*11042SErik.Nordmark@Sun.COM } 796*11042SErik.Nordmark@Sun.COM 797*11042SErik.Nordmark@Sun.COM int 798*11042SErik.Nordmark@Sun.COM ip_output_simple_v4(mblk_t *mp, ip_xmit_attr_t *ixa) 799*11042SErik.Nordmark@Sun.COM { 800*11042SErik.Nordmark@Sun.COM ipha_t *ipha; 801*11042SErik.Nordmark@Sun.COM ipaddr_t firsthop; /* In IP header */ 802*11042SErik.Nordmark@Sun.COM ipaddr_t dst; /* End of source route, or ipha_dst if none */ 803*11042SErik.Nordmark@Sun.COM ire_t *ire; 804*11042SErik.Nordmark@Sun.COM ipaddr_t setsrc; /* RTF_SETSRC */ 805*11042SErik.Nordmark@Sun.COM int error; 806*11042SErik.Nordmark@Sun.COM ill_t *ill = NULL; 807*11042SErik.Nordmark@Sun.COM dce_t *dce = NULL; 808*11042SErik.Nordmark@Sun.COM nce_t *nce; 809*11042SErik.Nordmark@Sun.COM iaflags_t ixaflags = ixa->ixa_flags; 810*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 811*11042SErik.Nordmark@Sun.COM boolean_t repeat = B_FALSE; 812*11042SErik.Nordmark@Sun.COM boolean_t multirt = B_FALSE; 813*11042SErik.Nordmark@Sun.COM 814*11042SErik.Nordmark@Sun.COM ipha = (ipha_t *)mp->b_rptr; 815*11042SErik.Nordmark@Sun.COM ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 816*11042SErik.Nordmark@Sun.COM 817*11042SErik.Nordmark@Sun.COM /* 818*11042SErik.Nordmark@Sun.COM * Even on labeled systems we can have a NULL ixa_tsl e.g., 819*11042SErik.Nordmark@Sun.COM * for IGMP/MLD traffic. 820*11042SErik.Nordmark@Sun.COM */ 821*11042SErik.Nordmark@Sun.COM 822*11042SErik.Nordmark@Sun.COM /* Caller already set flags */ 823*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_flags & IXAF_IS_IPV4); 824*11042SErik.Nordmark@Sun.COM 825*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_nce == NULL); 826*11042SErik.Nordmark@Sun.COM 827*11042SErik.Nordmark@Sun.COM ixa->ixa_pktlen = ntohs(ipha->ipha_length); 828*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_pktlen == msgdsize(mp)); 829*11042SErik.Nordmark@Sun.COM ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha); 830*11042SErik.Nordmark@Sun.COM ixa->ixa_protocol = ipha->ipha_protocol; 831*11042SErik.Nordmark@Sun.COM 832*11042SErik.Nordmark@Sun.COM /* 833*11042SErik.Nordmark@Sun.COM * Assumes that source routed packets have already been massaged by 834*11042SErik.Nordmark@Sun.COM * the ULP (ip_massage_options) and as a result ipha_dst is the next 835*11042SErik.Nordmark@Sun.COM * hop in the source route. The final destination is used for IPsec 836*11042SErik.Nordmark@Sun.COM * policy and DCE lookup. 837*11042SErik.Nordmark@Sun.COM */ 838*11042SErik.Nordmark@Sun.COM firsthop = ipha->ipha_dst; 839*11042SErik.Nordmark@Sun.COM dst = ip_get_dst(ipha); 840*11042SErik.Nordmark@Sun.COM 841*11042SErik.Nordmark@Sun.COM repeat_ire: 842*11042SErik.Nordmark@Sun.COM error = 0; 843*11042SErik.Nordmark@Sun.COM setsrc = INADDR_ANY; 844*11042SErik.Nordmark@Sun.COM ire = ip_select_route_v4(firsthop, ixa, NULL, &setsrc, &error, 845*11042SErik.Nordmark@Sun.COM &multirt); 846*11042SErik.Nordmark@Sun.COM ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 847*11042SErik.Nordmark@Sun.COM if (error != 0) { 848*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 849*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 850*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - select route", mp, NULL); 851*11042SErik.Nordmark@Sun.COM freemsg(mp); 852*11042SErik.Nordmark@Sun.COM goto done; 853*11042SErik.Nordmark@Sun.COM } 854*11042SErik.Nordmark@Sun.COM 855*11042SErik.Nordmark@Sun.COM if (ire->ire_flags & (RTF_BLACKHOLE|RTF_REJECT)) { 856*11042SErik.Nordmark@Sun.COM /* ire_ill might be NULL hence need to skip some code */ 857*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_SET_SOURCE) 858*11042SErik.Nordmark@Sun.COM ipha->ipha_src = htonl(INADDR_LOOPBACK); 859*11042SErik.Nordmark@Sun.COM ixa->ixa_fragsize = IP_MAXPACKET; 860*11042SErik.Nordmark@Sun.COM ill = NULL; 861*11042SErik.Nordmark@Sun.COM nce = NULL; 862*11042SErik.Nordmark@Sun.COM ire->ire_ob_pkt_count++; 863*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 864*11042SErik.Nordmark@Sun.COM /* No dce yet; use default one */ 865*11042SErik.Nordmark@Sun.COM error = (ire->ire_sendfn)(ire, mp, ipha, ixa, 866*11042SErik.Nordmark@Sun.COM &ipst->ips_dce_default->dce_ident); 867*11042SErik.Nordmark@Sun.COM goto done; 868*11042SErik.Nordmark@Sun.COM } 869*11042SErik.Nordmark@Sun.COM 870*11042SErik.Nordmark@Sun.COM /* Note that ipha_dst is only used for IRE_MULTICAST */ 871*11042SErik.Nordmark@Sun.COM nce = ire_to_nce(ire, ipha->ipha_dst, NULL); 872*11042SErik.Nordmark@Sun.COM if (nce == NULL) { 873*11042SErik.Nordmark@Sun.COM /* Allocation failure? */ 874*11042SErik.Nordmark@Sun.COM ip_drop_output("ire_to_nce", mp, ill); 875*11042SErik.Nordmark@Sun.COM freemsg(mp); 876*11042SErik.Nordmark@Sun.COM error = ENOBUFS; 877*11042SErik.Nordmark@Sun.COM goto done; 878*11042SErik.Nordmark@Sun.COM } 879*11042SErik.Nordmark@Sun.COM if (nce->nce_is_condemned) { 880*11042SErik.Nordmark@Sun.COM nce_t *nce1; 881*11042SErik.Nordmark@Sun.COM 882*11042SErik.Nordmark@Sun.COM nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_TRUE); 883*11042SErik.Nordmark@Sun.COM nce_refrele(nce); 884*11042SErik.Nordmark@Sun.COM if (nce1 == NULL) { 885*11042SErik.Nordmark@Sun.COM if (!repeat) { 886*11042SErik.Nordmark@Sun.COM /* Try finding a better IRE */ 887*11042SErik.Nordmark@Sun.COM repeat = B_TRUE; 888*11042SErik.Nordmark@Sun.COM ire_refrele(ire); 889*11042SErik.Nordmark@Sun.COM goto repeat_ire; 890*11042SErik.Nordmark@Sun.COM } 891*11042SErik.Nordmark@Sun.COM /* Tried twice - drop packet */ 892*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 893*11042SErik.Nordmark@Sun.COM ip_drop_output("No nce", mp, ill); 894*11042SErik.Nordmark@Sun.COM freemsg(mp); 895*11042SErik.Nordmark@Sun.COM error = ENOBUFS; 896*11042SErik.Nordmark@Sun.COM goto done; 897*11042SErik.Nordmark@Sun.COM } 898*11042SErik.Nordmark@Sun.COM nce = nce1; 899*11042SErik.Nordmark@Sun.COM } 900*11042SErik.Nordmark@Sun.COM 901*11042SErik.Nordmark@Sun.COM /* 902*11042SErik.Nordmark@Sun.COM * For multicast with multirt we have a flag passed back from 903*11042SErik.Nordmark@Sun.COM * ire_lookup_multi_ill_v4 since we don't have an IRE for each 904*11042SErik.Nordmark@Sun.COM * possible multicast address. 905*11042SErik.Nordmark@Sun.COM * We also need a flag for multicast since we can't check 906*11042SErik.Nordmark@Sun.COM * whether RTF_MULTIRT is set in ixa_ire for multicast. 907*11042SErik.Nordmark@Sun.COM */ 908*11042SErik.Nordmark@Sun.COM if (multirt) { 909*11042SErik.Nordmark@Sun.COM ixa->ixa_postfragfn = ip_postfrag_multirt_v4; 910*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 911*11042SErik.Nordmark@Sun.COM } else { 912*11042SErik.Nordmark@Sun.COM ixa->ixa_postfragfn = ire->ire_postfragfn; 913*11042SErik.Nordmark@Sun.COM ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 914*11042SErik.Nordmark@Sun.COM } 915*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_nce == NULL); 916*11042SErik.Nordmark@Sun.COM ixa->ixa_nce = nce; 917*11042SErik.Nordmark@Sun.COM 918*11042SErik.Nordmark@Sun.COM /* 919*11042SErik.Nordmark@Sun.COM * Check for a dce_t with a path mtu. 920*11042SErik.Nordmark@Sun.COM */ 921*11042SErik.Nordmark@Sun.COM dce = dce_lookup_v4(dst, ipst, NULL); 922*11042SErik.Nordmark@Sun.COM ASSERT(dce != NULL); 923*11042SErik.Nordmark@Sun.COM 924*11042SErik.Nordmark@Sun.COM if (!(ixaflags & IXAF_PMTU_DISCOVERY)) { 925*11042SErik.Nordmark@Sun.COM ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 926*11042SErik.Nordmark@Sun.COM } else if (dce->dce_flags & DCEF_PMTU) { 927*11042SErik.Nordmark@Sun.COM /* 928*11042SErik.Nordmark@Sun.COM * To avoid a periodic timer to increase the path MTU we 929*11042SErik.Nordmark@Sun.COM * look at dce_last_change_time each time we send a packet. 930*11042SErik.Nordmark@Sun.COM */ 931*11042SErik.Nordmark@Sun.COM if (TICK_TO_SEC(lbolt64) - dce->dce_last_change_time > 932*11042SErik.Nordmark@Sun.COM ipst->ips_ip_pathmtu_interval) { 933*11042SErik.Nordmark@Sun.COM /* 934*11042SErik.Nordmark@Sun.COM * Older than 20 minutes. Drop the path MTU information. 935*11042SErik.Nordmark@Sun.COM */ 936*11042SErik.Nordmark@Sun.COM mutex_enter(&dce->dce_lock); 937*11042SErik.Nordmark@Sun.COM dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU); 938*11042SErik.Nordmark@Sun.COM dce->dce_last_change_time = TICK_TO_SEC(lbolt64); 939*11042SErik.Nordmark@Sun.COM mutex_exit(&dce->dce_lock); 940*11042SErik.Nordmark@Sun.COM dce_increment_generation(dce); 941*11042SErik.Nordmark@Sun.COM ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 942*11042SErik.Nordmark@Sun.COM } else { 943*11042SErik.Nordmark@Sun.COM uint_t fragsize; 944*11042SErik.Nordmark@Sun.COM 945*11042SErik.Nordmark@Sun.COM fragsize = ip_get_base_mtu(nce->nce_ill, ire); 946*11042SErik.Nordmark@Sun.COM if (fragsize > dce->dce_pmtu) 947*11042SErik.Nordmark@Sun.COM fragsize = dce->dce_pmtu; 948*11042SErik.Nordmark@Sun.COM ixa->ixa_fragsize = fragsize; 949*11042SErik.Nordmark@Sun.COM } 950*11042SErik.Nordmark@Sun.COM } else { 951*11042SErik.Nordmark@Sun.COM ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 952*11042SErik.Nordmark@Sun.COM } 953*11042SErik.Nordmark@Sun.COM 954*11042SErik.Nordmark@Sun.COM /* 955*11042SErik.Nordmark@Sun.COM * We use use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp 956*11042SErik.Nordmark@Sun.COM * interface for source address selection. 957*11042SErik.Nordmark@Sun.COM */ 958*11042SErik.Nordmark@Sun.COM ill = ire_nexthop_ill(ire); 959*11042SErik.Nordmark@Sun.COM 960*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_SET_SOURCE) { 961*11042SErik.Nordmark@Sun.COM ipaddr_t src; 962*11042SErik.Nordmark@Sun.COM 963*11042SErik.Nordmark@Sun.COM /* 964*11042SErik.Nordmark@Sun.COM * We use the final destination to get 965*11042SErik.Nordmark@Sun.COM * correct selection for source routed packets 966*11042SErik.Nordmark@Sun.COM */ 967*11042SErik.Nordmark@Sun.COM 968*11042SErik.Nordmark@Sun.COM /* If unreachable we have no ill but need some source */ 969*11042SErik.Nordmark@Sun.COM if (ill == NULL) { 970*11042SErik.Nordmark@Sun.COM src = htonl(INADDR_LOOPBACK); 971*11042SErik.Nordmark@Sun.COM error = 0; 972*11042SErik.Nordmark@Sun.COM } else { 973*11042SErik.Nordmark@Sun.COM error = ip_select_source_v4(ill, setsrc, dst, 974*11042SErik.Nordmark@Sun.COM ixa->ixa_multicast_ifaddr, ixa->ixa_zoneid, ipst, 975*11042SErik.Nordmark@Sun.COM &src, NULL, NULL); 976*11042SErik.Nordmark@Sun.COM } 977*11042SErik.Nordmark@Sun.COM if (error != 0) { 978*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 979*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 980*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - no source", 981*11042SErik.Nordmark@Sun.COM mp, ill); 982*11042SErik.Nordmark@Sun.COM freemsg(mp); 983*11042SErik.Nordmark@Sun.COM goto done; 984*11042SErik.Nordmark@Sun.COM } 985*11042SErik.Nordmark@Sun.COM ipha->ipha_src = src; 986*11042SErik.Nordmark@Sun.COM } else if (ixaflags & IXAF_VERIFY_SOURCE) { 987*11042SErik.Nordmark@Sun.COM /* Check if the IP source is assigned to the host. */ 988*11042SErik.Nordmark@Sun.COM if (!ip_verify_src(mp, ixa, NULL)) { 989*11042SErik.Nordmark@Sun.COM /* Don't send a packet with a source that isn't ours */ 990*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 991*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 992*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - invalid source", 993*11042SErik.Nordmark@Sun.COM mp, ill); 994*11042SErik.Nordmark@Sun.COM freemsg(mp); 995*11042SErik.Nordmark@Sun.COM error = EADDRNOTAVAIL; 996*11042SErik.Nordmark@Sun.COM goto done; 997*11042SErik.Nordmark@Sun.COM } 998*11042SErik.Nordmark@Sun.COM } 999*11042SErik.Nordmark@Sun.COM 1000*11042SErik.Nordmark@Sun.COM 1001*11042SErik.Nordmark@Sun.COM /* 1002*11042SErik.Nordmark@Sun.COM * Check against global IPsec policy to set the AH/ESP attributes. 1003*11042SErik.Nordmark@Sun.COM * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate. 1004*11042SErik.Nordmark@Sun.COM */ 1005*11042SErik.Nordmark@Sun.COM if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) { 1006*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_ipsec_policy == NULL); 1007*11042SErik.Nordmark@Sun.COM mp = ip_output_attach_policy(mp, ipha, NULL, NULL, ixa); 1008*11042SErik.Nordmark@Sun.COM if (mp == NULL) { 1009*11042SErik.Nordmark@Sun.COM /* MIB and ip_drop_packet already done */ 1010*11042SErik.Nordmark@Sun.COM return (EHOSTUNREACH); /* IPsec policy failure */ 1011*11042SErik.Nordmark@Sun.COM } 1012*11042SErik.Nordmark@Sun.COM } 1013*11042SErik.Nordmark@Sun.COM 1014*11042SErik.Nordmark@Sun.COM if (ill != NULL) { 1015*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 1016*11042SErik.Nordmark@Sun.COM } else { 1017*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 1018*11042SErik.Nordmark@Sun.COM } 1019*11042SErik.Nordmark@Sun.COM 1020*11042SErik.Nordmark@Sun.COM /* 1021*11042SErik.Nordmark@Sun.COM * We update the statistics on the most specific IRE i.e., the first 1022*11042SErik.Nordmark@Sun.COM * one we found. 1023*11042SErik.Nordmark@Sun.COM * We don't have an IRE when we fragment, hence ire_ob_pkt_count 1024*11042SErik.Nordmark@Sun.COM * can only count the use prior to fragmentation. However the MIB 1025*11042SErik.Nordmark@Sun.COM * counters on the ill will be incremented in post fragmentation. 1026*11042SErik.Nordmark@Sun.COM */ 1027*11042SErik.Nordmark@Sun.COM ire->ire_ob_pkt_count++; 1028*11042SErik.Nordmark@Sun.COM 1029*11042SErik.Nordmark@Sun.COM /* 1030*11042SErik.Nordmark@Sun.COM * Based on ire_type and ire_flags call one of: 1031*11042SErik.Nordmark@Sun.COM * ire_send_local_v4 - for IRE_LOCAL and IRE_LOOPBACK 1032*11042SErik.Nordmark@Sun.COM * ire_send_multirt_v4 - if RTF_MULTIRT 1033*11042SErik.Nordmark@Sun.COM * ire_send_noroute_v4 - if RTF_REJECT or RTF_BLACHOLE 1034*11042SErik.Nordmark@Sun.COM * ire_send_multicast_v4 - for IRE_MULTICAST 1035*11042SErik.Nordmark@Sun.COM * ire_send_broadcast_v4 - for IRE_BROADCAST 1036*11042SErik.Nordmark@Sun.COM * ire_send_wire_v4 - for the rest. 1037*11042SErik.Nordmark@Sun.COM */ 1038*11042SErik.Nordmark@Sun.COM error = (ire->ire_sendfn)(ire, mp, ipha, ixa, &dce->dce_ident); 1039*11042SErik.Nordmark@Sun.COM done: 1040*11042SErik.Nordmark@Sun.COM ire_refrele(ire); 1041*11042SErik.Nordmark@Sun.COM if (dce != NULL) 1042*11042SErik.Nordmark@Sun.COM dce_refrele(dce); 1043*11042SErik.Nordmark@Sun.COM if (ill != NULL) 1044*11042SErik.Nordmark@Sun.COM ill_refrele(ill); 1045*11042SErik.Nordmark@Sun.COM if (ixa->ixa_nce != NULL) 1046*11042SErik.Nordmark@Sun.COM nce_refrele(ixa->ixa_nce); 1047*11042SErik.Nordmark@Sun.COM ixa->ixa_nce = NULL; 1048*11042SErik.Nordmark@Sun.COM return (error); 1049*11042SErik.Nordmark@Sun.COM } 1050*11042SErik.Nordmark@Sun.COM 1051*11042SErik.Nordmark@Sun.COM /* 1052*11042SErik.Nordmark@Sun.COM * ire_sendfn() functions. 1053*11042SErik.Nordmark@Sun.COM * These functions use the following xmit_attr: 1054*11042SErik.Nordmark@Sun.COM * - ixa_fragsize - read to determine whether or not to fragment 1055*11042SErik.Nordmark@Sun.COM * - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec 1056*11042SErik.Nordmark@Sun.COM * - ixa_ipsec_* are used inside IPsec 1057*11042SErik.Nordmark@Sun.COM * - IXAF_SET_SOURCE - replace IP source in broadcast case. 1058*11042SErik.Nordmark@Sun.COM * - IXAF_LOOPBACK_COPY - for multicast and broadcast 1059*11042SErik.Nordmark@Sun.COM */ 1060*11042SErik.Nordmark@Sun.COM 1061*11042SErik.Nordmark@Sun.COM 1062*11042SErik.Nordmark@Sun.COM /* 1063*11042SErik.Nordmark@Sun.COM * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK 1064*11042SErik.Nordmark@Sun.COM * 1065*11042SErik.Nordmark@Sun.COM * The checks for restrict_interzone_loopback are done in ire_route_recursive. 1066*11042SErik.Nordmark@Sun.COM */ 1067*11042SErik.Nordmark@Sun.COM /* ARGSUSED4 */ 1068*11042SErik.Nordmark@Sun.COM int 1069*11042SErik.Nordmark@Sun.COM ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1070*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa, uint32_t *identp) 1071*11042SErik.Nordmark@Sun.COM { 1072*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)iph_arg; 1073*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 1074*11042SErik.Nordmark@Sun.COM ill_t *ill = ire->ire_ill; 1075*11042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; /* NOTE: No bzero for performance */ 1076*11042SErik.Nordmark@Sun.COM uint_t pktlen = ixa->ixa_pktlen; 1077*11042SErik.Nordmark@Sun.COM 1078*11042SErik.Nordmark@Sun.COM /* 1079*11042SErik.Nordmark@Sun.COM * No fragmentation, no nce, no application of IPsec, 1080*11042SErik.Nordmark@Sun.COM * and no ipha_ident assignment. 1081*11042SErik.Nordmark@Sun.COM * 1082*11042SErik.Nordmark@Sun.COM * Note different order between IP provider and FW_HOOKS than in 1083*11042SErik.Nordmark@Sun.COM * send_wire case. 1084*11042SErik.Nordmark@Sun.COM */ 1085*11042SErik.Nordmark@Sun.COM 1086*11042SErik.Nordmark@Sun.COM /* 1087*11042SErik.Nordmark@Sun.COM * DTrace this as ip:::send. A packet blocked by FW_HOOKS will fire the 1088*11042SErik.Nordmark@Sun.COM * send probe, but not the receive probe. 1089*11042SErik.Nordmark@Sun.COM */ 1090*11042SErik.Nordmark@Sun.COM DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 1091*11042SErik.Nordmark@Sun.COM ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL, 1092*11042SErik.Nordmark@Sun.COM int, 1); 1093*11042SErik.Nordmark@Sun.COM 1094*11042SErik.Nordmark@Sun.COM if (HOOKS4_INTERESTED_LOOPBACK_OUT(ipst)) { 1095*11042SErik.Nordmark@Sun.COM int error; 1096*11042SErik.Nordmark@Sun.COM 1097*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, 1098*11042SErik.Nordmark@Sun.COM ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 1099*11042SErik.Nordmark@Sun.COM FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1100*11042SErik.Nordmark@Sun.COM ipst->ips_ipv4firewall_loopback_out, 1101*11042SErik.Nordmark@Sun.COM NULL, ill, ipha, mp, mp, 0, ipst, error); 1102*11042SErik.Nordmark@Sun.COM DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp); 1103*11042SErik.Nordmark@Sun.COM if (mp == NULL) 1104*11042SErik.Nordmark@Sun.COM return (error); 1105*11042SErik.Nordmark@Sun.COM 1106*11042SErik.Nordmark@Sun.COM /* 1107*11042SErik.Nordmark@Sun.COM * Even if the destination was changed by the filter we use the 1108*11042SErik.Nordmark@Sun.COM * forwarding decision that was made based on the address 1109*11042SErik.Nordmark@Sun.COM * in ip_output/ip_set_destination. 1110*11042SErik.Nordmark@Sun.COM */ 1111*11042SErik.Nordmark@Sun.COM /* Length could be different */ 1112*11042SErik.Nordmark@Sun.COM ipha = (ipha_t *)mp->b_rptr; 1113*11042SErik.Nordmark@Sun.COM pktlen = ntohs(ipha->ipha_length); 1114*11042SErik.Nordmark@Sun.COM } 1115*11042SErik.Nordmark@Sun.COM 1116*11042SErik.Nordmark@Sun.COM /* 1117*11042SErik.Nordmark@Sun.COM * If a callback is enabled then we need to know the 1118*11042SErik.Nordmark@Sun.COM * source and destination zoneids for the packet. We already 1119*11042SErik.Nordmark@Sun.COM * have those handy. 1120*11042SErik.Nordmark@Sun.COM */ 1121*11042SErik.Nordmark@Sun.COM if (ipst->ips_ip4_observe.he_interested) { 1122*11042SErik.Nordmark@Sun.COM zoneid_t szone, dzone; 1123*11042SErik.Nordmark@Sun.COM zoneid_t stackzoneid; 1124*11042SErik.Nordmark@Sun.COM 1125*11042SErik.Nordmark@Sun.COM stackzoneid = netstackid_to_zoneid( 1126*11042SErik.Nordmark@Sun.COM ipst->ips_netstack->netstack_stackid); 1127*11042SErik.Nordmark@Sun.COM 1128*11042SErik.Nordmark@Sun.COM if (stackzoneid == GLOBAL_ZONEID) { 1129*11042SErik.Nordmark@Sun.COM /* Shared-IP zone */ 1130*11042SErik.Nordmark@Sun.COM dzone = ire->ire_zoneid; 1131*11042SErik.Nordmark@Sun.COM szone = ixa->ixa_zoneid; 1132*11042SErik.Nordmark@Sun.COM } else { 1133*11042SErik.Nordmark@Sun.COM szone = dzone = stackzoneid; 1134*11042SErik.Nordmark@Sun.COM } 1135*11042SErik.Nordmark@Sun.COM ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst); 1136*11042SErik.Nordmark@Sun.COM } 1137*11042SErik.Nordmark@Sun.COM 1138*11042SErik.Nordmark@Sun.COM /* Handle lo0 stats */ 1139*11042SErik.Nordmark@Sun.COM ipst->ips_loopback_packets++; 1140*11042SErik.Nordmark@Sun.COM 1141*11042SErik.Nordmark@Sun.COM /* Map ixa to ira including IPsec policies */ 1142*11042SErik.Nordmark@Sun.COM ipsec_out_to_in(ixa, ill, &iras); 1143*11042SErik.Nordmark@Sun.COM iras.ira_pktlen = pktlen; 1144*11042SErik.Nordmark@Sun.COM 1145*11042SErik.Nordmark@Sun.COM if (!IS_SIMPLE_IPH(ipha)) { 1146*11042SErik.Nordmark@Sun.COM ip_output_local_options(ipha, ipst); 1147*11042SErik.Nordmark@Sun.COM iras.ira_flags |= IRAF_IPV4_OPTIONS; 1148*11042SErik.Nordmark@Sun.COM } 1149*11042SErik.Nordmark@Sun.COM 1150*11042SErik.Nordmark@Sun.COM if (HOOKS4_INTERESTED_LOOPBACK_IN(ipst)) { 1151*11042SErik.Nordmark@Sun.COM int error; 1152*11042SErik.Nordmark@Sun.COM 1153*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill, 1154*11042SErik.Nordmark@Sun.COM ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp); 1155*11042SErik.Nordmark@Sun.COM FW_HOOKS(ipst->ips_ip4_loopback_in_event, 1156*11042SErik.Nordmark@Sun.COM ipst->ips_ipv4firewall_loopback_in, 1157*11042SErik.Nordmark@Sun.COM ill, NULL, ipha, mp, mp, 0, ipst, error); 1158*11042SErik.Nordmark@Sun.COM 1159*11042SErik.Nordmark@Sun.COM DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp); 1160*11042SErik.Nordmark@Sun.COM if (mp == NULL) { 1161*11042SErik.Nordmark@Sun.COM ira_cleanup(&iras, B_FALSE); 1162*11042SErik.Nordmark@Sun.COM return (error); 1163*11042SErik.Nordmark@Sun.COM } 1164*11042SErik.Nordmark@Sun.COM /* 1165*11042SErik.Nordmark@Sun.COM * Even if the destination was changed by the filter we use the 1166*11042SErik.Nordmark@Sun.COM * forwarding decision that was made based on the address 1167*11042SErik.Nordmark@Sun.COM * in ip_output/ip_set_destination. 1168*11042SErik.Nordmark@Sun.COM */ 1169*11042SErik.Nordmark@Sun.COM /* Length could be different */ 1170*11042SErik.Nordmark@Sun.COM ipha = (ipha_t *)mp->b_rptr; 1171*11042SErik.Nordmark@Sun.COM pktlen = iras.ira_pktlen = ntohs(ipha->ipha_length); 1172*11042SErik.Nordmark@Sun.COM } 1173*11042SErik.Nordmark@Sun.COM 1174*11042SErik.Nordmark@Sun.COM DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 1175*11042SErik.Nordmark@Sun.COM ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL, 1176*11042SErik.Nordmark@Sun.COM int, 1); 1177*11042SErik.Nordmark@Sun.COM 1178*11042SErik.Nordmark@Sun.COM ire->ire_ib_pkt_count++; 1179*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 1180*11042SErik.Nordmark@Sun.COM UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, pktlen); 1181*11042SErik.Nordmark@Sun.COM 1182*11042SErik.Nordmark@Sun.COM /* Destined to ire_zoneid - use that for fanout */ 1183*11042SErik.Nordmark@Sun.COM iras.ira_zoneid = ire->ire_zoneid; 1184*11042SErik.Nordmark@Sun.COM 1185*11042SErik.Nordmark@Sun.COM if (is_system_labeled()) { 1186*11042SErik.Nordmark@Sun.COM iras.ira_flags |= IRAF_SYSTEM_LABELED; 1187*11042SErik.Nordmark@Sun.COM 1188*11042SErik.Nordmark@Sun.COM /* 1189*11042SErik.Nordmark@Sun.COM * This updates ira_cred, ira_tsl and ira_free_flags based 1190*11042SErik.Nordmark@Sun.COM * on the label. We don't expect this to ever fail for 1191*11042SErik.Nordmark@Sun.COM * loopback packets, so we silently drop the packet should it 1192*11042SErik.Nordmark@Sun.COM * fail. 1193*11042SErik.Nordmark@Sun.COM */ 1194*11042SErik.Nordmark@Sun.COM if (!tsol_get_pkt_label(mp, IPV4_VERSION, &iras)) { 1195*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1196*11042SErik.Nordmark@Sun.COM ip_drop_input("tsol_get_pkt_label", mp, ill); 1197*11042SErik.Nordmark@Sun.COM freemsg(mp); 1198*11042SErik.Nordmark@Sun.COM return (0); 1199*11042SErik.Nordmark@Sun.COM } 1200*11042SErik.Nordmark@Sun.COM ASSERT(iras.ira_tsl != NULL); 1201*11042SErik.Nordmark@Sun.COM 1202*11042SErik.Nordmark@Sun.COM /* tsol_get_pkt_label sometimes does pullupmsg */ 1203*11042SErik.Nordmark@Sun.COM ipha = (ipha_t *)mp->b_rptr; 1204*11042SErik.Nordmark@Sun.COM } 1205*11042SErik.Nordmark@Sun.COM 1206*11042SErik.Nordmark@Sun.COM ip_fanout_v4(mp, ipha, &iras); 1207*11042SErik.Nordmark@Sun.COM 1208*11042SErik.Nordmark@Sun.COM /* We moved any IPsec refs from ixa to iras */ 1209*11042SErik.Nordmark@Sun.COM ira_cleanup(&iras, B_FALSE); 1210*11042SErik.Nordmark@Sun.COM return (0); 1211*11042SErik.Nordmark@Sun.COM } 1212*11042SErik.Nordmark@Sun.COM 1213*11042SErik.Nordmark@Sun.COM /* 1214*11042SErik.Nordmark@Sun.COM * ire_sendfn for IRE_BROADCAST 1215*11042SErik.Nordmark@Sun.COM * If the broadcast address is present on multiple ills and ixa_ifindex 1216*11042SErik.Nordmark@Sun.COM * isn't set, then we generate 1217*11042SErik.Nordmark@Sun.COM * a separate datagram (potentially with different source address) for 1218*11042SErik.Nordmark@Sun.COM * those ills. In any case, only one copy is looped back to ip_input_v4. 1219*11042SErik.Nordmark@Sun.COM */ 1220*11042SErik.Nordmark@Sun.COM int 1221*11042SErik.Nordmark@Sun.COM ire_send_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1222*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa, uint32_t *identp) 1223*11042SErik.Nordmark@Sun.COM { 1224*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)iph_arg; 1225*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 1226*11042SErik.Nordmark@Sun.COM irb_t *irb = ire->ire_bucket; 1227*11042SErik.Nordmark@Sun.COM ire_t *ire1; 1228*11042SErik.Nordmark@Sun.COM mblk_t *mp1; 1229*11042SErik.Nordmark@Sun.COM ipha_t *ipha1; 1230*11042SErik.Nordmark@Sun.COM iaflags_t ixaflags = ixa->ixa_flags; 1231*11042SErik.Nordmark@Sun.COM nce_t *nce1, *nce_orig; 1232*11042SErik.Nordmark@Sun.COM 1233*11042SErik.Nordmark@Sun.COM /* 1234*11042SErik.Nordmark@Sun.COM * Unless ire_send_multirt_v4 already set a ttl, force the 1235*11042SErik.Nordmark@Sun.COM * ttl to a smallish value. 1236*11042SErik.Nordmark@Sun.COM */ 1237*11042SErik.Nordmark@Sun.COM if (!(ixa->ixa_flags & IXAF_NO_TTL_CHANGE)) { 1238*11042SErik.Nordmark@Sun.COM /* 1239*11042SErik.Nordmark@Sun.COM * To avoid broadcast storms, we usually set the TTL to 1 for 1240*11042SErik.Nordmark@Sun.COM * broadcasts. This can 1241*11042SErik.Nordmark@Sun.COM * be overridden stack-wide through the ip_broadcast_ttl 1242*11042SErik.Nordmark@Sun.COM * ndd tunable, or on a per-connection basis through the 1243*11042SErik.Nordmark@Sun.COM * IP_BROADCAST_TTL socket option. 1244*11042SErik.Nordmark@Sun.COM * 1245*11042SErik.Nordmark@Sun.COM * If SO_DONTROUTE/IXAF_DONTROUTE is set, then ire_send_wire_v4 1246*11042SErik.Nordmark@Sun.COM * will force ttl to one after we've set this. 1247*11042SErik.Nordmark@Sun.COM */ 1248*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_BROADCAST_TTL_SET) 1249*11042SErik.Nordmark@Sun.COM ipha->ipha_ttl = ixa->ixa_broadcast_ttl; 1250*11042SErik.Nordmark@Sun.COM else 1251*11042SErik.Nordmark@Sun.COM ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl; 1252*11042SErik.Nordmark@Sun.COM } 1253*11042SErik.Nordmark@Sun.COM /* 1254*11042SErik.Nordmark@Sun.COM * Make sure we get a loopback copy (after IPsec and frag) 1255*11042SErik.Nordmark@Sun.COM * Skip hardware checksum so that loopback copy is checksumed. 1256*11042SErik.Nordmark@Sun.COM */ 1257*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1258*11042SErik.Nordmark@Sun.COM 1259*11042SErik.Nordmark@Sun.COM /* Do we need to potentially generate multiple copies? */ 1260*11042SErik.Nordmark@Sun.COM if (irb->irb_ire_cnt == 1 || ixa->ixa_ifindex != 0) 1261*11042SErik.Nordmark@Sun.COM return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1262*11042SErik.Nordmark@Sun.COM 1263*11042SErik.Nordmark@Sun.COM /* 1264*11042SErik.Nordmark@Sun.COM * Loop over all IRE_BROADCAST in the bucket (might only be one). 1265*11042SErik.Nordmark@Sun.COM * Note that everything in the bucket has the same destination address. 1266*11042SErik.Nordmark@Sun.COM */ 1267*11042SErik.Nordmark@Sun.COM irb_refhold(irb); 1268*11042SErik.Nordmark@Sun.COM for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1269*11042SErik.Nordmark@Sun.COM /* We do the main IRE after the end of the loop */ 1270*11042SErik.Nordmark@Sun.COM if (ire1 == ire) 1271*11042SErik.Nordmark@Sun.COM continue; 1272*11042SErik.Nordmark@Sun.COM 1273*11042SErik.Nordmark@Sun.COM /* 1274*11042SErik.Nordmark@Sun.COM * Only IREs for the same IP address should be in the same 1275*11042SErik.Nordmark@Sun.COM * bucket. 1276*11042SErik.Nordmark@Sun.COM * But could have IRE_HOSTs in the case of CGTP. 1277*11042SErik.Nordmark@Sun.COM * If we find any multirt routes we bail out of the loop 1278*11042SErik.Nordmark@Sun.COM * and just do the single packet at the end; ip_postfrag_multirt 1279*11042SErik.Nordmark@Sun.COM * will duplicate the packet. 1280*11042SErik.Nordmark@Sun.COM */ 1281*11042SErik.Nordmark@Sun.COM ASSERT(ire1->ire_addr == ire->ire_addr); 1282*11042SErik.Nordmark@Sun.COM if (!(ire1->ire_type & IRE_BROADCAST)) 1283*11042SErik.Nordmark@Sun.COM continue; 1284*11042SErik.Nordmark@Sun.COM 1285*11042SErik.Nordmark@Sun.COM if (IRE_IS_CONDEMNED(ire1)) 1286*11042SErik.Nordmark@Sun.COM continue; 1287*11042SErik.Nordmark@Sun.COM 1288*11042SErik.Nordmark@Sun.COM if (ixa->ixa_zoneid != ALL_ZONES && 1289*11042SErik.Nordmark@Sun.COM ire->ire_zoneid != ire1->ire_zoneid) 1290*11042SErik.Nordmark@Sun.COM continue; 1291*11042SErik.Nordmark@Sun.COM 1292*11042SErik.Nordmark@Sun.COM ASSERT(ire->ire_ill != ire1->ire_ill && ire1->ire_ill != NULL); 1293*11042SErik.Nordmark@Sun.COM 1294*11042SErik.Nordmark@Sun.COM if (ire1->ire_flags & RTF_MULTIRT) 1295*11042SErik.Nordmark@Sun.COM break; 1296*11042SErik.Nordmark@Sun.COM 1297*11042SErik.Nordmark@Sun.COM /* 1298*11042SErik.Nordmark@Sun.COM * For IPMP we only send for the ipmp_ill. arp_nce_init() will 1299*11042SErik.Nordmark@Sun.COM * ensure that this goes out on the cast_ill. 1300*11042SErik.Nordmark@Sun.COM */ 1301*11042SErik.Nordmark@Sun.COM if (IS_UNDER_IPMP(ire1->ire_ill)) 1302*11042SErik.Nordmark@Sun.COM continue; 1303*11042SErik.Nordmark@Sun.COM 1304*11042SErik.Nordmark@Sun.COM mp1 = copymsg(mp); 1305*11042SErik.Nordmark@Sun.COM if (mp1 == NULL) { 1306*11042SErik.Nordmark@Sun.COM BUMP_MIB(ire1->ire_ill->ill_ip_mib, 1307*11042SErik.Nordmark@Sun.COM ipIfStatsOutDiscards); 1308*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards", 1309*11042SErik.Nordmark@Sun.COM mp, ire1->ire_ill); 1310*11042SErik.Nordmark@Sun.COM continue; 1311*11042SErik.Nordmark@Sun.COM } 1312*11042SErik.Nordmark@Sun.COM 1313*11042SErik.Nordmark@Sun.COM ipha1 = (ipha_t *)mp1->b_rptr; 1314*11042SErik.Nordmark@Sun.COM if (ixa->ixa_flags & IXAF_SET_SOURCE) { 1315*11042SErik.Nordmark@Sun.COM /* 1316*11042SErik.Nordmark@Sun.COM * Need to pick a different source address for each 1317*11042SErik.Nordmark@Sun.COM * interface. If we have a global IPsec policy and 1318*11042SErik.Nordmark@Sun.COM * no per-socket policy then we punt to 1319*11042SErik.Nordmark@Sun.COM * ip_output_simple_v4 using a separate ip_xmit_attr_t. 1320*11042SErik.Nordmark@Sun.COM */ 1321*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IPSEC_GLOBAL_POLICY) { 1322*11042SErik.Nordmark@Sun.COM ip_output_simple_broadcast(ixa, mp1); 1323*11042SErik.Nordmark@Sun.COM continue; 1324*11042SErik.Nordmark@Sun.COM } 1325*11042SErik.Nordmark@Sun.COM /* Pick a new source address for each interface */ 1326*11042SErik.Nordmark@Sun.COM if (ip_select_source_v4(ire1->ire_ill, INADDR_ANY, 1327*11042SErik.Nordmark@Sun.COM ipha1->ipha_dst, INADDR_ANY, ixa->ixa_zoneid, ipst, 1328*11042SErik.Nordmark@Sun.COM &ipha1->ipha_src, NULL, NULL) != 0) { 1329*11042SErik.Nordmark@Sun.COM BUMP_MIB(ire1->ire_ill->ill_ip_mib, 1330*11042SErik.Nordmark@Sun.COM ipIfStatsOutDiscards); 1331*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - select " 1332*11042SErik.Nordmark@Sun.COM "broadcast source", mp1, ire1->ire_ill); 1333*11042SErik.Nordmark@Sun.COM freemsg(mp1); 1334*11042SErik.Nordmark@Sun.COM continue; 1335*11042SErik.Nordmark@Sun.COM } 1336*11042SErik.Nordmark@Sun.COM /* 1337*11042SErik.Nordmark@Sun.COM * Check against global IPsec policy to set the AH/ESP 1338*11042SErik.Nordmark@Sun.COM * attributes. IPsec will set IXAF_IPSEC_* and 1339*11042SErik.Nordmark@Sun.COM * ixa_ipsec_* as appropriate. 1340*11042SErik.Nordmark@Sun.COM */ 1341*11042SErik.Nordmark@Sun.COM if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) { 1342*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_ipsec_policy == NULL); 1343*11042SErik.Nordmark@Sun.COM mp1 = ip_output_attach_policy(mp1, ipha, NULL, 1344*11042SErik.Nordmark@Sun.COM NULL, ixa); 1345*11042SErik.Nordmark@Sun.COM if (mp1 == NULL) { 1346*11042SErik.Nordmark@Sun.COM /* 1347*11042SErik.Nordmark@Sun.COM * MIB and ip_drop_packet already 1348*11042SErik.Nordmark@Sun.COM * done 1349*11042SErik.Nordmark@Sun.COM */ 1350*11042SErik.Nordmark@Sun.COM continue; 1351*11042SErik.Nordmark@Sun.COM } 1352*11042SErik.Nordmark@Sun.COM } 1353*11042SErik.Nordmark@Sun.COM } 1354*11042SErik.Nordmark@Sun.COM /* Make sure we have an NCE on this ill */ 1355*11042SErik.Nordmark@Sun.COM nce1 = arp_nce_init(ire1->ire_ill, ire1->ire_addr, 1356*11042SErik.Nordmark@Sun.COM ire1->ire_type); 1357*11042SErik.Nordmark@Sun.COM if (nce1 == NULL) { 1358*11042SErik.Nordmark@Sun.COM BUMP_MIB(ire1->ire_ill->ill_ip_mib, 1359*11042SErik.Nordmark@Sun.COM ipIfStatsOutDiscards); 1360*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - broadcast nce", 1361*11042SErik.Nordmark@Sun.COM mp1, ire1->ire_ill); 1362*11042SErik.Nordmark@Sun.COM freemsg(mp1); 1363*11042SErik.Nordmark@Sun.COM continue; 1364*11042SErik.Nordmark@Sun.COM } 1365*11042SErik.Nordmark@Sun.COM nce_orig = ixa->ixa_nce; 1366*11042SErik.Nordmark@Sun.COM ixa->ixa_nce = nce1; 1367*11042SErik.Nordmark@Sun.COM 1368*11042SErik.Nordmark@Sun.COM ire_refhold(ire1); 1369*11042SErik.Nordmark@Sun.COM /* 1370*11042SErik.Nordmark@Sun.COM * Ignore any errors here. We just collect the errno for 1371*11042SErik.Nordmark@Sun.COM * the main ire below 1372*11042SErik.Nordmark@Sun.COM */ 1373*11042SErik.Nordmark@Sun.COM (void) ire_send_wire_v4(ire1, mp1, ipha1, ixa, identp); 1374*11042SErik.Nordmark@Sun.COM ire_refrele(ire1); 1375*11042SErik.Nordmark@Sun.COM 1376*11042SErik.Nordmark@Sun.COM ixa->ixa_nce = nce_orig; 1377*11042SErik.Nordmark@Sun.COM nce_refrele(nce1); 1378*11042SErik.Nordmark@Sun.COM 1379*11042SErik.Nordmark@Sun.COM ixa->ixa_flags &= ~IXAF_LOOPBACK_COPY; 1380*11042SErik.Nordmark@Sun.COM } 1381*11042SErik.Nordmark@Sun.COM irb_refrele(irb); 1382*11042SErik.Nordmark@Sun.COM /* Finally, the main one */ 1383*11042SErik.Nordmark@Sun.COM 1384*11042SErik.Nordmark@Sun.COM /* 1385*11042SErik.Nordmark@Sun.COM * For IPMP we only send broadcasts on the ipmp_ill. 1386*11042SErik.Nordmark@Sun.COM */ 1387*11042SErik.Nordmark@Sun.COM if (IS_UNDER_IPMP(ire->ire_ill)) { 1388*11042SErik.Nordmark@Sun.COM freemsg(mp); 1389*11042SErik.Nordmark@Sun.COM return (0); 1390*11042SErik.Nordmark@Sun.COM } 1391*11042SErik.Nordmark@Sun.COM 1392*11042SErik.Nordmark@Sun.COM return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1393*11042SErik.Nordmark@Sun.COM } 1394*11042SErik.Nordmark@Sun.COM 1395*11042SErik.Nordmark@Sun.COM /* 1396*11042SErik.Nordmark@Sun.COM * Send a packet using a different source address and different 1397*11042SErik.Nordmark@Sun.COM * IPsec policy. 1398*11042SErik.Nordmark@Sun.COM */ 1399*11042SErik.Nordmark@Sun.COM static void 1400*11042SErik.Nordmark@Sun.COM ip_output_simple_broadcast(ip_xmit_attr_t *ixa, mblk_t *mp) 1401*11042SErik.Nordmark@Sun.COM { 1402*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t ixas; 1403*11042SErik.Nordmark@Sun.COM 1404*11042SErik.Nordmark@Sun.COM bzero(&ixas, sizeof (ixas)); 1405*11042SErik.Nordmark@Sun.COM ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4; 1406*11042SErik.Nordmark@Sun.COM ixas.ixa_zoneid = ixa->ixa_zoneid; 1407*11042SErik.Nordmark@Sun.COM ixas.ixa_ifindex = 0; 1408*11042SErik.Nordmark@Sun.COM ixas.ixa_ipst = ixa->ixa_ipst; 1409*11042SErik.Nordmark@Sun.COM ixas.ixa_cred = ixa->ixa_cred; 1410*11042SErik.Nordmark@Sun.COM ixas.ixa_cpid = ixa->ixa_cpid; 1411*11042SErik.Nordmark@Sun.COM ixas.ixa_tsl = ixa->ixa_tsl; 1412*11042SErik.Nordmark@Sun.COM ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1413*11042SErik.Nordmark@Sun.COM 1414*11042SErik.Nordmark@Sun.COM (void) ip_output_simple(mp, &ixas); 1415*11042SErik.Nordmark@Sun.COM ixa_cleanup(&ixas); 1416*11042SErik.Nordmark@Sun.COM } 1417*11042SErik.Nordmark@Sun.COM 1418*11042SErik.Nordmark@Sun.COM 1419*11042SErik.Nordmark@Sun.COM static void 1420*11042SErik.Nordmark@Sun.COM multirt_check_v4(ire_t *ire, ipha_t *ipha, ip_xmit_attr_t *ixa) 1421*11042SErik.Nordmark@Sun.COM { 1422*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 1423*11042SErik.Nordmark@Sun.COM 1424*11042SErik.Nordmark@Sun.COM /* Limit the TTL on multirt packets */ 1425*11042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_MULTICAST) { 1426*11042SErik.Nordmark@Sun.COM if (ipha->ipha_ttl > 1) { 1427*11042SErik.Nordmark@Sun.COM ip2dbg(("ire_send_multirt_v4: forcing multicast " 1428*11042SErik.Nordmark@Sun.COM "multirt TTL to 1 (was %d), dst 0x%08x\n", 1429*11042SErik.Nordmark@Sun.COM ipha->ipha_ttl, ntohl(ire->ire_addr))); 1430*11042SErik.Nordmark@Sun.COM ipha->ipha_ttl = 1; 1431*11042SErik.Nordmark@Sun.COM } 1432*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_NO_TTL_CHANGE; 1433*11042SErik.Nordmark@Sun.COM } else if ((ipst->ips_ip_multirt_ttl > 0) && 1434*11042SErik.Nordmark@Sun.COM (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) { 1435*11042SErik.Nordmark@Sun.COM ipha->ipha_ttl = ipst->ips_ip_multirt_ttl; 1436*11042SErik.Nordmark@Sun.COM /* 1437*11042SErik.Nordmark@Sun.COM * Need to ensure we don't increase the ttl should we go through 1438*11042SErik.Nordmark@Sun.COM * ire_send_broadcast or multicast. 1439*11042SErik.Nordmark@Sun.COM */ 1440*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_NO_TTL_CHANGE; 1441*11042SErik.Nordmark@Sun.COM } 1442*11042SErik.Nordmark@Sun.COM } 1443*11042SErik.Nordmark@Sun.COM 1444*11042SErik.Nordmark@Sun.COM /* 1445*11042SErik.Nordmark@Sun.COM * ire_sendfn for IRE_MULTICAST 1446*11042SErik.Nordmark@Sun.COM */ 1447*11042SErik.Nordmark@Sun.COM int 1448*11042SErik.Nordmark@Sun.COM ire_send_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1449*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa, uint32_t *identp) 1450*11042SErik.Nordmark@Sun.COM { 1451*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)iph_arg; 1452*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 1453*11042SErik.Nordmark@Sun.COM ill_t *ill = ire->ire_ill; 1454*11042SErik.Nordmark@Sun.COM iaflags_t ixaflags = ixa->ixa_flags; 1455*11042SErik.Nordmark@Sun.COM 1456*11042SErik.Nordmark@Sun.COM /* 1457*11042SErik.Nordmark@Sun.COM * The IRE_MULTICAST is the same whether or not multirt is in use. 1458*11042SErik.Nordmark@Sun.COM * Hence we need special-case code. 1459*11042SErik.Nordmark@Sun.COM */ 1460*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_MULTIRT_MULTICAST) 1461*11042SErik.Nordmark@Sun.COM multirt_check_v4(ire, ipha, ixa); 1462*11042SErik.Nordmark@Sun.COM 1463*11042SErik.Nordmark@Sun.COM /* 1464*11042SErik.Nordmark@Sun.COM * Check if anything in ip_input_v4 wants a copy of the transmitted 1465*11042SErik.Nordmark@Sun.COM * packet (after IPsec and fragmentation) 1466*11042SErik.Nordmark@Sun.COM * 1467*11042SErik.Nordmark@Sun.COM * 1. Multicast routers always need a copy unless SO_DONTROUTE is set 1468*11042SErik.Nordmark@Sun.COM * RSVP and the rsvp daemon is an example of a 1469*11042SErik.Nordmark@Sun.COM * protocol and user level process that 1470*11042SErik.Nordmark@Sun.COM * handles it's own routing. Hence, it uses the 1471*11042SErik.Nordmark@Sun.COM * SO_DONTROUTE option to accomplish this. 1472*11042SErik.Nordmark@Sun.COM * 2. If the sender has set IP_MULTICAST_LOOP, then we just 1473*11042SErik.Nordmark@Sun.COM * check whether there are any receivers for the group on the ill 1474*11042SErik.Nordmark@Sun.COM * (ignoring the zoneid). 1475*11042SErik.Nordmark@Sun.COM * 3. If IP_MULTICAST_LOOP is not set, then we check if there are 1476*11042SErik.Nordmark@Sun.COM * any members in other shared-IP zones. 1477*11042SErik.Nordmark@Sun.COM * If such members exist, then we indicate that the sending zone 1478*11042SErik.Nordmark@Sun.COM * shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP 1479*11042SErik.Nordmark@Sun.COM * behavior. 1480*11042SErik.Nordmark@Sun.COM * 1481*11042SErik.Nordmark@Sun.COM * When we loopback we skip hardware checksum to make sure loopback 1482*11042SErik.Nordmark@Sun.COM * copy is checksumed. 1483*11042SErik.Nordmark@Sun.COM * 1484*11042SErik.Nordmark@Sun.COM * Note that ire_ill is the upper in the case of IPMP. 1485*11042SErik.Nordmark@Sun.COM */ 1486*11042SErik.Nordmark@Sun.COM ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM); 1487*11042SErik.Nordmark@Sun.COM if (ipst->ips_ip_g_mrouter && ill->ill_mrouter_cnt > 0 && 1488*11042SErik.Nordmark@Sun.COM !(ixaflags & IXAF_DONTROUTE)) { 1489*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1490*11042SErik.Nordmark@Sun.COM } else if (ixaflags & IXAF_MULTICAST_LOOP) { 1491*11042SErik.Nordmark@Sun.COM /* 1492*11042SErik.Nordmark@Sun.COM * If this zone or any other zone has members then loopback 1493*11042SErik.Nordmark@Sun.COM * a copy. 1494*11042SErik.Nordmark@Sun.COM */ 1495*11042SErik.Nordmark@Sun.COM if (ill_hasmembers_v4(ill, ipha->ipha_dst)) 1496*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1497*11042SErik.Nordmark@Sun.COM } else if (ipst->ips_netstack->netstack_numzones > 1) { 1498*11042SErik.Nordmark@Sun.COM /* 1499*11042SErik.Nordmark@Sun.COM * This zone should not have a copy. But there are some other 1500*11042SErik.Nordmark@Sun.COM * zones which might have members. 1501*11042SErik.Nordmark@Sun.COM */ 1502*11042SErik.Nordmark@Sun.COM if (ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1503*11042SErik.Nordmark@Sun.COM ixa->ixa_zoneid)) { 1504*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_NO_LOOP_ZONEID_SET; 1505*11042SErik.Nordmark@Sun.COM ixa->ixa_no_loop_zoneid = ixa->ixa_zoneid; 1506*11042SErik.Nordmark@Sun.COM ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1507*11042SErik.Nordmark@Sun.COM } 1508*11042SErik.Nordmark@Sun.COM } 1509*11042SErik.Nordmark@Sun.COM 1510*11042SErik.Nordmark@Sun.COM /* 1511*11042SErik.Nordmark@Sun.COM * Unless ire_send_multirt_v4 or icmp_output_hdrincl already set a ttl, 1512*11042SErik.Nordmark@Sun.COM * force the ttl to the IP_MULTICAST_TTL value 1513*11042SErik.Nordmark@Sun.COM */ 1514*11042SErik.Nordmark@Sun.COM if (!(ixaflags & IXAF_NO_TTL_CHANGE)) { 1515*11042SErik.Nordmark@Sun.COM ipha->ipha_ttl = ixa->ixa_multicast_ttl; 1516*11042SErik.Nordmark@Sun.COM } 1517*11042SErik.Nordmark@Sun.COM 1518*11042SErik.Nordmark@Sun.COM return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1519*11042SErik.Nordmark@Sun.COM } 1520*11042SErik.Nordmark@Sun.COM 1521*11042SErik.Nordmark@Sun.COM /* 1522*11042SErik.Nordmark@Sun.COM * ire_sendfn for IREs with RTF_MULTIRT 1523*11042SErik.Nordmark@Sun.COM */ 1524*11042SErik.Nordmark@Sun.COM int 1525*11042SErik.Nordmark@Sun.COM ire_send_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1526*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa, uint32_t *identp) 1527*11042SErik.Nordmark@Sun.COM { 1528*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)iph_arg; 1529*11042SErik.Nordmark@Sun.COM 1530*11042SErik.Nordmark@Sun.COM multirt_check_v4(ire, ipha, ixa); 1531*11042SErik.Nordmark@Sun.COM 1532*11042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_MULTICAST) 1533*11042SErik.Nordmark@Sun.COM return (ire_send_multicast_v4(ire, mp, ipha, ixa, identp)); 1534*11042SErik.Nordmark@Sun.COM else if (ire->ire_type & IRE_BROADCAST) 1535*11042SErik.Nordmark@Sun.COM return (ire_send_broadcast_v4(ire, mp, ipha, ixa, identp)); 1536*11042SErik.Nordmark@Sun.COM else 1537*11042SErik.Nordmark@Sun.COM return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1538*11042SErik.Nordmark@Sun.COM } 1539*11042SErik.Nordmark@Sun.COM 1540*11042SErik.Nordmark@Sun.COM /* 1541*11042SErik.Nordmark@Sun.COM * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE 1542*11042SErik.Nordmark@Sun.COM */ 1543*11042SErik.Nordmark@Sun.COM int 1544*11042SErik.Nordmark@Sun.COM ire_send_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1545*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa, uint32_t *identp) 1546*11042SErik.Nordmark@Sun.COM { 1547*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 1548*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)iph_arg; 1549*11042SErik.Nordmark@Sun.COM ill_t *ill; 1550*11042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; 1551*11042SErik.Nordmark@Sun.COM boolean_t dummy; 1552*11042SErik.Nordmark@Sun.COM 1553*11042SErik.Nordmark@Sun.COM /* We assign an IP ident for nice errors */ 1554*11042SErik.Nordmark@Sun.COM ipha->ipha_ident = atomic_add_32_nv(identp, 1); 1555*11042SErik.Nordmark@Sun.COM 1556*11042SErik.Nordmark@Sun.COM BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); 1557*11042SErik.Nordmark@Sun.COM 1558*11042SErik.Nordmark@Sun.COM if (ire->ire_type & IRE_NOROUTE) { 1559*11042SErik.Nordmark@Sun.COM /* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */ 1560*11042SErik.Nordmark@Sun.COM ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, 1561*11042SErik.Nordmark@Sun.COM RTA_DST, ipst); 1562*11042SErik.Nordmark@Sun.COM } 1563*11042SErik.Nordmark@Sun.COM 1564*11042SErik.Nordmark@Sun.COM if (ire->ire_flags & RTF_BLACKHOLE) { 1565*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp, NULL); 1566*11042SErik.Nordmark@Sun.COM freemsg(mp); 1567*11042SErik.Nordmark@Sun.COM /* No error even for local senders - silent blackhole */ 1568*11042SErik.Nordmark@Sun.COM return (0); 1569*11042SErik.Nordmark@Sun.COM } 1570*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp, NULL); 1571*11042SErik.Nordmark@Sun.COM 1572*11042SErik.Nordmark@Sun.COM /* 1573*11042SErik.Nordmark@Sun.COM * We need an ill_t for the ip_recv_attr_t even though this packet 1574*11042SErik.Nordmark@Sun.COM * was never received and icmp_unreachable doesn't currently use 1575*11042SErik.Nordmark@Sun.COM * ira_ill. 1576*11042SErik.Nordmark@Sun.COM */ 1577*11042SErik.Nordmark@Sun.COM ill = ill_lookup_on_name("lo0", B_FALSE, 1578*11042SErik.Nordmark@Sun.COM !(ixa->ixa_flags & IRAF_IS_IPV4), &dummy, ipst); 1579*11042SErik.Nordmark@Sun.COM if (ill == NULL) { 1580*11042SErik.Nordmark@Sun.COM freemsg(mp); 1581*11042SErik.Nordmark@Sun.COM return (EHOSTUNREACH); 1582*11042SErik.Nordmark@Sun.COM } 1583*11042SErik.Nordmark@Sun.COM 1584*11042SErik.Nordmark@Sun.COM bzero(&iras, sizeof (iras)); 1585*11042SErik.Nordmark@Sun.COM /* Map ixa to ira including IPsec policies */ 1586*11042SErik.Nordmark@Sun.COM ipsec_out_to_in(ixa, ill, &iras); 1587*11042SErik.Nordmark@Sun.COM 1588*11042SErik.Nordmark@Sun.COM if (ip_source_routed(ipha, ipst)) { 1589*11042SErik.Nordmark@Sun.COM icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, &iras); 1590*11042SErik.Nordmark@Sun.COM } else { 1591*11042SErik.Nordmark@Sun.COM icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, &iras); 1592*11042SErik.Nordmark@Sun.COM } 1593*11042SErik.Nordmark@Sun.COM /* We moved any IPsec refs from ixa to iras */ 1594*11042SErik.Nordmark@Sun.COM ira_cleanup(&iras, B_FALSE); 1595*11042SErik.Nordmark@Sun.COM ill_refrele(ill); 1596*11042SErik.Nordmark@Sun.COM return (EHOSTUNREACH); 1597*11042SErik.Nordmark@Sun.COM } 1598*11042SErik.Nordmark@Sun.COM 1599*11042SErik.Nordmark@Sun.COM /* 1600*11042SErik.Nordmark@Sun.COM * Calculate a checksum ignoring any hardware capabilities 1601*11042SErik.Nordmark@Sun.COM * 1602*11042SErik.Nordmark@Sun.COM * Returns B_FALSE if the packet was too short for the checksum. Caller 1603*11042SErik.Nordmark@Sun.COM * should free and do stats. 1604*11042SErik.Nordmark@Sun.COM */ 1605*11042SErik.Nordmark@Sun.COM static boolean_t 1606*11042SErik.Nordmark@Sun.COM ip_output_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_xmit_attr_t *ixa) 1607*11042SErik.Nordmark@Sun.COM { 1608*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 1609*11042SErik.Nordmark@Sun.COM uint_t pktlen = ixa->ixa_pktlen; 1610*11042SErik.Nordmark@Sun.COM uint16_t *cksump; 1611*11042SErik.Nordmark@Sun.COM uint32_t cksum; 1612*11042SErik.Nordmark@Sun.COM uint8_t protocol = ixa->ixa_protocol; 1613*11042SErik.Nordmark@Sun.COM uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length; 1614*11042SErik.Nordmark@Sun.COM ipaddr_t dst = ipha->ipha_dst; 1615*11042SErik.Nordmark@Sun.COM ipaddr_t src = ipha->ipha_src; 1616*11042SErik.Nordmark@Sun.COM 1617*11042SErik.Nordmark@Sun.COM /* Just in case it contained garbage */ 1618*11042SErik.Nordmark@Sun.COM DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; 1619*11042SErik.Nordmark@Sun.COM 1620*11042SErik.Nordmark@Sun.COM /* 1621*11042SErik.Nordmark@Sun.COM * Calculate ULP checksum 1622*11042SErik.Nordmark@Sun.COM */ 1623*11042SErik.Nordmark@Sun.COM if (protocol == IPPROTO_TCP) { 1624*11042SErik.Nordmark@Sun.COM cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length); 1625*11042SErik.Nordmark@Sun.COM cksum = IP_TCP_CSUM_COMP; 1626*11042SErik.Nordmark@Sun.COM } else if (protocol == IPPROTO_UDP) { 1627*11042SErik.Nordmark@Sun.COM cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length); 1628*11042SErik.Nordmark@Sun.COM cksum = IP_UDP_CSUM_COMP; 1629*11042SErik.Nordmark@Sun.COM } else if (protocol == IPPROTO_SCTP) { 1630*11042SErik.Nordmark@Sun.COM sctp_hdr_t *sctph; 1631*11042SErik.Nordmark@Sun.COM 1632*11042SErik.Nordmark@Sun.COM ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph))); 1633*11042SErik.Nordmark@Sun.COM sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length); 1634*11042SErik.Nordmark@Sun.COM /* 1635*11042SErik.Nordmark@Sun.COM * Zero out the checksum field to ensure proper 1636*11042SErik.Nordmark@Sun.COM * checksum calculation. 1637*11042SErik.Nordmark@Sun.COM */ 1638*11042SErik.Nordmark@Sun.COM sctph->sh_chksum = 0; 1639*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 1640*11042SErik.Nordmark@Sun.COM if (!skip_sctp_cksum) 1641*11042SErik.Nordmark@Sun.COM #endif 1642*11042SErik.Nordmark@Sun.COM sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length); 1643*11042SErik.Nordmark@Sun.COM goto ip_hdr_cksum; 1644*11042SErik.Nordmark@Sun.COM } else { 1645*11042SErik.Nordmark@Sun.COM goto ip_hdr_cksum; 1646*11042SErik.Nordmark@Sun.COM } 1647*11042SErik.Nordmark@Sun.COM 1648*11042SErik.Nordmark@Sun.COM /* ULP puts the checksum field is in the first mblk */ 1649*11042SErik.Nordmark@Sun.COM ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr); 1650*11042SErik.Nordmark@Sun.COM 1651*11042SErik.Nordmark@Sun.COM /* 1652*11042SErik.Nordmark@Sun.COM * We accumulate the pseudo header checksum in cksum. 1653*11042SErik.Nordmark@Sun.COM * This is pretty hairy code, so watch close. One 1654*11042SErik.Nordmark@Sun.COM * thing to keep in mind is that UDP and TCP have 1655*11042SErik.Nordmark@Sun.COM * stored their respective datagram lengths in their 1656*11042SErik.Nordmark@Sun.COM * checksum fields. This lines things up real nice. 1657*11042SErik.Nordmark@Sun.COM */ 1658*11042SErik.Nordmark@Sun.COM cksum += (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 1659*11042SErik.Nordmark@Sun.COM 1660*11042SErik.Nordmark@Sun.COM cksum = IP_CSUM(mp, ip_hdr_length, cksum); 1661*11042SErik.Nordmark@Sun.COM /* 1662*11042SErik.Nordmark@Sun.COM * For UDP/IPv4 a zero means that the packets wasn't checksummed. 1663*11042SErik.Nordmark@Sun.COM * Change to 0xffff 1664*11042SErik.Nordmark@Sun.COM */ 1665*11042SErik.Nordmark@Sun.COM if (protocol == IPPROTO_UDP && cksum == 0) 1666*11042SErik.Nordmark@Sun.COM *cksump = ~cksum; 1667*11042SErik.Nordmark@Sun.COM else 1668*11042SErik.Nordmark@Sun.COM *cksump = cksum; 1669*11042SErik.Nordmark@Sun.COM 1670*11042SErik.Nordmark@Sun.COM IP_STAT(ipst, ip_out_sw_cksum); 1671*11042SErik.Nordmark@Sun.COM IP_STAT_UPDATE(ipst, ip_out_sw_cksum_bytes, pktlen); 1672*11042SErik.Nordmark@Sun.COM 1673*11042SErik.Nordmark@Sun.COM ip_hdr_cksum: 1674*11042SErik.Nordmark@Sun.COM /* Calculate IPv4 header checksum */ 1675*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = 0; 1676*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1677*11042SErik.Nordmark@Sun.COM return (B_TRUE); 1678*11042SErik.Nordmark@Sun.COM } 1679*11042SErik.Nordmark@Sun.COM 1680*11042SErik.Nordmark@Sun.COM /* 1681*11042SErik.Nordmark@Sun.COM * Calculate the ULP checksum - try to use hardware. 1682*11042SErik.Nordmark@Sun.COM * In the case of MULTIRT, broadcast or multicast the 1683*11042SErik.Nordmark@Sun.COM * IXAF_NO_HW_CKSUM is set in which case we use software. 1684*11042SErik.Nordmark@Sun.COM * 1685*11042SErik.Nordmark@Sun.COM * If the hardware supports IP header checksum offload; then clear the 1686*11042SErik.Nordmark@Sun.COM * contents of IP header checksum field as expected by NIC. 1687*11042SErik.Nordmark@Sun.COM * Do this only if we offloaded either full or partial sum. 1688*11042SErik.Nordmark@Sun.COM * 1689*11042SErik.Nordmark@Sun.COM * Returns B_FALSE if the packet was too short for the checksum. Caller 1690*11042SErik.Nordmark@Sun.COM * should free and do stats. 1691*11042SErik.Nordmark@Sun.COM */ 1692*11042SErik.Nordmark@Sun.COM static boolean_t 1693*11042SErik.Nordmark@Sun.COM ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, 1694*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa, ill_t *ill) 1695*11042SErik.Nordmark@Sun.COM { 1696*11042SErik.Nordmark@Sun.COM uint_t pktlen = ixa->ixa_pktlen; 1697*11042SErik.Nordmark@Sun.COM uint16_t *cksump; 1698*11042SErik.Nordmark@Sun.COM uint16_t hck_flags; 1699*11042SErik.Nordmark@Sun.COM uint32_t cksum; 1700*11042SErik.Nordmark@Sun.COM uint8_t protocol = ixa->ixa_protocol; 1701*11042SErik.Nordmark@Sun.COM uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length; 1702*11042SErik.Nordmark@Sun.COM 1703*11042SErik.Nordmark@Sun.COM if ((ixaflags & IXAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 1704*11042SErik.Nordmark@Sun.COM !dohwcksum) { 1705*11042SErik.Nordmark@Sun.COM return (ip_output_sw_cksum_v4(mp, ipha, ixa)); 1706*11042SErik.Nordmark@Sun.COM } 1707*11042SErik.Nordmark@Sun.COM 1708*11042SErik.Nordmark@Sun.COM /* 1709*11042SErik.Nordmark@Sun.COM * Calculate ULP checksum. Note that we don't use cksump and cksum 1710*11042SErik.Nordmark@Sun.COM * if the ill has FULL support. 1711*11042SErik.Nordmark@Sun.COM */ 1712*11042SErik.Nordmark@Sun.COM if (protocol == IPPROTO_TCP) { 1713*11042SErik.Nordmark@Sun.COM cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length); 1714*11042SErik.Nordmark@Sun.COM cksum = IP_TCP_CSUM_COMP; /* Pseudo-header cksum */ 1715*11042SErik.Nordmark@Sun.COM } else if (protocol == IPPROTO_UDP) { 1716*11042SErik.Nordmark@Sun.COM cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length); 1717*11042SErik.Nordmark@Sun.COM cksum = IP_UDP_CSUM_COMP; /* Pseudo-header cksum */ 1718*11042SErik.Nordmark@Sun.COM } else if (protocol == IPPROTO_SCTP) { 1719*11042SErik.Nordmark@Sun.COM sctp_hdr_t *sctph; 1720*11042SErik.Nordmark@Sun.COM 1721*11042SErik.Nordmark@Sun.COM ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph))); 1722*11042SErik.Nordmark@Sun.COM sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length); 1723*11042SErik.Nordmark@Sun.COM /* 1724*11042SErik.Nordmark@Sun.COM * Zero out the checksum field to ensure proper 1725*11042SErik.Nordmark@Sun.COM * checksum calculation. 1726*11042SErik.Nordmark@Sun.COM */ 1727*11042SErik.Nordmark@Sun.COM sctph->sh_chksum = 0; 1728*11042SErik.Nordmark@Sun.COM #ifdef DEBUG 1729*11042SErik.Nordmark@Sun.COM if (!skip_sctp_cksum) 1730*11042SErik.Nordmark@Sun.COM #endif 1731*11042SErik.Nordmark@Sun.COM sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length); 1732*11042SErik.Nordmark@Sun.COM goto ip_hdr_cksum; 1733*11042SErik.Nordmark@Sun.COM } else { 1734*11042SErik.Nordmark@Sun.COM ip_hdr_cksum: 1735*11042SErik.Nordmark@Sun.COM /* Calculate IPv4 header checksum */ 1736*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = 0; 1737*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1738*11042SErik.Nordmark@Sun.COM return (B_TRUE); 1739*11042SErik.Nordmark@Sun.COM } 1740*11042SErik.Nordmark@Sun.COM 1741*11042SErik.Nordmark@Sun.COM /* ULP puts the checksum field is in the first mblk */ 1742*11042SErik.Nordmark@Sun.COM ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr); 1743*11042SErik.Nordmark@Sun.COM 1744*11042SErik.Nordmark@Sun.COM /* 1745*11042SErik.Nordmark@Sun.COM * Underlying interface supports hardware checksum offload for 1746*11042SErik.Nordmark@Sun.COM * the payload; leave the payload checksum for the hardware to 1747*11042SErik.Nordmark@Sun.COM * calculate. N.B: We only need to set up checksum info on the 1748*11042SErik.Nordmark@Sun.COM * first mblk. 1749*11042SErik.Nordmark@Sun.COM */ 1750*11042SErik.Nordmark@Sun.COM hck_flags = ill->ill_hcksum_capab->ill_hcksum_txflags; 1751*11042SErik.Nordmark@Sun.COM 1752*11042SErik.Nordmark@Sun.COM DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; 1753*11042SErik.Nordmark@Sun.COM if (hck_flags & HCKSUM_INET_FULL_V4) { 1754*11042SErik.Nordmark@Sun.COM /* 1755*11042SErik.Nordmark@Sun.COM * Hardware calculates pseudo-header, header and the 1756*11042SErik.Nordmark@Sun.COM * payload checksums, so clear the checksum field in 1757*11042SErik.Nordmark@Sun.COM * the protocol header. 1758*11042SErik.Nordmark@Sun.COM */ 1759*11042SErik.Nordmark@Sun.COM *cksump = 0; 1760*11042SErik.Nordmark@Sun.COM DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM; 1761*11042SErik.Nordmark@Sun.COM 1762*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = 0; 1763*11042SErik.Nordmark@Sun.COM if (hck_flags & HCKSUM_IPHDRCKSUM) { 1764*11042SErik.Nordmark@Sun.COM DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; 1765*11042SErik.Nordmark@Sun.COM } else { 1766*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1767*11042SErik.Nordmark@Sun.COM } 1768*11042SErik.Nordmark@Sun.COM return (B_TRUE); 1769*11042SErik.Nordmark@Sun.COM } 1770*11042SErik.Nordmark@Sun.COM if ((hck_flags) & HCKSUM_INET_PARTIAL) { 1771*11042SErik.Nordmark@Sun.COM ipaddr_t dst = ipha->ipha_dst; 1772*11042SErik.Nordmark@Sun.COM ipaddr_t src = ipha->ipha_src; 1773*11042SErik.Nordmark@Sun.COM /* 1774*11042SErik.Nordmark@Sun.COM * Partial checksum offload has been enabled. Fill 1775*11042SErik.Nordmark@Sun.COM * the checksum field in the protocol header with the 1776*11042SErik.Nordmark@Sun.COM * pseudo-header checksum value. 1777*11042SErik.Nordmark@Sun.COM * 1778*11042SErik.Nordmark@Sun.COM * We accumulate the pseudo header checksum in cksum. 1779*11042SErik.Nordmark@Sun.COM * This is pretty hairy code, so watch close. One 1780*11042SErik.Nordmark@Sun.COM * thing to keep in mind is that UDP and TCP have 1781*11042SErik.Nordmark@Sun.COM * stored their respective datagram lengths in their 1782*11042SErik.Nordmark@Sun.COM * checksum fields. This lines things up real nice. 1783*11042SErik.Nordmark@Sun.COM */ 1784*11042SErik.Nordmark@Sun.COM cksum += (dst >> 16) + (dst & 0xFFFF) + 1785*11042SErik.Nordmark@Sun.COM (src >> 16) + (src & 0xFFFF); 1786*11042SErik.Nordmark@Sun.COM cksum += *(cksump); 1787*11042SErik.Nordmark@Sun.COM cksum = (cksum & 0xFFFF) + (cksum >> 16); 1788*11042SErik.Nordmark@Sun.COM *(cksump) = (cksum & 0xFFFF) + (cksum >> 16); 1789*11042SErik.Nordmark@Sun.COM 1790*11042SErik.Nordmark@Sun.COM /* 1791*11042SErik.Nordmark@Sun.COM * Offsets are relative to beginning of IP header. 1792*11042SErik.Nordmark@Sun.COM */ 1793*11042SErik.Nordmark@Sun.COM DB_CKSUMSTART(mp) = ip_hdr_length; 1794*11042SErik.Nordmark@Sun.COM DB_CKSUMSTUFF(mp) = (uint8_t *)cksump - (uint8_t *)ipha; 1795*11042SErik.Nordmark@Sun.COM DB_CKSUMEND(mp) = pktlen; 1796*11042SErik.Nordmark@Sun.COM DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM; 1797*11042SErik.Nordmark@Sun.COM 1798*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = 0; 1799*11042SErik.Nordmark@Sun.COM if (hck_flags & HCKSUM_IPHDRCKSUM) { 1800*11042SErik.Nordmark@Sun.COM DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; 1801*11042SErik.Nordmark@Sun.COM } else { 1802*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1803*11042SErik.Nordmark@Sun.COM } 1804*11042SErik.Nordmark@Sun.COM return (B_TRUE); 1805*11042SErik.Nordmark@Sun.COM } 1806*11042SErik.Nordmark@Sun.COM /* Hardware capabilities include neither full nor partial IPv4 */ 1807*11042SErik.Nordmark@Sun.COM return (ip_output_sw_cksum_v4(mp, ipha, ixa)); 1808*11042SErik.Nordmark@Sun.COM } 1809*11042SErik.Nordmark@Sun.COM 1810*11042SErik.Nordmark@Sun.COM /* 1811*11042SErik.Nordmark@Sun.COM * ire_sendfn for offlink and onlink destinations. 1812*11042SErik.Nordmark@Sun.COM * Also called from the multicast, broadcast, multirt send functions. 1813*11042SErik.Nordmark@Sun.COM * 1814*11042SErik.Nordmark@Sun.COM * Assumes that the caller has a hold on the ire. 1815*11042SErik.Nordmark@Sun.COM * 1816*11042SErik.Nordmark@Sun.COM * This function doesn't care if the IRE just became condemned since that 1817*11042SErik.Nordmark@Sun.COM * can happen at any time. 1818*11042SErik.Nordmark@Sun.COM */ 1819*11042SErik.Nordmark@Sun.COM /* ARGSUSED */ 1820*11042SErik.Nordmark@Sun.COM int 1821*11042SErik.Nordmark@Sun.COM ire_send_wire_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1822*11042SErik.Nordmark@Sun.COM ip_xmit_attr_t *ixa, uint32_t *identp) 1823*11042SErik.Nordmark@Sun.COM { 1824*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 1825*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)iph_arg; 1826*11042SErik.Nordmark@Sun.COM iaflags_t ixaflags = ixa->ixa_flags; 1827*11042SErik.Nordmark@Sun.COM ill_t *ill; 1828*11042SErik.Nordmark@Sun.COM 1829*11042SErik.Nordmark@Sun.COM ASSERT(ixa->ixa_nce != NULL); 1830*11042SErik.Nordmark@Sun.COM ill = ixa->ixa_nce->nce_ill; 1831*11042SErik.Nordmark@Sun.COM 1832*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_DONTROUTE) 1833*11042SErik.Nordmark@Sun.COM ipha->ipha_ttl = 1; 1834*11042SErik.Nordmark@Sun.COM 1835*11042SErik.Nordmark@Sun.COM /* 1836*11042SErik.Nordmark@Sun.COM * Assign an ident value for this packet. There could be other 1837*11042SErik.Nordmark@Sun.COM * threads targeting the same destination, so we have to arrange 1838*11042SErik.Nordmark@Sun.COM * for a atomic increment. Note that we use a 32-bit atomic add 1839*11042SErik.Nordmark@Sun.COM * because it has better performance than its 16-bit sibling. 1840*11042SErik.Nordmark@Sun.COM * 1841*11042SErik.Nordmark@Sun.COM * Normally ixa_extra_ident is 0, but in the case of LSO it will 1842*11042SErik.Nordmark@Sun.COM * be the number of TCP segments that the driver/hardware will 1843*11042SErik.Nordmark@Sun.COM * extraly construct. 1844*11042SErik.Nordmark@Sun.COM * 1845*11042SErik.Nordmark@Sun.COM * If running in cluster mode and if the source address 1846*11042SErik.Nordmark@Sun.COM * belongs to a replicated service then vector through 1847*11042SErik.Nordmark@Sun.COM * cl_inet_ipident vector to allocate ip identifier 1848*11042SErik.Nordmark@Sun.COM * NOTE: This is a contract private interface with the 1849*11042SErik.Nordmark@Sun.COM * clustering group. 1850*11042SErik.Nordmark@Sun.COM */ 1851*11042SErik.Nordmark@Sun.COM if (cl_inet_ipident != NULL) { 1852*11042SErik.Nordmark@Sun.COM ipaddr_t src = ipha->ipha_src; 1853*11042SErik.Nordmark@Sun.COM ipaddr_t dst = ipha->ipha_dst; 1854*11042SErik.Nordmark@Sun.COM netstackid_t stack_id = ipst->ips_netstack->netstack_stackid; 1855*11042SErik.Nordmark@Sun.COM 1856*11042SErik.Nordmark@Sun.COM ASSERT(cl_inet_isclusterwide != NULL); 1857*11042SErik.Nordmark@Sun.COM if ((*cl_inet_isclusterwide)(stack_id, IPPROTO_IP, 1858*11042SErik.Nordmark@Sun.COM AF_INET, (uint8_t *)(uintptr_t)src, NULL)) { 1859*11042SErik.Nordmark@Sun.COM /* 1860*11042SErik.Nordmark@Sun.COM * Note: not correct with LSO since we can't allocate 1861*11042SErik.Nordmark@Sun.COM * ixa_extra_ident+1 consecutive values. 1862*11042SErik.Nordmark@Sun.COM */ 1863*11042SErik.Nordmark@Sun.COM ipha->ipha_ident = (*cl_inet_ipident)(stack_id, 1864*11042SErik.Nordmark@Sun.COM IPPROTO_IP, AF_INET, (uint8_t *)(uintptr_t)src, 1865*11042SErik.Nordmark@Sun.COM (uint8_t *)(uintptr_t)dst, NULL); 1866*11042SErik.Nordmark@Sun.COM } else { 1867*11042SErik.Nordmark@Sun.COM ipha->ipha_ident = atomic_add_32_nv(identp, 1868*11042SErik.Nordmark@Sun.COM ixa->ixa_extra_ident + 1); 1869*11042SErik.Nordmark@Sun.COM } 1870*11042SErik.Nordmark@Sun.COM } else { 1871*11042SErik.Nordmark@Sun.COM ipha->ipha_ident = atomic_add_32_nv(identp, 1872*11042SErik.Nordmark@Sun.COM ixa->ixa_extra_ident + 1); 1873*11042SErik.Nordmark@Sun.COM } 1874*11042SErik.Nordmark@Sun.COM #ifndef _BIG_ENDIAN 1875*11042SErik.Nordmark@Sun.COM ipha->ipha_ident = htons(ipha->ipha_ident); 1876*11042SErik.Nordmark@Sun.COM #endif 1877*11042SErik.Nordmark@Sun.COM 1878*11042SErik.Nordmark@Sun.COM /* 1879*11042SErik.Nordmark@Sun.COM * This might set b_band, thus the IPsec and fragmentation 1880*11042SErik.Nordmark@Sun.COM * code in IP ensures that b_band is updated in the first mblk. 1881*11042SErik.Nordmark@Sun.COM */ 1882*11042SErik.Nordmark@Sun.COM if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 1883*11042SErik.Nordmark@Sun.COM /* ip_process translates an IS_UNDER_IPMP */ 1884*11042SErik.Nordmark@Sun.COM mp = ip_process(IPP_LOCAL_OUT, mp, ill, ill); 1885*11042SErik.Nordmark@Sun.COM if (mp == NULL) { 1886*11042SErik.Nordmark@Sun.COM /* ip_drop_packet and MIB done */ 1887*11042SErik.Nordmark@Sun.COM return (0); /* Might just be delayed */ 1888*11042SErik.Nordmark@Sun.COM } 1889*11042SErik.Nordmark@Sun.COM } 1890*11042SErik.Nordmark@Sun.COM 1891*11042SErik.Nordmark@Sun.COM /* 1892*11042SErik.Nordmark@Sun.COM * Verify any IPv4 options. 1893*11042SErik.Nordmark@Sun.COM * 1894*11042SErik.Nordmark@Sun.COM * The presense of IP options also forces the network stack to 1895*11042SErik.Nordmark@Sun.COM * calculate the checksum in software. This is because: 1896*11042SErik.Nordmark@Sun.COM * 1897*11042SErik.Nordmark@Sun.COM * Wrap around: certain partial-checksum NICs (eri, ce) limit 1898*11042SErik.Nordmark@Sun.COM * the size of "start offset" width to 6-bit. This effectively 1899*11042SErik.Nordmark@Sun.COM * sets the largest value of the offset to 64-bytes, starting 1900*11042SErik.Nordmark@Sun.COM * from the MAC header. When the cumulative MAC and IP headers 1901*11042SErik.Nordmark@Sun.COM * exceed such limit, the offset will wrap around. This causes 1902*11042SErik.Nordmark@Sun.COM * the checksum to be calculated at the wrong place. 1903*11042SErik.Nordmark@Sun.COM * 1904*11042SErik.Nordmark@Sun.COM * IPv4 source routing: none of the full-checksum capable NICs 1905*11042SErik.Nordmark@Sun.COM * is capable of correctly handling the IPv4 source-routing 1906*11042SErik.Nordmark@Sun.COM * option for purposes of calculating the pseudo-header; the 1907*11042SErik.Nordmark@Sun.COM * actual destination is different from the destination in the 1908*11042SErik.Nordmark@Sun.COM * header which is that of the next-hop. (This case may not be 1909*11042SErik.Nordmark@Sun.COM * true for NICs which can parse IPv6 extension headers, but 1910*11042SErik.Nordmark@Sun.COM * we choose to simplify the implementation by not offloading 1911*11042SErik.Nordmark@Sun.COM * checksum when they are present.) 1912*11042SErik.Nordmark@Sun.COM */ 1913*11042SErik.Nordmark@Sun.COM if (!IS_SIMPLE_IPH(ipha)) { 1914*11042SErik.Nordmark@Sun.COM ixaflags = ixa->ixa_flags |= IXAF_NO_HW_CKSUM; 1915*11042SErik.Nordmark@Sun.COM /* An IS_UNDER_IPMP ill is ok here */ 1916*11042SErik.Nordmark@Sun.COM if (ip_output_options(mp, ipha, ixa, ill)) { 1917*11042SErik.Nordmark@Sun.COM /* Packet has been consumed and ICMP error sent */ 1918*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1919*11042SErik.Nordmark@Sun.COM return (EINVAL); 1920*11042SErik.Nordmark@Sun.COM } 1921*11042SErik.Nordmark@Sun.COM } 1922*11042SErik.Nordmark@Sun.COM 1923*11042SErik.Nordmark@Sun.COM /* 1924*11042SErik.Nordmark@Sun.COM * To handle IPsec/iptun's labeling needs we need to tag packets 1925*11042SErik.Nordmark@Sun.COM * while we still have ixa_tsl 1926*11042SErik.Nordmark@Sun.COM */ 1927*11042SErik.Nordmark@Sun.COM if (is_system_labeled() && ixa->ixa_tsl != NULL && 1928*11042SErik.Nordmark@Sun.COM (ill->ill_mactype == DL_6TO4 || ill->ill_mactype == DL_IPV4 || 1929*11042SErik.Nordmark@Sun.COM ill->ill_mactype == DL_IPV6)) { 1930*11042SErik.Nordmark@Sun.COM cred_t *newcr; 1931*11042SErik.Nordmark@Sun.COM 1932*11042SErik.Nordmark@Sun.COM newcr = copycred_from_tslabel(ixa->ixa_cred, ixa->ixa_tsl, 1933*11042SErik.Nordmark@Sun.COM KM_NOSLEEP); 1934*11042SErik.Nordmark@Sun.COM if (newcr == NULL) { 1935*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1936*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - newcr", 1937*11042SErik.Nordmark@Sun.COM mp, ill); 1938*11042SErik.Nordmark@Sun.COM freemsg(mp); 1939*11042SErik.Nordmark@Sun.COM return (ENOBUFS); 1940*11042SErik.Nordmark@Sun.COM } 1941*11042SErik.Nordmark@Sun.COM mblk_setcred(mp, newcr, NOPID); 1942*11042SErik.Nordmark@Sun.COM crfree(newcr); /* mblk_setcred did its own crhold */ 1943*11042SErik.Nordmark@Sun.COM } 1944*11042SErik.Nordmark@Sun.COM 1945*11042SErik.Nordmark@Sun.COM if (ixa->ixa_pktlen > ixa->ixa_fragsize || 1946*11042SErik.Nordmark@Sun.COM (ixaflags & IXAF_IPSEC_SECURE)) { 1947*11042SErik.Nordmark@Sun.COM uint32_t pktlen; 1948*11042SErik.Nordmark@Sun.COM 1949*11042SErik.Nordmark@Sun.COM pktlen = ixa->ixa_pktlen; 1950*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IPSEC_SECURE) 1951*11042SErik.Nordmark@Sun.COM pktlen += ipsec_out_extra_length(ixa); 1952*11042SErik.Nordmark@Sun.COM 1953*11042SErik.Nordmark@Sun.COM if (pktlen > IP_MAXPACKET) 1954*11042SErik.Nordmark@Sun.COM return (EMSGSIZE); 1955*11042SErik.Nordmark@Sun.COM 1956*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_SET_ULP_CKSUM) { 1957*11042SErik.Nordmark@Sun.COM /* 1958*11042SErik.Nordmark@Sun.COM * Compute ULP checksum and IP header checksum 1959*11042SErik.Nordmark@Sun.COM * using software 1960*11042SErik.Nordmark@Sun.COM */ 1961*11042SErik.Nordmark@Sun.COM if (!ip_output_sw_cksum_v4(mp, ipha, ixa)) { 1962*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1963*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards", mp, ill); 1964*11042SErik.Nordmark@Sun.COM freemsg(mp); 1965*11042SErik.Nordmark@Sun.COM return (EINVAL); 1966*11042SErik.Nordmark@Sun.COM } 1967*11042SErik.Nordmark@Sun.COM } else { 1968*11042SErik.Nordmark@Sun.COM /* Calculate IPv4 header checksum */ 1969*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = 0; 1970*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1971*11042SErik.Nordmark@Sun.COM } 1972*11042SErik.Nordmark@Sun.COM 1973*11042SErik.Nordmark@Sun.COM /* 1974*11042SErik.Nordmark@Sun.COM * If this packet would generate a icmp_frag_needed 1975*11042SErik.Nordmark@Sun.COM * message, we need to handle it before we do the IPsec 1976*11042SErik.Nordmark@Sun.COM * processing. Otherwise, we need to strip the IPsec 1977*11042SErik.Nordmark@Sun.COM * headers before we send up the message to the ULPs 1978*11042SErik.Nordmark@Sun.COM * which becomes messy and difficult. 1979*11042SErik.Nordmark@Sun.COM * 1980*11042SErik.Nordmark@Sun.COM * We check using IXAF_DONTFRAG. The DF bit in the header 1981*11042SErik.Nordmark@Sun.COM * is not inspected - it will be copied to any generated 1982*11042SErik.Nordmark@Sun.COM * fragments. 1983*11042SErik.Nordmark@Sun.COM */ 1984*11042SErik.Nordmark@Sun.COM if ((pktlen > ixa->ixa_fragsize) && 1985*11042SErik.Nordmark@Sun.COM (ixaflags & IXAF_DONTFRAG)) { 1986*11042SErik.Nordmark@Sun.COM /* Generate ICMP and return error */ 1987*11042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; 1988*11042SErik.Nordmark@Sun.COM 1989*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip4__fragsize__fail, uint_t, pktlen, 1990*11042SErik.Nordmark@Sun.COM uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen, 1991*11042SErik.Nordmark@Sun.COM uint_t, ixa->ixa_pmtu); 1992*11042SErik.Nordmark@Sun.COM 1993*11042SErik.Nordmark@Sun.COM bzero(&iras, sizeof (iras)); 1994*11042SErik.Nordmark@Sun.COM /* Map ixa to ira including IPsec policies */ 1995*11042SErik.Nordmark@Sun.COM ipsec_out_to_in(ixa, ill, &iras); 1996*11042SErik.Nordmark@Sun.COM 1997*11042SErik.Nordmark@Sun.COM ip_drop_output("ICMP_FRAG_NEEDED", mp, ill); 1998*11042SErik.Nordmark@Sun.COM icmp_frag_needed(mp, ixa->ixa_fragsize, &iras); 1999*11042SErik.Nordmark@Sun.COM /* We moved any IPsec refs from ixa to iras */ 2000*11042SErik.Nordmark@Sun.COM ira_cleanup(&iras, B_FALSE); 2001*11042SErik.Nordmark@Sun.COM return (EMSGSIZE); 2002*11042SErik.Nordmark@Sun.COM } 2003*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip4__fragsize__ok, uint_t, pktlen, 2004*11042SErik.Nordmark@Sun.COM uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen, 2005*11042SErik.Nordmark@Sun.COM uint_t, ixa->ixa_pmtu); 2006*11042SErik.Nordmark@Sun.COM 2007*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IPSEC_SECURE) { 2008*11042SErik.Nordmark@Sun.COM /* 2009*11042SErik.Nordmark@Sun.COM * Pass in sufficient information so that 2010*11042SErik.Nordmark@Sun.COM * IPsec can determine whether to fragment, and 2011*11042SErik.Nordmark@Sun.COM * which function to call after fragmentation. 2012*11042SErik.Nordmark@Sun.COM */ 2013*11042SErik.Nordmark@Sun.COM return (ipsec_out_process(mp, ixa)); 2014*11042SErik.Nordmark@Sun.COM } 2015*11042SErik.Nordmark@Sun.COM return (ip_fragment_v4(mp, ixa->ixa_nce, ixaflags, 2016*11042SErik.Nordmark@Sun.COM ixa->ixa_pktlen, ixa->ixa_fragsize, ixa->ixa_xmit_hint, 2017*11042SErik.Nordmark@Sun.COM ixa->ixa_zoneid, ixa->ixa_no_loop_zoneid, 2018*11042SErik.Nordmark@Sun.COM ixa->ixa_postfragfn, &ixa->ixa_cookie)); 2019*11042SErik.Nordmark@Sun.COM } 2020*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_SET_ULP_CKSUM) { 2021*11042SErik.Nordmark@Sun.COM /* Compute ULP checksum and IP header checksum */ 2022*11042SErik.Nordmark@Sun.COM /* An IS_UNDER_IPMP ill is ok here */ 2023*11042SErik.Nordmark@Sun.COM if (!ip_output_cksum_v4(ixaflags, mp, ipha, ixa, ill)) { 2024*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2025*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards", mp, ill); 2026*11042SErik.Nordmark@Sun.COM freemsg(mp); 2027*11042SErik.Nordmark@Sun.COM return (EINVAL); 2028*11042SErik.Nordmark@Sun.COM } 2029*11042SErik.Nordmark@Sun.COM } else { 2030*11042SErik.Nordmark@Sun.COM /* Calculate IPv4 header checksum */ 2031*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = 0; 2032*11042SErik.Nordmark@Sun.COM ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 2033*11042SErik.Nordmark@Sun.COM } 2034*11042SErik.Nordmark@Sun.COM return ((ixa->ixa_postfragfn)(mp, ixa->ixa_nce, ixaflags, 2035*11042SErik.Nordmark@Sun.COM ixa->ixa_pktlen, ixa->ixa_xmit_hint, ixa->ixa_zoneid, 2036*11042SErik.Nordmark@Sun.COM ixa->ixa_no_loop_zoneid, &ixa->ixa_cookie)); 2037*11042SErik.Nordmark@Sun.COM } 2038*11042SErik.Nordmark@Sun.COM 2039*11042SErik.Nordmark@Sun.COM /* 2040*11042SErik.Nordmark@Sun.COM * Send mp into ip_input 2041*11042SErik.Nordmark@Sun.COM * Common for IPv4 and IPv6 2042*11042SErik.Nordmark@Sun.COM */ 2043*11042SErik.Nordmark@Sun.COM void 2044*11042SErik.Nordmark@Sun.COM ip_postfrag_loopback(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, 2045*11042SErik.Nordmark@Sun.COM uint_t pkt_len, zoneid_t nolzid) 2046*11042SErik.Nordmark@Sun.COM { 2047*11042SErik.Nordmark@Sun.COM rtc_t rtc; 2048*11042SErik.Nordmark@Sun.COM ill_t *ill = nce->nce_ill; 2049*11042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; /* NOTE: No bzero for performance */ 2050*11042SErik.Nordmark@Sun.COM ncec_t *ncec; 2051*11042SErik.Nordmark@Sun.COM 2052*11042SErik.Nordmark@Sun.COM ncec = nce->nce_common; 2053*11042SErik.Nordmark@Sun.COM iras.ira_flags = IRAF_VERIFY_IP_CKSUM | IRAF_VERIFY_ULP_CKSUM | 2054*11042SErik.Nordmark@Sun.COM IRAF_LOOPBACK | IRAF_L2SRC_LOOPBACK; 2055*11042SErik.Nordmark@Sun.COM if (ncec->ncec_flags & NCE_F_BCAST) 2056*11042SErik.Nordmark@Sun.COM iras.ira_flags |= IRAF_L2DST_BROADCAST; 2057*11042SErik.Nordmark@Sun.COM else if (ncec->ncec_flags & NCE_F_MCAST) 2058*11042SErik.Nordmark@Sun.COM iras.ira_flags |= IRAF_L2DST_MULTICAST; 2059*11042SErik.Nordmark@Sun.COM 2060*11042SErik.Nordmark@Sun.COM iras.ira_free_flags = 0; 2061*11042SErik.Nordmark@Sun.COM iras.ira_cred = NULL; 2062*11042SErik.Nordmark@Sun.COM iras.ira_cpid = NOPID; 2063*11042SErik.Nordmark@Sun.COM iras.ira_tsl = NULL; 2064*11042SErik.Nordmark@Sun.COM iras.ira_zoneid = ALL_ZONES; 2065*11042SErik.Nordmark@Sun.COM iras.ira_pktlen = pkt_len; 2066*11042SErik.Nordmark@Sun.COM UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, iras.ira_pktlen); 2067*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 2068*11042SErik.Nordmark@Sun.COM 2069*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IS_IPV4) 2070*11042SErik.Nordmark@Sun.COM iras.ira_flags |= IRAF_IS_IPV4; 2071*11042SErik.Nordmark@Sun.COM 2072*11042SErik.Nordmark@Sun.COM iras.ira_ill = iras.ira_rill = ill; 2073*11042SErik.Nordmark@Sun.COM iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 2074*11042SErik.Nordmark@Sun.COM iras.ira_rifindex = iras.ira_ruifindex; 2075*11042SErik.Nordmark@Sun.COM iras.ira_mhip = NULL; 2076*11042SErik.Nordmark@Sun.COM 2077*11042SErik.Nordmark@Sun.COM iras.ira_flags |= ixaflags & IAF_MASK; 2078*11042SErik.Nordmark@Sun.COM iras.ira_no_loop_zoneid = nolzid; 2079*11042SErik.Nordmark@Sun.COM 2080*11042SErik.Nordmark@Sun.COM /* Broadcast and multicast doesn't care about the squeue */ 2081*11042SErik.Nordmark@Sun.COM iras.ira_sqp = NULL; 2082*11042SErik.Nordmark@Sun.COM 2083*11042SErik.Nordmark@Sun.COM rtc.rtc_ire = NULL; 2084*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IS_IPV4) { 2085*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)mp->b_rptr; 2086*11042SErik.Nordmark@Sun.COM 2087*11042SErik.Nordmark@Sun.COM rtc.rtc_ipaddr = INADDR_ANY; 2088*11042SErik.Nordmark@Sun.COM 2089*11042SErik.Nordmark@Sun.COM (*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc); 2090*11042SErik.Nordmark@Sun.COM if (rtc.rtc_ire != NULL) { 2091*11042SErik.Nordmark@Sun.COM ASSERT(rtc.rtc_ipaddr != INADDR_ANY); 2092*11042SErik.Nordmark@Sun.COM ire_refrele(rtc.rtc_ire); 2093*11042SErik.Nordmark@Sun.COM } 2094*11042SErik.Nordmark@Sun.COM } else { 2095*11042SErik.Nordmark@Sun.COM ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2096*11042SErik.Nordmark@Sun.COM 2097*11042SErik.Nordmark@Sun.COM rtc.rtc_ip6addr = ipv6_all_zeros; 2098*11042SErik.Nordmark@Sun.COM 2099*11042SErik.Nordmark@Sun.COM (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc); 2100*11042SErik.Nordmark@Sun.COM if (rtc.rtc_ire != NULL) { 2101*11042SErik.Nordmark@Sun.COM ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr)); 2102*11042SErik.Nordmark@Sun.COM ire_refrele(rtc.rtc_ire); 2103*11042SErik.Nordmark@Sun.COM } 2104*11042SErik.Nordmark@Sun.COM } 2105*11042SErik.Nordmark@Sun.COM /* Any references to clean up? No hold on ira */ 2106*11042SErik.Nordmark@Sun.COM if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 2107*11042SErik.Nordmark@Sun.COM ira_cleanup(&iras, B_FALSE); 2108*11042SErik.Nordmark@Sun.COM } 2109*11042SErik.Nordmark@Sun.COM 2110*11042SErik.Nordmark@Sun.COM /* 2111*11042SErik.Nordmark@Sun.COM * Post fragmentation function for IRE_MULTICAST and IRE_BROADCAST which 2112*11042SErik.Nordmark@Sun.COM * looks at the IXAF_LOOPBACK_COPY flag. 2113*11042SErik.Nordmark@Sun.COM * Common for IPv4 and IPv6. 2114*11042SErik.Nordmark@Sun.COM * 2115*11042SErik.Nordmark@Sun.COM * If the loopback copy fails (due to no memory) but we send the packet out 2116*11042SErik.Nordmark@Sun.COM * on the wire we return no failure. Only in the case we supress the wire 2117*11042SErik.Nordmark@Sun.COM * sending do we take the loopback failure into account. 2118*11042SErik.Nordmark@Sun.COM * 2119*11042SErik.Nordmark@Sun.COM * Note that we do not perform DTRACE_IP7 and FW_HOOKS for the looped back copy. 2120*11042SErik.Nordmark@Sun.COM * Those operations are performed on this packet in ip_xmit() and it would 2121*11042SErik.Nordmark@Sun.COM * be odd to do it twice for the same packet. 2122*11042SErik.Nordmark@Sun.COM */ 2123*11042SErik.Nordmark@Sun.COM int 2124*11042SErik.Nordmark@Sun.COM ip_postfrag_loopcheck(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, 2125*11042SErik.Nordmark@Sun.COM uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 2126*11042SErik.Nordmark@Sun.COM uintptr_t *ixacookie) 2127*11042SErik.Nordmark@Sun.COM { 2128*11042SErik.Nordmark@Sun.COM ill_t *ill = nce->nce_ill; 2129*11042SErik.Nordmark@Sun.COM int error = 0; 2130*11042SErik.Nordmark@Sun.COM 2131*11042SErik.Nordmark@Sun.COM /* 2132*11042SErik.Nordmark@Sun.COM * Check for IXAF_LOOPBACK_COPY - send a copy to ip as if the driver 2133*11042SErik.Nordmark@Sun.COM * had looped it back 2134*11042SErik.Nordmark@Sun.COM */ 2135*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_LOOPBACK_COPY) { 2136*11042SErik.Nordmark@Sun.COM mblk_t *mp1; 2137*11042SErik.Nordmark@Sun.COM 2138*11042SErik.Nordmark@Sun.COM mp1 = copymsg(mp); 2139*11042SErik.Nordmark@Sun.COM if (mp1 == NULL) { 2140*11042SErik.Nordmark@Sun.COM /* Failed to deliver the loopback copy. */ 2141*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2142*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards", mp, ill); 2143*11042SErik.Nordmark@Sun.COM error = ENOBUFS; 2144*11042SErik.Nordmark@Sun.COM } else { 2145*11042SErik.Nordmark@Sun.COM ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len, 2146*11042SErik.Nordmark@Sun.COM nolzid); 2147*11042SErik.Nordmark@Sun.COM } 2148*11042SErik.Nordmark@Sun.COM } 2149*11042SErik.Nordmark@Sun.COM 2150*11042SErik.Nordmark@Sun.COM /* 2151*11042SErik.Nordmark@Sun.COM * If TTL = 0 then only do the loopback to this host i.e. we are 2152*11042SErik.Nordmark@Sun.COM * done. We are also done if this was the 2153*11042SErik.Nordmark@Sun.COM * loopback interface since it is sufficient 2154*11042SErik.Nordmark@Sun.COM * to loopback one copy of a multicast packet. 2155*11042SErik.Nordmark@Sun.COM */ 2156*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IS_IPV4) { 2157*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)mp->b_rptr; 2158*11042SErik.Nordmark@Sun.COM 2159*11042SErik.Nordmark@Sun.COM if (ipha->ipha_ttl == 0) { 2160*11042SErik.Nordmark@Sun.COM ip_drop_output("multicast ipha_ttl not sent to wire", 2161*11042SErik.Nordmark@Sun.COM mp, ill); 2162*11042SErik.Nordmark@Sun.COM freemsg(mp); 2163*11042SErik.Nordmark@Sun.COM return (error); 2164*11042SErik.Nordmark@Sun.COM } 2165*11042SErik.Nordmark@Sun.COM } else { 2166*11042SErik.Nordmark@Sun.COM ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2167*11042SErik.Nordmark@Sun.COM 2168*11042SErik.Nordmark@Sun.COM if (ip6h->ip6_hops == 0) { 2169*11042SErik.Nordmark@Sun.COM ip_drop_output("multicast ipha_ttl not sent to wire", 2170*11042SErik.Nordmark@Sun.COM mp, ill); 2171*11042SErik.Nordmark@Sun.COM freemsg(mp); 2172*11042SErik.Nordmark@Sun.COM return (error); 2173*11042SErik.Nordmark@Sun.COM } 2174*11042SErik.Nordmark@Sun.COM } 2175*11042SErik.Nordmark@Sun.COM if (nce->nce_ill->ill_wq == NULL) { 2176*11042SErik.Nordmark@Sun.COM /* Loopback interface */ 2177*11042SErik.Nordmark@Sun.COM ip_drop_output("multicast on lo0 not sent to wire", mp, ill); 2178*11042SErik.Nordmark@Sun.COM freemsg(mp); 2179*11042SErik.Nordmark@Sun.COM return (error); 2180*11042SErik.Nordmark@Sun.COM } 2181*11042SErik.Nordmark@Sun.COM 2182*11042SErik.Nordmark@Sun.COM return (ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0, 2183*11042SErik.Nordmark@Sun.COM ixacookie)); 2184*11042SErik.Nordmark@Sun.COM } 2185*11042SErik.Nordmark@Sun.COM 2186*11042SErik.Nordmark@Sun.COM /* 2187*11042SErik.Nordmark@Sun.COM * Post fragmentation function for RTF_MULTIRT routes. 2188*11042SErik.Nordmark@Sun.COM * Since IRE_BROADCASTs can have RTF_MULTIRT, this function 2189*11042SErik.Nordmark@Sun.COM * checks IXAF_LOOPBACK_COPY. 2190*11042SErik.Nordmark@Sun.COM * 2191*11042SErik.Nordmark@Sun.COM * If no packet is sent due to failures then we return an errno, but if at 2192*11042SErik.Nordmark@Sun.COM * least one succeeded we return zero. 2193*11042SErik.Nordmark@Sun.COM */ 2194*11042SErik.Nordmark@Sun.COM int 2195*11042SErik.Nordmark@Sun.COM ip_postfrag_multirt_v4(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, 2196*11042SErik.Nordmark@Sun.COM uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 2197*11042SErik.Nordmark@Sun.COM uintptr_t *ixacookie) 2198*11042SErik.Nordmark@Sun.COM { 2199*11042SErik.Nordmark@Sun.COM irb_t *irb; 2200*11042SErik.Nordmark@Sun.COM ipha_t *ipha = (ipha_t *)mp->b_rptr; 2201*11042SErik.Nordmark@Sun.COM ire_t *ire; 2202*11042SErik.Nordmark@Sun.COM ire_t *ire1; 2203*11042SErik.Nordmark@Sun.COM mblk_t *mp1; 2204*11042SErik.Nordmark@Sun.COM nce_t *nce1; 2205*11042SErik.Nordmark@Sun.COM ill_t *ill = nce->nce_ill; 2206*11042SErik.Nordmark@Sun.COM ill_t *ill1; 2207*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 2208*11042SErik.Nordmark@Sun.COM int error = 0; 2209*11042SErik.Nordmark@Sun.COM int num_sent = 0; 2210*11042SErik.Nordmark@Sun.COM int err; 2211*11042SErik.Nordmark@Sun.COM uint_t ire_type; 2212*11042SErik.Nordmark@Sun.COM ipaddr_t nexthop; 2213*11042SErik.Nordmark@Sun.COM 2214*11042SErik.Nordmark@Sun.COM ASSERT(ixaflags & IXAF_IS_IPV4); 2215*11042SErik.Nordmark@Sun.COM 2216*11042SErik.Nordmark@Sun.COM /* Check for IXAF_LOOPBACK_COPY */ 2217*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_LOOPBACK_COPY) { 2218*11042SErik.Nordmark@Sun.COM mblk_t *mp1; 2219*11042SErik.Nordmark@Sun.COM 2220*11042SErik.Nordmark@Sun.COM mp1 = copymsg(mp); 2221*11042SErik.Nordmark@Sun.COM if (mp1 == NULL) { 2222*11042SErik.Nordmark@Sun.COM /* Failed to deliver the loopback copy. */ 2223*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2224*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards", mp, ill); 2225*11042SErik.Nordmark@Sun.COM error = ENOBUFS; 2226*11042SErik.Nordmark@Sun.COM } else { 2227*11042SErik.Nordmark@Sun.COM ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len, 2228*11042SErik.Nordmark@Sun.COM nolzid); 2229*11042SErik.Nordmark@Sun.COM } 2230*11042SErik.Nordmark@Sun.COM } 2231*11042SErik.Nordmark@Sun.COM 2232*11042SErik.Nordmark@Sun.COM /* 2233*11042SErik.Nordmark@Sun.COM * Loop over RTF_MULTIRT for ipha_dst in the same bucket. Send 2234*11042SErik.Nordmark@Sun.COM * a copy to each one. 2235*11042SErik.Nordmark@Sun.COM * Use the nce (nexthop) and ipha_dst to find the ire. 2236*11042SErik.Nordmark@Sun.COM * 2237*11042SErik.Nordmark@Sun.COM * MULTIRT is not designed to work with shared-IP zones thus we don't 2238*11042SErik.Nordmark@Sun.COM * need to pass a zoneid or a label to the IRE lookup. 2239*11042SErik.Nordmark@Sun.COM */ 2240*11042SErik.Nordmark@Sun.COM if (V4_PART_OF_V6(nce->nce_addr) == ipha->ipha_dst) { 2241*11042SErik.Nordmark@Sun.COM /* Broadcast and multicast case */ 2242*11042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, 0, 0, 2243*11042SErik.Nordmark@Sun.COM NULL, ALL_ZONES, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL); 2244*11042SErik.Nordmark@Sun.COM } else { 2245*11042SErik.Nordmark@Sun.COM ipaddr_t v4addr = V4_PART_OF_V6(nce->nce_addr); 2246*11042SErik.Nordmark@Sun.COM 2247*11042SErik.Nordmark@Sun.COM /* Unicast case */ 2248*11042SErik.Nordmark@Sun.COM ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, v4addr, 0, 2249*11042SErik.Nordmark@Sun.COM NULL, ALL_ZONES, NULL, MATCH_IRE_GW, 0, ipst, NULL); 2250*11042SErik.Nordmark@Sun.COM } 2251*11042SErik.Nordmark@Sun.COM 2252*11042SErik.Nordmark@Sun.COM if (ire == NULL || 2253*11042SErik.Nordmark@Sun.COM (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2254*11042SErik.Nordmark@Sun.COM !(ire->ire_flags & RTF_MULTIRT)) { 2255*11042SErik.Nordmark@Sun.COM /* Drop */ 2256*11042SErik.Nordmark@Sun.COM ip_drop_output("ip_postfrag_multirt didn't find route", 2257*11042SErik.Nordmark@Sun.COM mp, nce->nce_ill); 2258*11042SErik.Nordmark@Sun.COM if (ire != NULL) 2259*11042SErik.Nordmark@Sun.COM ire_refrele(ire); 2260*11042SErik.Nordmark@Sun.COM return (ENETUNREACH); 2261*11042SErik.Nordmark@Sun.COM } 2262*11042SErik.Nordmark@Sun.COM 2263*11042SErik.Nordmark@Sun.COM irb = ire->ire_bucket; 2264*11042SErik.Nordmark@Sun.COM irb_refhold(irb); 2265*11042SErik.Nordmark@Sun.COM for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 2266*11042SErik.Nordmark@Sun.COM /* 2267*11042SErik.Nordmark@Sun.COM * For broadcast we can have a mixture of IRE_BROADCAST and 2268*11042SErik.Nordmark@Sun.COM * IRE_HOST due to the manually added IRE_HOSTs that are used 2269*11042SErik.Nordmark@Sun.COM * to trigger the creation of the special CGTP broadcast routes. 2270*11042SErik.Nordmark@Sun.COM * Thus we have to skip if ire_type doesn't match the original. 2271*11042SErik.Nordmark@Sun.COM */ 2272*11042SErik.Nordmark@Sun.COM if (IRE_IS_CONDEMNED(ire1) || 2273*11042SErik.Nordmark@Sun.COM !(ire1->ire_flags & RTF_MULTIRT) || 2274*11042SErik.Nordmark@Sun.COM ire1->ire_type != ire->ire_type) 2275*11042SErik.Nordmark@Sun.COM continue; 2276*11042SErik.Nordmark@Sun.COM 2277*11042SErik.Nordmark@Sun.COM /* Do the ire argument one after the loop */ 2278*11042SErik.Nordmark@Sun.COM if (ire1 == ire) 2279*11042SErik.Nordmark@Sun.COM continue; 2280*11042SErik.Nordmark@Sun.COM 2281*11042SErik.Nordmark@Sun.COM ill1 = ire_nexthop_ill(ire1); 2282*11042SErik.Nordmark@Sun.COM if (ill1 == NULL) { 2283*11042SErik.Nordmark@Sun.COM /* 2284*11042SErik.Nordmark@Sun.COM * This ire might not have been picked by 2285*11042SErik.Nordmark@Sun.COM * ire_route_recursive, in which case ire_dep might 2286*11042SErik.Nordmark@Sun.COM * not have been setup yet. 2287*11042SErik.Nordmark@Sun.COM * We kick ire_route_recursive to try to resolve 2288*11042SErik.Nordmark@Sun.COM * starting at ire1. 2289*11042SErik.Nordmark@Sun.COM */ 2290*11042SErik.Nordmark@Sun.COM ire_t *ire2; 2291*11042SErik.Nordmark@Sun.COM 2292*11042SErik.Nordmark@Sun.COM ire2 = ire_route_recursive_impl_v4(ire1, 2293*11042SErik.Nordmark@Sun.COM ire1->ire_addr, ire1->ire_type, ire1->ire_ill, 2294*11042SErik.Nordmark@Sun.COM ire1->ire_zoneid, NULL, MATCH_IRE_DSTONLY, 2295*11042SErik.Nordmark@Sun.COM B_TRUE, 0, ipst, NULL, NULL, NULL); 2296*11042SErik.Nordmark@Sun.COM if (ire2 != NULL) 2297*11042SErik.Nordmark@Sun.COM ire_refrele(ire2); 2298*11042SErik.Nordmark@Sun.COM ill1 = ire_nexthop_ill(ire1); 2299*11042SErik.Nordmark@Sun.COM } 2300*11042SErik.Nordmark@Sun.COM 2301*11042SErik.Nordmark@Sun.COM if (ill1 == NULL) { 2302*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2303*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - no ill", 2304*11042SErik.Nordmark@Sun.COM mp, ill); 2305*11042SErik.Nordmark@Sun.COM error = ENETUNREACH; 2306*11042SErik.Nordmark@Sun.COM continue; 2307*11042SErik.Nordmark@Sun.COM } 2308*11042SErik.Nordmark@Sun.COM 2309*11042SErik.Nordmark@Sun.COM /* Pick the addr and type to use for arp_nce_init */ 2310*11042SErik.Nordmark@Sun.COM if (nce->nce_common->ncec_flags & NCE_F_BCAST) { 2311*11042SErik.Nordmark@Sun.COM ire_type = IRE_BROADCAST; 2312*11042SErik.Nordmark@Sun.COM nexthop = ire1->ire_gateway_addr; 2313*11042SErik.Nordmark@Sun.COM } else if (nce->nce_common->ncec_flags & NCE_F_MCAST) { 2314*11042SErik.Nordmark@Sun.COM ire_type = IRE_MULTICAST; 2315*11042SErik.Nordmark@Sun.COM nexthop = ipha->ipha_dst; 2316*11042SErik.Nordmark@Sun.COM } else { 2317*11042SErik.Nordmark@Sun.COM ire_type = ire1->ire_type; /* Doesn't matter */ 2318*11042SErik.Nordmark@Sun.COM nexthop = ire1->ire_gateway_addr; 2319*11042SErik.Nordmark@Sun.COM } 2320*11042SErik.Nordmark@Sun.COM 2321*11042SErik.Nordmark@Sun.COM /* If IPMP meta or under, then we just drop */ 2322*11042SErik.Nordmark@Sun.COM if (ill1->ill_grp != NULL) { 2323*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 2324*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - IPMP", 2325*11042SErik.Nordmark@Sun.COM mp, ill1); 2326*11042SErik.Nordmark@Sun.COM ill_refrele(ill1); 2327*11042SErik.Nordmark@Sun.COM error = ENETUNREACH; 2328*11042SErik.Nordmark@Sun.COM continue; 2329*11042SErik.Nordmark@Sun.COM } 2330*11042SErik.Nordmark@Sun.COM 2331*11042SErik.Nordmark@Sun.COM nce1 = arp_nce_init(ill1, nexthop, ire_type); 2332*11042SErik.Nordmark@Sun.COM if (nce1 == NULL) { 2333*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 2334*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards - no nce", 2335*11042SErik.Nordmark@Sun.COM mp, ill1); 2336*11042SErik.Nordmark@Sun.COM ill_refrele(ill1); 2337*11042SErik.Nordmark@Sun.COM error = ENETUNREACH; 2338*11042SErik.Nordmark@Sun.COM continue; 2339*11042SErik.Nordmark@Sun.COM } 2340*11042SErik.Nordmark@Sun.COM mp1 = copymsg(mp); 2341*11042SErik.Nordmark@Sun.COM if (mp1 == NULL) { 2342*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 2343*11042SErik.Nordmark@Sun.COM ip_drop_output("ipIfStatsOutDiscards", mp, ill1); 2344*11042SErik.Nordmark@Sun.COM nce_refrele(nce1); 2345*11042SErik.Nordmark@Sun.COM ill_refrele(ill1); 2346*11042SErik.Nordmark@Sun.COM error = ENOBUFS; 2347*11042SErik.Nordmark@Sun.COM continue; 2348*11042SErik.Nordmark@Sun.COM } 2349*11042SErik.Nordmark@Sun.COM /* Preserve HW checksum for this copy */ 2350*11042SErik.Nordmark@Sun.COM DB_CKSUMSTART(mp1) = DB_CKSUMSTART(mp); 2351*11042SErik.Nordmark@Sun.COM DB_CKSUMSTUFF(mp1) = DB_CKSUMSTUFF(mp); 2352*11042SErik.Nordmark@Sun.COM DB_CKSUMEND(mp1) = DB_CKSUMEND(mp); 2353*11042SErik.Nordmark@Sun.COM DB_CKSUMFLAGS(mp1) = DB_CKSUMFLAGS(mp); 2354*11042SErik.Nordmark@Sun.COM DB_LSOMSS(mp1) = DB_LSOMSS(mp); 2355*11042SErik.Nordmark@Sun.COM 2356*11042SErik.Nordmark@Sun.COM ire1->ire_ob_pkt_count++; 2357*11042SErik.Nordmark@Sun.COM err = ip_xmit(mp1, nce1, ixaflags, pkt_len, xmit_hint, szone, 2358*11042SErik.Nordmark@Sun.COM 0, ixacookie); 2359*11042SErik.Nordmark@Sun.COM if (err == 0) 2360*11042SErik.Nordmark@Sun.COM num_sent++; 2361*11042SErik.Nordmark@Sun.COM else 2362*11042SErik.Nordmark@Sun.COM error = err; 2363*11042SErik.Nordmark@Sun.COM nce_refrele(nce1); 2364*11042SErik.Nordmark@Sun.COM ill_refrele(ill1); 2365*11042SErik.Nordmark@Sun.COM } 2366*11042SErik.Nordmark@Sun.COM irb_refrele(irb); 2367*11042SErik.Nordmark@Sun.COM ire_refrele(ire); 2368*11042SErik.Nordmark@Sun.COM /* Finally, the main one */ 2369*11042SErik.Nordmark@Sun.COM err = ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0, 2370*11042SErik.Nordmark@Sun.COM ixacookie); 2371*11042SErik.Nordmark@Sun.COM if (err == 0) 2372*11042SErik.Nordmark@Sun.COM num_sent++; 2373*11042SErik.Nordmark@Sun.COM else 2374*11042SErik.Nordmark@Sun.COM error = err; 2375*11042SErik.Nordmark@Sun.COM if (num_sent > 0) 2376*11042SErik.Nordmark@Sun.COM return (0); 2377*11042SErik.Nordmark@Sun.COM else 2378*11042SErik.Nordmark@Sun.COM return (error); 2379*11042SErik.Nordmark@Sun.COM } 2380*11042SErik.Nordmark@Sun.COM 2381*11042SErik.Nordmark@Sun.COM /* 2382*11042SErik.Nordmark@Sun.COM * Verify local connectivity. This check is called by ULP fusion code. 2383*11042SErik.Nordmark@Sun.COM * The generation number on an IRE_LOCAL or IRE_LOOPBACK only changes if 2384*11042SErik.Nordmark@Sun.COM * the interface is brought down and back up. So we simply fail the local 2385*11042SErik.Nordmark@Sun.COM * process. The caller, TCP Fusion, should unfuse the connection. 2386*11042SErik.Nordmark@Sun.COM */ 2387*11042SErik.Nordmark@Sun.COM boolean_t 2388*11042SErik.Nordmark@Sun.COM ip_output_verify_local(ip_xmit_attr_t *ixa) 2389*11042SErik.Nordmark@Sun.COM { 2390*11042SErik.Nordmark@Sun.COM ire_t *ire = ixa->ixa_ire; 2391*11042SErik.Nordmark@Sun.COM 2392*11042SErik.Nordmark@Sun.COM if (!(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) 2393*11042SErik.Nordmark@Sun.COM return (B_FALSE); 2394*11042SErik.Nordmark@Sun.COM 2395*11042SErik.Nordmark@Sun.COM return (ixa->ixa_ire->ire_generation == ixa->ixa_ire_generation); 2396*11042SErik.Nordmark@Sun.COM } 2397*11042SErik.Nordmark@Sun.COM 2398*11042SErik.Nordmark@Sun.COM /* 2399*11042SErik.Nordmark@Sun.COM * Local process for ULP loopback, TCP Fusion. Handle both IPv4 and IPv6. 2400*11042SErik.Nordmark@Sun.COM * 2401*11042SErik.Nordmark@Sun.COM * The caller must call ip_output_verify_local() first. This function handles 2402*11042SErik.Nordmark@Sun.COM * IPobs, FW_HOOKS, and/or IPsec cases sequentially. 2403*11042SErik.Nordmark@Sun.COM */ 2404*11042SErik.Nordmark@Sun.COM mblk_t * 2405*11042SErik.Nordmark@Sun.COM ip_output_process_local(mblk_t *mp, ip_xmit_attr_t *ixa, boolean_t hooks_out, 2406*11042SErik.Nordmark@Sun.COM boolean_t hooks_in, conn_t *peer_connp) 2407*11042SErik.Nordmark@Sun.COM { 2408*11042SErik.Nordmark@Sun.COM ill_t *ill = ixa->ixa_ire->ire_ill; 2409*11042SErik.Nordmark@Sun.COM ipha_t *ipha = NULL; 2410*11042SErik.Nordmark@Sun.COM ip6_t *ip6h = NULL; 2411*11042SErik.Nordmark@Sun.COM ip_stack_t *ipst = ixa->ixa_ipst; 2412*11042SErik.Nordmark@Sun.COM iaflags_t ixaflags = ixa->ixa_flags; 2413*11042SErik.Nordmark@Sun.COM ip_recv_attr_t iras; 2414*11042SErik.Nordmark@Sun.COM int error; 2415*11042SErik.Nordmark@Sun.COM 2416*11042SErik.Nordmark@Sun.COM ASSERT(mp != NULL); 2417*11042SErik.Nordmark@Sun.COM 2418*11042SErik.Nordmark@Sun.COM if (ixaflags & IXAF_IS_IPV4) { 2419*11042SErik.Nordmark@Sun.COM ipha = (ipha_t *)mp->b_rptr; 2420*11042SErik.Nordmark@Sun.COM 2421*11042SErik.Nordmark@Sun.COM /* 2422*11042SErik.Nordmark@Sun.COM * If a callback is enabled then we need to know the 2423*11042SErik.Nordmark@Sun.COM * source and destination zoneids for the packet. We already 2424*11042SErik.Nordmark@Sun.COM * have those handy. 2425*11042SErik.Nordmark@Sun.COM */ 2426*11042SErik.Nordmark@Sun.COM if (ipst->ips_ip4_observe.he_interested) { 2427*11042SErik.Nordmark@Sun.COM zoneid_t szone, dzone; 2428*11042SErik.Nordmark@Sun.COM zoneid_t stackzoneid; 2429*11042SErik.Nordmark@Sun.COM 2430*11042SErik.Nordmark@Sun.COM stackzoneid = netstackid_to_zoneid( 2431*11042SErik.Nordmark@Sun.COM ipst->ips_netstack->netstack_stackid); 2432*11042SErik.Nordmark@Sun.COM 2433*11042SErik.Nordmark@Sun.COM if (stackzoneid == GLOBAL_ZONEID) { 2434*11042SErik.Nordmark@Sun.COM /* Shared-IP zone */ 2435*11042SErik.Nordmark@Sun.COM dzone = ixa->ixa_ire->ire_zoneid; 2436*11042SErik.Nordmark@Sun.COM szone = ixa->ixa_zoneid; 2437*11042SErik.Nordmark@Sun.COM } else { 2438*11042SErik.Nordmark@Sun.COM szone = dzone = stackzoneid; 2439*11042SErik.Nordmark@Sun.COM } 2440*11042SErik.Nordmark@Sun.COM ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 2441*11042SErik.Nordmark@Sun.COM ipst); 2442*11042SErik.Nordmark@Sun.COM } 2443*11042SErik.Nordmark@Sun.COM DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2444*11042SErik.Nordmark@Sun.COM ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, 2445*11042SErik.Nordmark@Sun.COM NULL, int, 1); 2446*11042SErik.Nordmark@Sun.COM 2447*11042SErik.Nordmark@Sun.COM /* FW_HOOKS: LOOPBACK_OUT */ 2448*11042SErik.Nordmark@Sun.COM if (hooks_out) { 2449*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, 2450*11042SErik.Nordmark@Sun.COM ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 2451*11042SErik.Nordmark@Sun.COM FW_HOOKS(ipst->ips_ip4_loopback_out_event, 2452*11042SErik.Nordmark@Sun.COM ipst->ips_ipv4firewall_loopback_out, 2453*11042SErik.Nordmark@Sun.COM NULL, ill, ipha, mp, mp, 0, ipst, error); 2454*11042SErik.Nordmark@Sun.COM DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp); 2455*11042SErik.Nordmark@Sun.COM } 2456*11042SErik.Nordmark@Sun.COM if (mp == NULL) 2457*11042SErik.Nordmark@Sun.COM return (NULL); 2458*11042SErik.Nordmark@Sun.COM 2459*11042SErik.Nordmark@Sun.COM /* FW_HOOKS: LOOPBACK_IN */ 2460*11042SErik.Nordmark@Sun.COM if (hooks_in) { 2461*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill, 2462*11042SErik.Nordmark@Sun.COM ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp); 2463*11042SErik.Nordmark@Sun.COM FW_HOOKS(ipst->ips_ip4_loopback_in_event, 2464*11042SErik.Nordmark@Sun.COM ipst->ips_ipv4firewall_loopback_in, 2465*11042SErik.Nordmark@Sun.COM ill, NULL, ipha, mp, mp, 0, ipst, error); 2466*11042SErik.Nordmark@Sun.COM DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp); 2467*11042SErik.Nordmark@Sun.COM } 2468*11042SErik.Nordmark@Sun.COM if (mp == NULL) 2469*11042SErik.Nordmark@Sun.COM return (NULL); 2470*11042SErik.Nordmark@Sun.COM 2471*11042SErik.Nordmark@Sun.COM DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2472*11042SErik.Nordmark@Sun.COM ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, 2473*11042SErik.Nordmark@Sun.COM NULL, int, 1); 2474*11042SErik.Nordmark@Sun.COM 2475*11042SErik.Nordmark@Sun.COM /* Inbound IPsec polocies */ 2476*11042SErik.Nordmark@Sun.COM if (peer_connp != NULL) { 2477*11042SErik.Nordmark@Sun.COM /* Map ixa to ira including IPsec policies. */ 2478*11042SErik.Nordmark@Sun.COM ipsec_out_to_in(ixa, ill, &iras); 2479*11042SErik.Nordmark@Sun.COM mp = ipsec_check_inbound_policy(mp, peer_connp, ipha, 2480*11042SErik.Nordmark@Sun.COM NULL, &iras); 2481*11042SErik.Nordmark@Sun.COM } 2482*11042SErik.Nordmark@Sun.COM } else { 2483*11042SErik.Nordmark@Sun.COM ip6h = (ip6_t *)mp->b_rptr; 2484*11042SErik.Nordmark@Sun.COM 2485*11042SErik.Nordmark@Sun.COM /* 2486*11042SErik.Nordmark@Sun.COM * If a callback is enabled then we need to know the 2487*11042SErik.Nordmark@Sun.COM * source and destination zoneids for the packet. We already 2488*11042SErik.Nordmark@Sun.COM * have those handy. 2489*11042SErik.Nordmark@Sun.COM */ 2490*11042SErik.Nordmark@Sun.COM if (ipst->ips_ip6_observe.he_interested) { 2491*11042SErik.Nordmark@Sun.COM zoneid_t szone, dzone; 2492*11042SErik.Nordmark@Sun.COM zoneid_t stackzoneid; 2493*11042SErik.Nordmark@Sun.COM 2494*11042SErik.Nordmark@Sun.COM stackzoneid = netstackid_to_zoneid( 2495*11042SErik.Nordmark@Sun.COM ipst->ips_netstack->netstack_stackid); 2496*11042SErik.Nordmark@Sun.COM 2497*11042SErik.Nordmark@Sun.COM if (stackzoneid == GLOBAL_ZONEID) { 2498*11042SErik.Nordmark@Sun.COM /* Shared-IP zone */ 2499*11042SErik.Nordmark@Sun.COM dzone = ixa->ixa_ire->ire_zoneid; 2500*11042SErik.Nordmark@Sun.COM szone = ixa->ixa_zoneid; 2501*11042SErik.Nordmark@Sun.COM } else { 2502*11042SErik.Nordmark@Sun.COM szone = dzone = stackzoneid; 2503*11042SErik.Nordmark@Sun.COM } 2504*11042SErik.Nordmark@Sun.COM ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 2505*11042SErik.Nordmark@Sun.COM ipst); 2506*11042SErik.Nordmark@Sun.COM } 2507*11042SErik.Nordmark@Sun.COM DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2508*11042SErik.Nordmark@Sun.COM ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, 2509*11042SErik.Nordmark@Sun.COM ip6h, int, 1); 2510*11042SErik.Nordmark@Sun.COM 2511*11042SErik.Nordmark@Sun.COM /* FW_HOOKS: LOOPBACK_OUT */ 2512*11042SErik.Nordmark@Sun.COM if (hooks_out) { 2513*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip6__loopback__out__start, ill_t *, NULL, 2514*11042SErik.Nordmark@Sun.COM ill_t *, ill, ip6_t *, ip6h, mblk_t *, mp); 2515*11042SErik.Nordmark@Sun.COM FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 2516*11042SErik.Nordmark@Sun.COM ipst->ips_ipv6firewall_loopback_out, 2517*11042SErik.Nordmark@Sun.COM NULL, ill, ip6h, mp, mp, 0, ipst, error); 2518*11042SErik.Nordmark@Sun.COM DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, mp); 2519*11042SErik.Nordmark@Sun.COM } 2520*11042SErik.Nordmark@Sun.COM if (mp == NULL) 2521*11042SErik.Nordmark@Sun.COM return (NULL); 2522*11042SErik.Nordmark@Sun.COM 2523*11042SErik.Nordmark@Sun.COM /* FW_HOOKS: LOOPBACK_IN */ 2524*11042SErik.Nordmark@Sun.COM if (hooks_in) { 2525*11042SErik.Nordmark@Sun.COM DTRACE_PROBE4(ip6__loopback__in__start, ill_t *, ill, 2526*11042SErik.Nordmark@Sun.COM ill_t *, NULL, ip6_t *, ip6h, mblk_t *, mp); 2527*11042SErik.Nordmark@Sun.COM FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 2528*11042SErik.Nordmark@Sun.COM ipst->ips_ipv6firewall_loopback_in, 2529*11042SErik.Nordmark@Sun.COM ill, NULL, ip6h, mp, mp, 0, ipst, error); 2530*11042SErik.Nordmark@Sun.COM DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, mp); 2531*11042SErik.Nordmark@Sun.COM } 2532*11042SErik.Nordmark@Sun.COM if (mp == NULL) 2533*11042SErik.Nordmark@Sun.COM return (NULL); 2534*11042SErik.Nordmark@Sun.COM 2535*11042SErik.Nordmark@Sun.COM DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2536*11042SErik.Nordmark@Sun.COM ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, 2537*11042SErik.Nordmark@Sun.COM ip6h, int, 1); 2538*11042SErik.Nordmark@Sun.COM 2539*11042SErik.Nordmark@Sun.COM /* Inbound IPsec polocies */ 2540*11042SErik.Nordmark@Sun.COM if (peer_connp != NULL) { 2541*11042SErik.Nordmark@Sun.COM /* Map ixa to ira including IPsec policies. */ 2542*11042SErik.Nordmark@Sun.COM ipsec_out_to_in(ixa, ill, &iras); 2543*11042SErik.Nordmark@Sun.COM mp = ipsec_check_inbound_policy(mp, peer_connp, NULL, 2544*11042SErik.Nordmark@Sun.COM ip6h, &iras); 2545*11042SErik.Nordmark@Sun.COM } 2546*11042SErik.Nordmark@Sun.COM } 2547*11042SErik.Nordmark@Sun.COM 2548*11042SErik.Nordmark@Sun.COM if (mp == NULL) { 2549*11042SErik.Nordmark@Sun.COM BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2550*11042SErik.Nordmark@Sun.COM ip_drop_input("ipIfStatsInDiscards", NULL, ill); 2551*11042SErik.Nordmark@Sun.COM } 2552*11042SErik.Nordmark@Sun.COM 2553*11042SErik.Nordmark@Sun.COM return (mp); 2554*11042SErik.Nordmark@Sun.COM } 2555