xref: /onnv-gate/usr/src/uts/common/inet/ip/ip_output.c (revision 11042:2d6e217af1b4)
1*11042SErik.Nordmark@Sun.COM /*
2*11042SErik.Nordmark@Sun.COM  * CDDL HEADER START
3*11042SErik.Nordmark@Sun.COM  *
4*11042SErik.Nordmark@Sun.COM  * The contents of this file are subject to the terms of the
5*11042SErik.Nordmark@Sun.COM  * Common Development and Distribution License (the "License").
6*11042SErik.Nordmark@Sun.COM  * You may not use this file except in compliance with the License.
7*11042SErik.Nordmark@Sun.COM  *
8*11042SErik.Nordmark@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*11042SErik.Nordmark@Sun.COM  * or http://www.opensolaris.org/os/licensing.
10*11042SErik.Nordmark@Sun.COM  * See the License for the specific language governing permissions
11*11042SErik.Nordmark@Sun.COM  * and limitations under the License.
12*11042SErik.Nordmark@Sun.COM  *
13*11042SErik.Nordmark@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
14*11042SErik.Nordmark@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*11042SErik.Nordmark@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
16*11042SErik.Nordmark@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
17*11042SErik.Nordmark@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
18*11042SErik.Nordmark@Sun.COM  *
19*11042SErik.Nordmark@Sun.COM  * CDDL HEADER END
20*11042SErik.Nordmark@Sun.COM  */
21*11042SErik.Nordmark@Sun.COM 
22*11042SErik.Nordmark@Sun.COM /*
23*11042SErik.Nordmark@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24*11042SErik.Nordmark@Sun.COM  * Use is subject to license terms.
25*11042SErik.Nordmark@Sun.COM  */
26*11042SErik.Nordmark@Sun.COM /* Copyright (c) 1990 Mentat Inc. */
27*11042SErik.Nordmark@Sun.COM 
28*11042SErik.Nordmark@Sun.COM #include <sys/types.h>
29*11042SErik.Nordmark@Sun.COM #include <sys/stream.h>
30*11042SErik.Nordmark@Sun.COM #include <sys/strsubr.h>
31*11042SErik.Nordmark@Sun.COM #include <sys/dlpi.h>
32*11042SErik.Nordmark@Sun.COM #include <sys/strsun.h>
33*11042SErik.Nordmark@Sun.COM #include <sys/zone.h>
34*11042SErik.Nordmark@Sun.COM #include <sys/ddi.h>
35*11042SErik.Nordmark@Sun.COM #include <sys/sunddi.h>
36*11042SErik.Nordmark@Sun.COM #include <sys/cmn_err.h>
37*11042SErik.Nordmark@Sun.COM #include <sys/debug.h>
38*11042SErik.Nordmark@Sun.COM #include <sys/atomic.h>
39*11042SErik.Nordmark@Sun.COM 
40*11042SErik.Nordmark@Sun.COM #include <sys/systm.h>
41*11042SErik.Nordmark@Sun.COM #include <sys/param.h>
42*11042SErik.Nordmark@Sun.COM #include <sys/kmem.h>
43*11042SErik.Nordmark@Sun.COM #include <sys/sdt.h>
44*11042SErik.Nordmark@Sun.COM #include <sys/socket.h>
45*11042SErik.Nordmark@Sun.COM #include <sys/mac.h>
46*11042SErik.Nordmark@Sun.COM #include <net/if.h>
47*11042SErik.Nordmark@Sun.COM #include <net/if_arp.h>
48*11042SErik.Nordmark@Sun.COM #include <net/route.h>
49*11042SErik.Nordmark@Sun.COM #include <sys/sockio.h>
50*11042SErik.Nordmark@Sun.COM #include <netinet/in.h>
51*11042SErik.Nordmark@Sun.COM #include <net/if_dl.h>
52*11042SErik.Nordmark@Sun.COM 
53*11042SErik.Nordmark@Sun.COM #include <inet/common.h>
54*11042SErik.Nordmark@Sun.COM #include <inet/mi.h>
55*11042SErik.Nordmark@Sun.COM #include <inet/mib2.h>
56*11042SErik.Nordmark@Sun.COM #include <inet/nd.h>
57*11042SErik.Nordmark@Sun.COM #include <inet/arp.h>
58*11042SErik.Nordmark@Sun.COM #include <inet/snmpcom.h>
59*11042SErik.Nordmark@Sun.COM #include <inet/kstatcom.h>
60*11042SErik.Nordmark@Sun.COM 
61*11042SErik.Nordmark@Sun.COM #include <netinet/igmp_var.h>
62*11042SErik.Nordmark@Sun.COM #include <netinet/ip6.h>
63*11042SErik.Nordmark@Sun.COM #include <netinet/icmp6.h>
64*11042SErik.Nordmark@Sun.COM #include <netinet/sctp.h>
65*11042SErik.Nordmark@Sun.COM 
66*11042SErik.Nordmark@Sun.COM #include <inet/ip.h>
67*11042SErik.Nordmark@Sun.COM #include <inet/ip_impl.h>
68*11042SErik.Nordmark@Sun.COM #include <inet/ip6.h>
69*11042SErik.Nordmark@Sun.COM #include <inet/ip6_asp.h>
70*11042SErik.Nordmark@Sun.COM #include <inet/tcp.h>
71*11042SErik.Nordmark@Sun.COM #include <inet/ip_multi.h>
72*11042SErik.Nordmark@Sun.COM #include <inet/ip_if.h>
73*11042SErik.Nordmark@Sun.COM #include <inet/ip_ire.h>
74*11042SErik.Nordmark@Sun.COM #include <inet/ip_ftable.h>
75*11042SErik.Nordmark@Sun.COM #include <inet/ip_rts.h>
76*11042SErik.Nordmark@Sun.COM #include <inet/optcom.h>
77*11042SErik.Nordmark@Sun.COM #include <inet/ip_ndp.h>
78*11042SErik.Nordmark@Sun.COM #include <inet/ip_listutils.h>
79*11042SErik.Nordmark@Sun.COM #include <netinet/igmp.h>
80*11042SErik.Nordmark@Sun.COM #include <netinet/ip_mroute.h>
81*11042SErik.Nordmark@Sun.COM #include <inet/ipp_common.h>
82*11042SErik.Nordmark@Sun.COM 
83*11042SErik.Nordmark@Sun.COM #include <net/pfkeyv2.h>
84*11042SErik.Nordmark@Sun.COM #include <inet/sadb.h>
85*11042SErik.Nordmark@Sun.COM #include <inet/ipsec_impl.h>
86*11042SErik.Nordmark@Sun.COM #include <inet/ipdrop.h>
87*11042SErik.Nordmark@Sun.COM #include <inet/ip_netinfo.h>
88*11042SErik.Nordmark@Sun.COM 
89*11042SErik.Nordmark@Sun.COM #include <sys/pattr.h>
90*11042SErik.Nordmark@Sun.COM #include <inet/ipclassifier.h>
91*11042SErik.Nordmark@Sun.COM #include <inet/sctp_ip.h>
92*11042SErik.Nordmark@Sun.COM #include <inet/sctp/sctp_impl.h>
93*11042SErik.Nordmark@Sun.COM #include <inet/udp_impl.h>
94*11042SErik.Nordmark@Sun.COM #include <sys/sunddi.h>
95*11042SErik.Nordmark@Sun.COM 
96*11042SErik.Nordmark@Sun.COM #include <sys/tsol/label.h>
97*11042SErik.Nordmark@Sun.COM #include <sys/tsol/tnet.h>
98*11042SErik.Nordmark@Sun.COM 
99*11042SErik.Nordmark@Sun.COM #ifdef	DEBUG
100*11042SErik.Nordmark@Sun.COM extern boolean_t skip_sctp_cksum;
101*11042SErik.Nordmark@Sun.COM #endif
102*11042SErik.Nordmark@Sun.COM 
103*11042SErik.Nordmark@Sun.COM static int	ip_verify_nce(mblk_t *, ip_xmit_attr_t *);
104*11042SErik.Nordmark@Sun.COM static int	ip_verify_dce(mblk_t *, ip_xmit_attr_t *);
105*11042SErik.Nordmark@Sun.COM static boolean_t ip_verify_lso(ill_t *, ip_xmit_attr_t *);
106*11042SErik.Nordmark@Sun.COM static boolean_t ip_verify_zcopy(ill_t *, ip_xmit_attr_t *);
107*11042SErik.Nordmark@Sun.COM static void	ip_output_simple_broadcast(ip_xmit_attr_t *, mblk_t *);
108*11042SErik.Nordmark@Sun.COM 
/*
 * There are two types of output functions for IP used for different
 * purposes:
 *  - ip_output_simple() is used when sending ICMP errors, TCP resets, etc.
 *    when there is no context in the form of a conn_t. However, there is
 *    an ip_xmit_attr_t that the callers use to influence interface
 *    selection (needed for ICMP echo as well as IPv6 link-locals) and
 *    IPsec.
 *
 *  - conn_ip_output() is used when sending packets with a conn_t and
 *    ip_set_destination has been called to cache information. In that case
 *    various socket options are recorded in the ip_xmit_attr_t and should
 *    be taken into account.
 */
122*11042SErik.Nordmark@Sun.COM 
123*11042SErik.Nordmark@Sun.COM /*
124*11042SErik.Nordmark@Sun.COM  * The caller *must* have called conn_connect() or ip_attr_connect()
125*11042SErik.Nordmark@Sun.COM  * before calling conn_ip_output(). The caller needs to redo that each time
126*11042SErik.Nordmark@Sun.COM  * the destination IP address or port changes, as well as each time there is
127*11042SErik.Nordmark@Sun.COM  * a change to any socket option that would modify how packets are routed out
128*11042SErik.Nordmark@Sun.COM  * of the box (e.g., SO_DONTROUTE, IP_NEXTHOP, IP_BOUND_IF).
129*11042SErik.Nordmark@Sun.COM  *
130*11042SErik.Nordmark@Sun.COM  * The ULP caller has to serialize the use of a single ip_xmit_attr_t.
131*11042SErik.Nordmark@Sun.COM  * We assert for that here.
132*11042SErik.Nordmark@Sun.COM  */
133*11042SErik.Nordmark@Sun.COM int
134*11042SErik.Nordmark@Sun.COM conn_ip_output(mblk_t *mp, ip_xmit_attr_t *ixa)
135*11042SErik.Nordmark@Sun.COM {
136*11042SErik.Nordmark@Sun.COM 	iaflags_t	ixaflags = ixa->ixa_flags;
137*11042SErik.Nordmark@Sun.COM 	ire_t		*ire;
138*11042SErik.Nordmark@Sun.COM 	nce_t		*nce;
139*11042SErik.Nordmark@Sun.COM 	dce_t		*dce;
140*11042SErik.Nordmark@Sun.COM 	ill_t		*ill;
141*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
142*11042SErik.Nordmark@Sun.COM 	int		error;
143*11042SErik.Nordmark@Sun.COM 
144*11042SErik.Nordmark@Sun.COM 	/* We defer ipIfStatsHCOutRequests until an error or we have an ill */
145*11042SErik.Nordmark@Sun.COM 
146*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_ire != NULL);
147*11042SErik.Nordmark@Sun.COM 	/* Note there is no ixa_nce when reject and blackhole routes */
148*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_dce != NULL);	/* Could be default dce */
149*11042SErik.Nordmark@Sun.COM 
150*11042SErik.Nordmark@Sun.COM #ifdef DEBUG
151*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_curthread == NULL);
152*11042SErik.Nordmark@Sun.COM 	ixa->ixa_curthread = curthread;
153*11042SErik.Nordmark@Sun.COM #endif
154*11042SErik.Nordmark@Sun.COM 
155*11042SErik.Nordmark@Sun.COM 	/*
156*11042SErik.Nordmark@Sun.COM 	 * Even on labeled systems we can have a NULL ixa_tsl e.g.,
157*11042SErik.Nordmark@Sun.COM 	 * for IGMP/MLD traffic.
158*11042SErik.Nordmark@Sun.COM 	 */
159*11042SErik.Nordmark@Sun.COM 
160*11042SErik.Nordmark@Sun.COM 	ire = ixa->ixa_ire;
161*11042SErik.Nordmark@Sun.COM 
162*11042SErik.Nordmark@Sun.COM 	/*
163*11042SErik.Nordmark@Sun.COM 	 * If the ULP says the (old) IRE resulted in reachability we
164*11042SErik.Nordmark@Sun.COM 	 * record this before determine whether to use a new IRE.
165*11042SErik.Nordmark@Sun.COM 	 * No locking for performance reasons.
166*11042SErik.Nordmark@Sun.COM 	 */
167*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_REACH_CONF)
168*11042SErik.Nordmark@Sun.COM 		ire->ire_badcnt = 0;
169*11042SErik.Nordmark@Sun.COM 
170*11042SErik.Nordmark@Sun.COM 	/*
171*11042SErik.Nordmark@Sun.COM 	 * Has routing changed since we cached the results of the lookup?
172*11042SErik.Nordmark@Sun.COM 	 *
173*11042SErik.Nordmark@Sun.COM 	 * This check captures all of:
174*11042SErik.Nordmark@Sun.COM 	 *  - the cached ire being deleted (by means of the special
175*11042SErik.Nordmark@Sun.COM 	 *    IRE_GENERATION_CONDEMNED)
176*11042SErik.Nordmark@Sun.COM 	 *  - A potentially better ire being added (ire_generation being
177*11042SErik.Nordmark@Sun.COM 	 *    increased)
178*11042SErik.Nordmark@Sun.COM 	 *  - A deletion of the nexthop ire that was used when we did the
179*11042SErik.Nordmark@Sun.COM 	 *    lookup.
180*11042SErik.Nordmark@Sun.COM 	 *  - An addition of a potentially better nexthop ire.
181*11042SErik.Nordmark@Sun.COM 	 * The last two are handled by walking and increasing the generation
182*11042SErik.Nordmark@Sun.COM 	 * number on all dependant IREs in ire_flush_cache().
183*11042SErik.Nordmark@Sun.COM 	 *
184*11042SErik.Nordmark@Sun.COM 	 * The check also handles all cases of RTF_REJECT and RTF_BLACKHOLE
185*11042SErik.Nordmark@Sun.COM 	 * since we ensure that each time we set ixa_ire to such an IRE we
186*11042SErik.Nordmark@Sun.COM 	 * make sure the ixa_ire_generation does not match (by using
187*11042SErik.Nordmark@Sun.COM 	 * IRE_GENERATION_VERIFY).
188*11042SErik.Nordmark@Sun.COM 	 */
189*11042SErik.Nordmark@Sun.COM 	if (ire->ire_generation != ixa->ixa_ire_generation) {
190*11042SErik.Nordmark@Sun.COM 		error = ip_verify_ire(mp, ixa);
191*11042SErik.Nordmark@Sun.COM 		if (error != 0) {
192*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - verify ire",
193*11042SErik.Nordmark@Sun.COM 			    mp, NULL);
194*11042SErik.Nordmark@Sun.COM 			goto drop;
195*11042SErik.Nordmark@Sun.COM 		}
196*11042SErik.Nordmark@Sun.COM 		ire = ixa->ixa_ire;
197*11042SErik.Nordmark@Sun.COM 		ASSERT(ire != NULL);
198*11042SErik.Nordmark@Sun.COM 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
199*11042SErik.Nordmark@Sun.COM #ifdef DEBUG
200*11042SErik.Nordmark@Sun.COM 			ASSERT(ixa->ixa_curthread == curthread);
201*11042SErik.Nordmark@Sun.COM 			ixa->ixa_curthread = NULL;
202*11042SErik.Nordmark@Sun.COM #endif
203*11042SErik.Nordmark@Sun.COM 			ire->ire_ob_pkt_count++;
204*11042SErik.Nordmark@Sun.COM 			/* ixa_dce might be condemned; use default one */
205*11042SErik.Nordmark@Sun.COM 			return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa,
206*11042SErik.Nordmark@Sun.COM 			    &ipst->ips_dce_default->dce_ident));
207*11042SErik.Nordmark@Sun.COM 		}
208*11042SErik.Nordmark@Sun.COM 		/*
209*11042SErik.Nordmark@Sun.COM 		 * If the ncec changed then ip_verify_ire already set
210*11042SErik.Nordmark@Sun.COM 		 * ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
211*11042SErik.Nordmark@Sun.COM 		 * so we can recheck the interface mtu.
212*11042SErik.Nordmark@Sun.COM 		 */
213*11042SErik.Nordmark@Sun.COM 
214*11042SErik.Nordmark@Sun.COM 		/*
215*11042SErik.Nordmark@Sun.COM 		 * Note that ire->ire_generation could already have changed.
216*11042SErik.Nordmark@Sun.COM 		 * We catch that next time we send a packet.
217*11042SErik.Nordmark@Sun.COM 		 */
218*11042SErik.Nordmark@Sun.COM 	}
219*11042SErik.Nordmark@Sun.COM 
220*11042SErik.Nordmark@Sun.COM 	/*
221*11042SErik.Nordmark@Sun.COM 	 * No need to lock access to ixa_nce since the ip_xmit_attr usage
222*11042SErik.Nordmark@Sun.COM 	 * is single threaded.
223*11042SErik.Nordmark@Sun.COM 	 */
224*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_nce != NULL);
225*11042SErik.Nordmark@Sun.COM 	nce = ixa->ixa_nce;
226*11042SErik.Nordmark@Sun.COM 	if (nce->nce_is_condemned) {
227*11042SErik.Nordmark@Sun.COM 		error = ip_verify_nce(mp, ixa);
228*11042SErik.Nordmark@Sun.COM 		/*
229*11042SErik.Nordmark@Sun.COM 		 * In case ZEROCOPY capability become not available, we
230*11042SErik.Nordmark@Sun.COM 		 * copy the message and free the original one. We might
231*11042SErik.Nordmark@Sun.COM 		 * be copying more data than needed but it doesn't hurt
232*11042SErik.Nordmark@Sun.COM 		 * since such change rarely happens.
233*11042SErik.Nordmark@Sun.COM 		 */
234*11042SErik.Nordmark@Sun.COM 		switch (error) {
235*11042SErik.Nordmark@Sun.COM 		case 0:
236*11042SErik.Nordmark@Sun.COM 			break;
237*11042SErik.Nordmark@Sun.COM 		case ENOTSUP: { /* ZEROCOPY */
238*11042SErik.Nordmark@Sun.COM 			mblk_t *nmp;
239*11042SErik.Nordmark@Sun.COM 
240*11042SErik.Nordmark@Sun.COM 			if ((nmp = copymsg(mp)) != NULL) {
241*11042SErik.Nordmark@Sun.COM 				freemsg(mp);
242*11042SErik.Nordmark@Sun.COM 				mp = nmp;
243*11042SErik.Nordmark@Sun.COM 
244*11042SErik.Nordmark@Sun.COM 				break;
245*11042SErik.Nordmark@Sun.COM 			}
246*11042SErik.Nordmark@Sun.COM 			/* FALLTHROUGH */
247*11042SErik.Nordmark@Sun.COM 		}
248*11042SErik.Nordmark@Sun.COM 		default:
249*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - verify nce",
250*11042SErik.Nordmark@Sun.COM 			    mp, NULL);
251*11042SErik.Nordmark@Sun.COM 			goto drop;
252*11042SErik.Nordmark@Sun.COM 		}
253*11042SErik.Nordmark@Sun.COM 		ire = ixa->ixa_ire;
254*11042SErik.Nordmark@Sun.COM 		ASSERT(ire != NULL);
255*11042SErik.Nordmark@Sun.COM 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
256*11042SErik.Nordmark@Sun.COM #ifdef DEBUG
257*11042SErik.Nordmark@Sun.COM 			ASSERT(ixa->ixa_curthread == curthread);
258*11042SErik.Nordmark@Sun.COM 			ixa->ixa_curthread = NULL;
259*11042SErik.Nordmark@Sun.COM #endif
260*11042SErik.Nordmark@Sun.COM 			ire->ire_ob_pkt_count++;
261*11042SErik.Nordmark@Sun.COM 			/* ixa_dce might be condemned; use default one */
262*11042SErik.Nordmark@Sun.COM 			return ((ire->ire_sendfn)(ire, mp, mp->b_rptr,
263*11042SErik.Nordmark@Sun.COM 			    ixa, &ipst->ips_dce_default->dce_ident));
264*11042SErik.Nordmark@Sun.COM 		}
265*11042SErik.Nordmark@Sun.COM 		ASSERT(ixa->ixa_nce != NULL);
266*11042SErik.Nordmark@Sun.COM 		nce = ixa->ixa_nce;
267*11042SErik.Nordmark@Sun.COM 
268*11042SErik.Nordmark@Sun.COM 		/*
269*11042SErik.Nordmark@Sun.COM 		 * Note that some other event could already have made
270*11042SErik.Nordmark@Sun.COM 		 * the new nce condemned. We catch that next time we
271*11042SErik.Nordmark@Sun.COM 		 * try to send a packet.
272*11042SErik.Nordmark@Sun.COM 		 */
273*11042SErik.Nordmark@Sun.COM 	}
274*11042SErik.Nordmark@Sun.COM 	/*
275*11042SErik.Nordmark@Sun.COM 	 * If there is no per-destination dce_t then we have a reference to
276*11042SErik.Nordmark@Sun.COM 	 * the default dce_t (which merely contains the dce_ipid).
277*11042SErik.Nordmark@Sun.COM 	 * The generation check captures both the introduction of a
278*11042SErik.Nordmark@Sun.COM 	 * per-destination dce_t (e.g., due to ICMP packet too big) and
279*11042SErik.Nordmark@Sun.COM 	 * any change to the per-destination dce (including it becoming
280*11042SErik.Nordmark@Sun.COM 	 * condemned by use of the special DCE_GENERATION_CONDEMNED).
281*11042SErik.Nordmark@Sun.COM 	 */
282*11042SErik.Nordmark@Sun.COM 	dce = ixa->ixa_dce;
283*11042SErik.Nordmark@Sun.COM 
284*11042SErik.Nordmark@Sun.COM 	/*
285*11042SErik.Nordmark@Sun.COM 	 * To avoid a periodic timer to increase the path MTU we
286*11042SErik.Nordmark@Sun.COM 	 * look at dce_last_change_time each time we send a packet.
287*11042SErik.Nordmark@Sun.COM 	 */
288*11042SErik.Nordmark@Sun.COM 	if ((dce->dce_flags & DCEF_PMTU) &&
289*11042SErik.Nordmark@Sun.COM 	    (TICK_TO_SEC(lbolt64) - dce->dce_last_change_time >
290*11042SErik.Nordmark@Sun.COM 	    ipst->ips_ip_pathmtu_interval)) {
291*11042SErik.Nordmark@Sun.COM 		/*
292*11042SErik.Nordmark@Sun.COM 		 * Older than 20 minutes. Drop the path MTU information.
293*11042SErik.Nordmark@Sun.COM 		 * Since the path MTU changes as a result of this, twiddle
294*11042SErik.Nordmark@Sun.COM 		 * ixa_dce_generation to make us go through the dce
295*11042SErik.Nordmark@Sun.COM 		 * verification code in conn_ip_output.
296*11042SErik.Nordmark@Sun.COM 		 */
297*11042SErik.Nordmark@Sun.COM 		mutex_enter(&dce->dce_lock);
298*11042SErik.Nordmark@Sun.COM 		dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU);
299*11042SErik.Nordmark@Sun.COM 		dce->dce_last_change_time = TICK_TO_SEC(lbolt64);
300*11042SErik.Nordmark@Sun.COM 		mutex_exit(&dce->dce_lock);
301*11042SErik.Nordmark@Sun.COM 		dce_increment_generation(dce);
302*11042SErik.Nordmark@Sun.COM 	}
303*11042SErik.Nordmark@Sun.COM 
304*11042SErik.Nordmark@Sun.COM 	if (dce->dce_generation != ixa->ixa_dce_generation) {
305*11042SErik.Nordmark@Sun.COM 		error = ip_verify_dce(mp, ixa);
306*11042SErik.Nordmark@Sun.COM 		if (error != 0) {
307*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - verify dce",
308*11042SErik.Nordmark@Sun.COM 			    mp, NULL);
309*11042SErik.Nordmark@Sun.COM 			goto drop;
310*11042SErik.Nordmark@Sun.COM 		}
311*11042SErik.Nordmark@Sun.COM 		dce = ixa->ixa_dce;
312*11042SErik.Nordmark@Sun.COM 
313*11042SErik.Nordmark@Sun.COM 		/*
314*11042SErik.Nordmark@Sun.COM 		 * Note that some other event could already have made the
315*11042SErik.Nordmark@Sun.COM 		 * new dce's generation number change.
316*11042SErik.Nordmark@Sun.COM 		 * We catch that next time we try to send a packet.
317*11042SErik.Nordmark@Sun.COM 		 */
318*11042SErik.Nordmark@Sun.COM 	}
319*11042SErik.Nordmark@Sun.COM 
320*11042SErik.Nordmark@Sun.COM 	ill = nce->nce_ill;
321*11042SErik.Nordmark@Sun.COM 
322*11042SErik.Nordmark@Sun.COM 	/*
323*11042SErik.Nordmark@Sun.COM 	 * An initial ixa_fragsize was set in ip_set_destination
324*11042SErik.Nordmark@Sun.COM 	 * and we update it if any routing changes above.
325*11042SErik.Nordmark@Sun.COM 	 * A change to ill_mtu with ifconfig will increase all dce_generation
326*11042SErik.Nordmark@Sun.COM 	 * so that we will detect that with the generation check.
327*11042SErik.Nordmark@Sun.COM 	 */
328*11042SErik.Nordmark@Sun.COM 
329*11042SErik.Nordmark@Sun.COM 	/*
330*11042SErik.Nordmark@Sun.COM 	 * Caller needs to make sure IXAF_VERIFY_SRC is not set if
331*11042SErik.Nordmark@Sun.COM 	 * conn_unspec_src.
332*11042SErik.Nordmark@Sun.COM 	 */
333*11042SErik.Nordmark@Sun.COM 	if ((ixaflags & IXAF_VERIFY_SOURCE) &&
334*11042SErik.Nordmark@Sun.COM 	    ixa->ixa_src_generation != ipst->ips_src_generation) {
335*11042SErik.Nordmark@Sun.COM 		/* Check if the IP source is still assigned to the host. */
336*11042SErik.Nordmark@Sun.COM 		uint_t gen;
337*11042SErik.Nordmark@Sun.COM 
338*11042SErik.Nordmark@Sun.COM 		if (!ip_verify_src(mp, ixa, &gen)) {
339*11042SErik.Nordmark@Sun.COM 			/* Don't send a packet with a source that isn't ours */
340*11042SErik.Nordmark@Sun.COM 			error = EADDRNOTAVAIL;
341*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - invalid src",
342*11042SErik.Nordmark@Sun.COM 			    mp, NULL);
343*11042SErik.Nordmark@Sun.COM 			goto drop;
344*11042SErik.Nordmark@Sun.COM 		}
345*11042SErik.Nordmark@Sun.COM 		/* The source is still valid - update the generation number */
346*11042SErik.Nordmark@Sun.COM 		ixa->ixa_src_generation = gen;
347*11042SErik.Nordmark@Sun.COM 	}
348*11042SErik.Nordmark@Sun.COM 
349*11042SErik.Nordmark@Sun.COM 	/*
350*11042SErik.Nordmark@Sun.COM 	 * We don't have an IRE when we fragment, hence ire_ob_pkt_count
351*11042SErik.Nordmark@Sun.COM 	 * can only count the use prior to fragmentation. However the MIB
352*11042SErik.Nordmark@Sun.COM 	 * counters on the ill will be incremented in post fragmentation.
353*11042SErik.Nordmark@Sun.COM 	 */
354*11042SErik.Nordmark@Sun.COM 	ire->ire_ob_pkt_count++;
355*11042SErik.Nordmark@Sun.COM 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
356*11042SErik.Nordmark@Sun.COM 
357*11042SErik.Nordmark@Sun.COM 	/*
358*11042SErik.Nordmark@Sun.COM 	 * Based on ire_type and ire_flags call one of:
359*11042SErik.Nordmark@Sun.COM 	 *	ire_send_local_v* - for IRE_LOCAL and IRE_LOOPBACK
360*11042SErik.Nordmark@Sun.COM 	 *	ire_send_multirt_v* - if RTF_MULTIRT
361*11042SErik.Nordmark@Sun.COM 	 *	ire_send_noroute_v* - if RTF_REJECT or RTF_BLACHOLE
362*11042SErik.Nordmark@Sun.COM 	 *	ire_send_multicast_v* - for IRE_MULTICAST
363*11042SErik.Nordmark@Sun.COM 	 *	ire_send_broadcast_v4 - for IRE_BROADCAST
364*11042SErik.Nordmark@Sun.COM 	 *	ire_send_wire_v* - for the rest.
365*11042SErik.Nordmark@Sun.COM 	 */
366*11042SErik.Nordmark@Sun.COM #ifdef DEBUG
367*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_curthread == curthread);
368*11042SErik.Nordmark@Sun.COM 	ixa->ixa_curthread = NULL;
369*11042SErik.Nordmark@Sun.COM #endif
370*11042SErik.Nordmark@Sun.COM 	return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa, &dce->dce_ident));
371*11042SErik.Nordmark@Sun.COM 
372*11042SErik.Nordmark@Sun.COM drop:
373*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_IS_IPV4) {
374*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
375*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
376*11042SErik.Nordmark@Sun.COM 	} else {
377*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsHCOutRequests);
378*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards);
379*11042SErik.Nordmark@Sun.COM 	}
380*11042SErik.Nordmark@Sun.COM 	freemsg(mp);
381*11042SErik.Nordmark@Sun.COM #ifdef DEBUG
382*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_curthread == curthread);
383*11042SErik.Nordmark@Sun.COM 	ixa->ixa_curthread = NULL;
384*11042SErik.Nordmark@Sun.COM #endif
385*11042SErik.Nordmark@Sun.COM 	return (error);
386*11042SErik.Nordmark@Sun.COM }
387*11042SErik.Nordmark@Sun.COM 
388*11042SErik.Nordmark@Sun.COM /*
389*11042SErik.Nordmark@Sun.COM  * Handle both IPv4 and IPv6. Sets the generation number
390*11042SErik.Nordmark@Sun.COM  * to allow the caller to know when to call us again.
391*11042SErik.Nordmark@Sun.COM  * Returns true if the source address in the packet is a valid source.
392*11042SErik.Nordmark@Sun.COM  * We handle callers which try to send with a zero address (since we only
393*11042SErik.Nordmark@Sun.COM  * get here if UNSPEC_SRC is not set).
394*11042SErik.Nordmark@Sun.COM  */
395*11042SErik.Nordmark@Sun.COM boolean_t
396*11042SErik.Nordmark@Sun.COM ip_verify_src(mblk_t *mp, ip_xmit_attr_t *ixa, uint_t *generationp)
397*11042SErik.Nordmark@Sun.COM {
398*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
399*11042SErik.Nordmark@Sun.COM 
400*11042SErik.Nordmark@Sun.COM 	/*
401*11042SErik.Nordmark@Sun.COM 	 * Need to grab the generation number before we check to
402*11042SErik.Nordmark@Sun.COM 	 * avoid a race with a change to the set of local addresses.
403*11042SErik.Nordmark@Sun.COM 	 * No lock needed since the thread which updates the set of local
404*11042SErik.Nordmark@Sun.COM 	 * addresses use ipif/ill locks and exit those (hence a store memory
405*11042SErik.Nordmark@Sun.COM 	 * barrier) before doing the atomic increase of ips_src_generation.
406*11042SErik.Nordmark@Sun.COM 	 */
407*11042SErik.Nordmark@Sun.COM 	if (generationp != NULL)
408*11042SErik.Nordmark@Sun.COM 		*generationp = ipst->ips_src_generation;
409*11042SErik.Nordmark@Sun.COM 
410*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
411*11042SErik.Nordmark@Sun.COM 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
412*11042SErik.Nordmark@Sun.COM 
413*11042SErik.Nordmark@Sun.COM 		if (ipha->ipha_src == INADDR_ANY)
414*11042SErik.Nordmark@Sun.COM 			return (B_FALSE);
415*11042SErik.Nordmark@Sun.COM 
416*11042SErik.Nordmark@Sun.COM 		return (ip_laddr_verify_v4(ipha->ipha_src, ixa->ixa_zoneid,
417*11042SErik.Nordmark@Sun.COM 		    ipst, B_FALSE) != IPVL_BAD);
418*11042SErik.Nordmark@Sun.COM 	} else {
419*11042SErik.Nordmark@Sun.COM 		ip6_t	*ip6h = (ip6_t *)mp->b_rptr;
420*11042SErik.Nordmark@Sun.COM 		uint_t	scopeid;
421*11042SErik.Nordmark@Sun.COM 
422*11042SErik.Nordmark@Sun.COM 		if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src))
423*11042SErik.Nordmark@Sun.COM 			return (B_FALSE);
424*11042SErik.Nordmark@Sun.COM 
425*11042SErik.Nordmark@Sun.COM 		if (ixa->ixa_flags & IXAF_SCOPEID_SET)
426*11042SErik.Nordmark@Sun.COM 			scopeid = ixa->ixa_scopeid;
427*11042SErik.Nordmark@Sun.COM 		else
428*11042SErik.Nordmark@Sun.COM 			scopeid = 0;
429*11042SErik.Nordmark@Sun.COM 
430*11042SErik.Nordmark@Sun.COM 		return (ip_laddr_verify_v6(&ip6h->ip6_src, ixa->ixa_zoneid,
431*11042SErik.Nordmark@Sun.COM 		    ipst, B_FALSE, scopeid) != IPVL_BAD);
432*11042SErik.Nordmark@Sun.COM 	}
433*11042SErik.Nordmark@Sun.COM }
434*11042SErik.Nordmark@Sun.COM 
435*11042SErik.Nordmark@Sun.COM /*
436*11042SErik.Nordmark@Sun.COM  * Handle both IPv4 and IPv6. Reverify/recalculate the IRE to use.
437*11042SErik.Nordmark@Sun.COM  */
438*11042SErik.Nordmark@Sun.COM int
439*11042SErik.Nordmark@Sun.COM ip_verify_ire(mblk_t *mp, ip_xmit_attr_t *ixa)
440*11042SErik.Nordmark@Sun.COM {
441*11042SErik.Nordmark@Sun.COM 	uint_t		gen;
442*11042SErik.Nordmark@Sun.COM 	ire_t		*ire;
443*11042SErik.Nordmark@Sun.COM 	nce_t		*nce;
444*11042SErik.Nordmark@Sun.COM 	int		error;
445*11042SErik.Nordmark@Sun.COM 	boolean_t	multirt = B_FALSE;
446*11042SErik.Nordmark@Sun.COM 
447*11042SErik.Nordmark@Sun.COM 	/*
448*11042SErik.Nordmark@Sun.COM 	 * Redo ip_select_route.
449*11042SErik.Nordmark@Sun.COM 	 * Need to grab generation number as part of the lookup to
450*11042SErik.Nordmark@Sun.COM 	 * avoid race.
451*11042SErik.Nordmark@Sun.COM 	 */
452*11042SErik.Nordmark@Sun.COM 	error = 0;
453*11042SErik.Nordmark@Sun.COM 	ire = ip_select_route_pkt(mp, ixa, &gen, &error, &multirt);
454*11042SErik.Nordmark@Sun.COM 	ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
455*11042SErik.Nordmark@Sun.COM 	if (error != 0) {
456*11042SErik.Nordmark@Sun.COM 		ire_refrele(ire);
457*11042SErik.Nordmark@Sun.COM 		return (error);
458*11042SErik.Nordmark@Sun.COM 	}
459*11042SErik.Nordmark@Sun.COM 
460*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_ire != NULL)
461*11042SErik.Nordmark@Sun.COM 		ire_refrele_notr(ixa->ixa_ire);
462*11042SErik.Nordmark@Sun.COM #ifdef DEBUG
463*11042SErik.Nordmark@Sun.COM 	ire_refhold_notr(ire);
464*11042SErik.Nordmark@Sun.COM 	ire_refrele(ire);
465*11042SErik.Nordmark@Sun.COM #endif
466*11042SErik.Nordmark@Sun.COM 	ixa->ixa_ire = ire;
467*11042SErik.Nordmark@Sun.COM 	ixa->ixa_ire_generation = gen;
468*11042SErik.Nordmark@Sun.COM 	if (multirt) {
469*11042SErik.Nordmark@Sun.COM 		if (ixa->ixa_flags & IXAF_IS_IPV4)
470*11042SErik.Nordmark@Sun.COM 			ixa->ixa_postfragfn = ip_postfrag_multirt_v4;
471*11042SErik.Nordmark@Sun.COM 		else
472*11042SErik.Nordmark@Sun.COM 			ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
473*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
474*11042SErik.Nordmark@Sun.COM 	} else {
475*11042SErik.Nordmark@Sun.COM 		ixa->ixa_postfragfn = ire->ire_postfragfn;
476*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
477*11042SErik.Nordmark@Sun.COM 	}
478*11042SErik.Nordmark@Sun.COM 
479*11042SErik.Nordmark@Sun.COM 	/*
480*11042SErik.Nordmark@Sun.COM 	 * Don't look for an nce for reject or blackhole.
481*11042SErik.Nordmark@Sun.COM 	 * They have ire_generation set to IRE_GENERATION_VERIFY which
482*11042SErik.Nordmark@Sun.COM 	 * makes conn_ip_output avoid references to ixa_nce.
483*11042SErik.Nordmark@Sun.COM 	 */
484*11042SErik.Nordmark@Sun.COM 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
485*11042SErik.Nordmark@Sun.COM 		ASSERT(ixa->ixa_ire_generation == IRE_GENERATION_VERIFY);
486*11042SErik.Nordmark@Sun.COM 		ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
487*11042SErik.Nordmark@Sun.COM 		return (0);
488*11042SErik.Nordmark@Sun.COM 	}
489*11042SErik.Nordmark@Sun.COM 
490*11042SErik.Nordmark@Sun.COM 	/* The NCE could now be different */
491*11042SErik.Nordmark@Sun.COM 	nce = ire_to_nce_pkt(ire, mp);
492*11042SErik.Nordmark@Sun.COM 	if (nce == NULL) {
493*11042SErik.Nordmark@Sun.COM 		/*
494*11042SErik.Nordmark@Sun.COM 		 * Allocation failure. Make sure we redo ire/nce selection
495*11042SErik.Nordmark@Sun.COM 		 * next time we send.
496*11042SErik.Nordmark@Sun.COM 		 */
497*11042SErik.Nordmark@Sun.COM 		ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
498*11042SErik.Nordmark@Sun.COM 		ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
499*11042SErik.Nordmark@Sun.COM 		return (ENOBUFS);
500*11042SErik.Nordmark@Sun.COM 	}
501*11042SErik.Nordmark@Sun.COM 	if (nce == ixa->ixa_nce) {
502*11042SErik.Nordmark@Sun.COM 		/* No change */
503*11042SErik.Nordmark@Sun.COM 		nce_refrele(nce);
504*11042SErik.Nordmark@Sun.COM 		return (0);
505*11042SErik.Nordmark@Sun.COM 	}
506*11042SErik.Nordmark@Sun.COM 
507*11042SErik.Nordmark@Sun.COM 	/*
508*11042SErik.Nordmark@Sun.COM 	 * Since the path MTU might change as a result of this
509*11042SErik.Nordmark@Sun.COM 	 * route change, we twiddle ixa_dce_generation to
510*11042SErik.Nordmark@Sun.COM 	 * make conn_ip_output go through the ip_verify_dce code.
511*11042SErik.Nordmark@Sun.COM 	 */
512*11042SErik.Nordmark@Sun.COM 	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
513*11042SErik.Nordmark@Sun.COM 
514*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_nce != NULL)
515*11042SErik.Nordmark@Sun.COM 		nce_refrele(ixa->ixa_nce);
516*11042SErik.Nordmark@Sun.COM 	ixa->ixa_nce = nce;
517*11042SErik.Nordmark@Sun.COM 	return (0);
518*11042SErik.Nordmark@Sun.COM }
519*11042SErik.Nordmark@Sun.COM 
520*11042SErik.Nordmark@Sun.COM /*
521*11042SErik.Nordmark@Sun.COM  * Handle both IPv4 and IPv6. Reverify/recalculate the NCE to use.
522*11042SErik.Nordmark@Sun.COM  */
523*11042SErik.Nordmark@Sun.COM static int
524*11042SErik.Nordmark@Sun.COM ip_verify_nce(mblk_t *mp, ip_xmit_attr_t *ixa)
525*11042SErik.Nordmark@Sun.COM {
526*11042SErik.Nordmark@Sun.COM 	ire_t		*ire = ixa->ixa_ire;
527*11042SErik.Nordmark@Sun.COM 	nce_t		*nce;
528*11042SErik.Nordmark@Sun.COM 	int		error = 0;
529*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = NULL;
530*11042SErik.Nordmark@Sun.COM 	ip6_t		*ip6h = NULL;
531*11042SErik.Nordmark@Sun.COM 
532*11042SErik.Nordmark@Sun.COM 	if (ire->ire_ipversion == IPV4_VERSION)
533*11042SErik.Nordmark@Sun.COM 		ipha = (ipha_t *)mp->b_rptr;
534*11042SErik.Nordmark@Sun.COM 	else
535*11042SErik.Nordmark@Sun.COM 		ip6h = (ip6_t *)mp->b_rptr;
536*11042SErik.Nordmark@Sun.COM 
537*11042SErik.Nordmark@Sun.COM 	nce = ire_handle_condemned_nce(ixa->ixa_nce, ire, ipha, ip6h, B_TRUE);
538*11042SErik.Nordmark@Sun.COM 	if (nce == NULL) {
539*11042SErik.Nordmark@Sun.COM 		/* Try to find a better ire */
540*11042SErik.Nordmark@Sun.COM 		return (ip_verify_ire(mp, ixa));
541*11042SErik.Nordmark@Sun.COM 	}
542*11042SErik.Nordmark@Sun.COM 
543*11042SErik.Nordmark@Sun.COM 	/*
544*11042SErik.Nordmark@Sun.COM 	 * The hardware offloading capabilities, for example LSO, of the
545*11042SErik.Nordmark@Sun.COM 	 * interface might have changed, so do sanity verification here.
546*11042SErik.Nordmark@Sun.COM 	 */
547*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_flags & IXAF_VERIFY_LSO) {
548*11042SErik.Nordmark@Sun.COM 		if (!ip_verify_lso(nce->nce_ill, ixa)) {
549*11042SErik.Nordmark@Sun.COM 			ASSERT(ixa->ixa_notify != NULL);
550*11042SErik.Nordmark@Sun.COM 			ixa->ixa_notify(ixa->ixa_notify_cookie, ixa,
551*11042SErik.Nordmark@Sun.COM 			    IXAN_LSO, 0);
552*11042SErik.Nordmark@Sun.COM 			error = ENOTSUP;
553*11042SErik.Nordmark@Sun.COM 		}
554*11042SErik.Nordmark@Sun.COM 	}
555*11042SErik.Nordmark@Sun.COM 
556*11042SErik.Nordmark@Sun.COM 	/*
557*11042SErik.Nordmark@Sun.COM 	 * Verify ZEROCOPY capability of underlying ill. Notify the ULP with
558*11042SErik.Nordmark@Sun.COM 	 * any ZEROCOPY changes. In case ZEROCOPY capability is not available
559*11042SErik.Nordmark@Sun.COM 	 * any more, return error so that conn_ip_output() can take care of
560*11042SErik.Nordmark@Sun.COM 	 * the ZEROCOPY message properly. It's safe to continue send the
561*11042SErik.Nordmark@Sun.COM 	 * message when ZEROCOPY newly become available.
562*11042SErik.Nordmark@Sun.COM 	 */
563*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_flags & IXAF_VERIFY_ZCOPY) {
564*11042SErik.Nordmark@Sun.COM 		if (!ip_verify_zcopy(nce->nce_ill, ixa)) {
565*11042SErik.Nordmark@Sun.COM 			ASSERT(ixa->ixa_notify != NULL);
566*11042SErik.Nordmark@Sun.COM 			ixa->ixa_notify(ixa->ixa_notify_cookie, ixa,
567*11042SErik.Nordmark@Sun.COM 			    IXAN_ZCOPY, 0);
568*11042SErik.Nordmark@Sun.COM 			if ((ixa->ixa_flags & IXAF_ZCOPY_CAPAB) == 0)
569*11042SErik.Nordmark@Sun.COM 				error = ENOTSUP;
570*11042SErik.Nordmark@Sun.COM 		}
571*11042SErik.Nordmark@Sun.COM 	}
572*11042SErik.Nordmark@Sun.COM 
573*11042SErik.Nordmark@Sun.COM 	/*
574*11042SErik.Nordmark@Sun.COM 	 * Since the path MTU might change as a result of this
575*11042SErik.Nordmark@Sun.COM 	 * change, we twiddle ixa_dce_generation to
576*11042SErik.Nordmark@Sun.COM 	 * make conn_ip_output go through the ip_verify_dce code.
577*11042SErik.Nordmark@Sun.COM 	 */
578*11042SErik.Nordmark@Sun.COM 	ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
579*11042SErik.Nordmark@Sun.COM 
580*11042SErik.Nordmark@Sun.COM 	nce_refrele(ixa->ixa_nce);
581*11042SErik.Nordmark@Sun.COM 	ixa->ixa_nce = nce;
582*11042SErik.Nordmark@Sun.COM 	return (error);
583*11042SErik.Nordmark@Sun.COM }
584*11042SErik.Nordmark@Sun.COM 
585*11042SErik.Nordmark@Sun.COM /*
586*11042SErik.Nordmark@Sun.COM  * Handle both IPv4 and IPv6. Reverify/recalculate the DCE to use.
587*11042SErik.Nordmark@Sun.COM  */
588*11042SErik.Nordmark@Sun.COM static int
589*11042SErik.Nordmark@Sun.COM ip_verify_dce(mblk_t *mp, ip_xmit_attr_t *ixa)
590*11042SErik.Nordmark@Sun.COM {
591*11042SErik.Nordmark@Sun.COM 	dce_t		*dce;
592*11042SErik.Nordmark@Sun.COM 	uint_t		gen;
593*11042SErik.Nordmark@Sun.COM 	uint_t		pmtu;
594*11042SErik.Nordmark@Sun.COM 
595*11042SErik.Nordmark@Sun.COM 	dce = dce_lookup_pkt(mp, ixa, &gen);
596*11042SErik.Nordmark@Sun.COM 	ASSERT(dce != NULL);
597*11042SErik.Nordmark@Sun.COM 
598*11042SErik.Nordmark@Sun.COM 	dce_refrele_notr(ixa->ixa_dce);
599*11042SErik.Nordmark@Sun.COM #ifdef DEBUG
600*11042SErik.Nordmark@Sun.COM 	dce_refhold_notr(dce);
601*11042SErik.Nordmark@Sun.COM 	dce_refrele(dce);
602*11042SErik.Nordmark@Sun.COM #endif
603*11042SErik.Nordmark@Sun.COM 	ixa->ixa_dce = dce;
604*11042SErik.Nordmark@Sun.COM 	ixa->ixa_dce_generation = gen;
605*11042SErik.Nordmark@Sun.COM 
606*11042SErik.Nordmark@Sun.COM 	/* Extract the (path) mtu from the dce, ncec_ill etc */
607*11042SErik.Nordmark@Sun.COM 	pmtu = ip_get_pmtu(ixa);
608*11042SErik.Nordmark@Sun.COM 
609*11042SErik.Nordmark@Sun.COM 	/*
610*11042SErik.Nordmark@Sun.COM 	 * Tell ULP about PMTU changes - increase or decrease - by returning
611*11042SErik.Nordmark@Sun.COM 	 * an error if IXAF_VERIFY_PMTU is set. In such case, ULP should update
612*11042SErik.Nordmark@Sun.COM 	 * both ixa_pmtu and ixa_fragsize appropriately.
613*11042SErik.Nordmark@Sun.COM 	 *
614*11042SErik.Nordmark@Sun.COM 	 * If ULP doesn't set that flag then we need to update ixa_fragsize
615*11042SErik.Nordmark@Sun.COM 	 * since routing could have changed the ill after after ixa_fragsize
616*11042SErik.Nordmark@Sun.COM 	 * was set previously in the conn_ip_output path or in
617*11042SErik.Nordmark@Sun.COM 	 * ip_set_destination.
618*11042SErik.Nordmark@Sun.COM 	 *
619*11042SErik.Nordmark@Sun.COM 	 * In case of LSO, ixa_fragsize might be greater than ixa_pmtu.
620*11042SErik.Nordmark@Sun.COM 	 *
621*11042SErik.Nordmark@Sun.COM 	 * In the case of a path MTU increase we send the packet after the
622*11042SErik.Nordmark@Sun.COM 	 * notify to the ULP.
623*11042SErik.Nordmark@Sun.COM 	 */
624*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_flags & IXAF_VERIFY_PMTU) {
625*11042SErik.Nordmark@Sun.COM 		if (ixa->ixa_pmtu != pmtu) {
626*11042SErik.Nordmark@Sun.COM 			uint_t oldmtu = ixa->ixa_pmtu;
627*11042SErik.Nordmark@Sun.COM 
628*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE2(verify_pmtu, uint32_t, pmtu,
629*11042SErik.Nordmark@Sun.COM 			    uint32_t, ixa->ixa_pmtu);
630*11042SErik.Nordmark@Sun.COM 			ASSERT(ixa->ixa_notify != NULL);
631*11042SErik.Nordmark@Sun.COM 			ixa->ixa_notify(ixa->ixa_notify_cookie, ixa,
632*11042SErik.Nordmark@Sun.COM 			    IXAN_PMTU, pmtu);
633*11042SErik.Nordmark@Sun.COM 			if (pmtu < oldmtu)
634*11042SErik.Nordmark@Sun.COM 				return (EMSGSIZE);
635*11042SErik.Nordmark@Sun.COM 		}
636*11042SErik.Nordmark@Sun.COM 	} else {
637*11042SErik.Nordmark@Sun.COM 		ixa->ixa_fragsize = pmtu;
638*11042SErik.Nordmark@Sun.COM 	}
639*11042SErik.Nordmark@Sun.COM 	return (0);
640*11042SErik.Nordmark@Sun.COM }
641*11042SErik.Nordmark@Sun.COM 
642*11042SErik.Nordmark@Sun.COM /*
643*11042SErik.Nordmark@Sun.COM  * Verify LSO usability. Keep the return value simple to indicate whether
644*11042SErik.Nordmark@Sun.COM  * the LSO capability has changed. Handle both IPv4 and IPv6.
645*11042SErik.Nordmark@Sun.COM  */
646*11042SErik.Nordmark@Sun.COM static boolean_t
647*11042SErik.Nordmark@Sun.COM ip_verify_lso(ill_t *ill, ip_xmit_attr_t *ixa)
648*11042SErik.Nordmark@Sun.COM {
649*11042SErik.Nordmark@Sun.COM 	ill_lso_capab_t	*lsoc = &ixa->ixa_lso_capab;
650*11042SErik.Nordmark@Sun.COM 	ill_lso_capab_t	*new_lsoc = ill->ill_lso_capab;
651*11042SErik.Nordmark@Sun.COM 
652*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_flags & IXAF_LSO_CAPAB) {
653*11042SErik.Nordmark@Sun.COM 		/*
654*11042SErik.Nordmark@Sun.COM 		 * Not unsable any more.
655*11042SErik.Nordmark@Sun.COM 		 */
656*11042SErik.Nordmark@Sun.COM 		if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) ||
657*11042SErik.Nordmark@Sun.COM 		    (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) ||
658*11042SErik.Nordmark@Sun.COM 		    (ixa->ixa_ire->ire_flags & RTF_MULTIRT) ||
659*11042SErik.Nordmark@Sun.COM 		    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
660*11042SErik.Nordmark@Sun.COM 		    !ILL_LSO_TCP_IPV4_USABLE(ill) :
661*11042SErik.Nordmark@Sun.COM 		    !ILL_LSO_TCP_IPV6_USABLE(ill))) {
662*11042SErik.Nordmark@Sun.COM 			ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
663*11042SErik.Nordmark@Sun.COM 
664*11042SErik.Nordmark@Sun.COM 			return (B_FALSE);
665*11042SErik.Nordmark@Sun.COM 		}
666*11042SErik.Nordmark@Sun.COM 
667*11042SErik.Nordmark@Sun.COM 		/*
668*11042SErik.Nordmark@Sun.COM 		 * Capability has changed, refresh the copy in ixa.
669*11042SErik.Nordmark@Sun.COM 		 */
670*11042SErik.Nordmark@Sun.COM 		if (lsoc->ill_lso_max != new_lsoc->ill_lso_max) {
671*11042SErik.Nordmark@Sun.COM 			*lsoc = *new_lsoc;
672*11042SErik.Nordmark@Sun.COM 
673*11042SErik.Nordmark@Sun.COM 			return (B_FALSE);
674*11042SErik.Nordmark@Sun.COM 		}
675*11042SErik.Nordmark@Sun.COM 	} else { /* Was not usable */
676*11042SErik.Nordmark@Sun.COM 		if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
677*11042SErik.Nordmark@Sun.COM 		    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
678*11042SErik.Nordmark@Sun.COM 		    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
679*11042SErik.Nordmark@Sun.COM 		    ((ixa->ixa_flags & IXAF_IS_IPV4) ?
680*11042SErik.Nordmark@Sun.COM 		    ILL_LSO_TCP_IPV4_USABLE(ill) :
681*11042SErik.Nordmark@Sun.COM 		    ILL_LSO_TCP_IPV6_USABLE(ill))) {
682*11042SErik.Nordmark@Sun.COM 			*lsoc = *new_lsoc;
683*11042SErik.Nordmark@Sun.COM 			ixa->ixa_flags |= IXAF_LSO_CAPAB;
684*11042SErik.Nordmark@Sun.COM 
685*11042SErik.Nordmark@Sun.COM 			return (B_FALSE);
686*11042SErik.Nordmark@Sun.COM 		}
687*11042SErik.Nordmark@Sun.COM 	}
688*11042SErik.Nordmark@Sun.COM 
689*11042SErik.Nordmark@Sun.COM 	return (B_TRUE);
690*11042SErik.Nordmark@Sun.COM }
691*11042SErik.Nordmark@Sun.COM 
692*11042SErik.Nordmark@Sun.COM /*
693*11042SErik.Nordmark@Sun.COM  * Verify ZEROCOPY usability. Keep the return value simple to indicate whether
694*11042SErik.Nordmark@Sun.COM  * the ZEROCOPY capability has changed. Handle both IPv4 and IPv6.
695*11042SErik.Nordmark@Sun.COM  */
696*11042SErik.Nordmark@Sun.COM static boolean_t
697*11042SErik.Nordmark@Sun.COM ip_verify_zcopy(ill_t *ill, ip_xmit_attr_t *ixa)
698*11042SErik.Nordmark@Sun.COM {
699*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_flags & IXAF_ZCOPY_CAPAB) {
700*11042SErik.Nordmark@Sun.COM 		/*
701*11042SErik.Nordmark@Sun.COM 		 * Not unsable any more.
702*11042SErik.Nordmark@Sun.COM 		 */
703*11042SErik.Nordmark@Sun.COM 		if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) ||
704*11042SErik.Nordmark@Sun.COM 		    (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) ||
705*11042SErik.Nordmark@Sun.COM 		    (ixa->ixa_ire->ire_flags & RTF_MULTIRT) ||
706*11042SErik.Nordmark@Sun.COM 		    !ILL_ZCOPY_USABLE(ill)) {
707*11042SErik.Nordmark@Sun.COM 			ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
708*11042SErik.Nordmark@Sun.COM 
709*11042SErik.Nordmark@Sun.COM 			return (B_FALSE);
710*11042SErik.Nordmark@Sun.COM 		}
711*11042SErik.Nordmark@Sun.COM 	} else { /* Was not usable */
712*11042SErik.Nordmark@Sun.COM 		if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
713*11042SErik.Nordmark@Sun.COM 		    !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
714*11042SErik.Nordmark@Sun.COM 		    !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
715*11042SErik.Nordmark@Sun.COM 		    ILL_ZCOPY_USABLE(ill)) {
716*11042SErik.Nordmark@Sun.COM 			ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
717*11042SErik.Nordmark@Sun.COM 
718*11042SErik.Nordmark@Sun.COM 			return (B_FALSE);
719*11042SErik.Nordmark@Sun.COM 		}
720*11042SErik.Nordmark@Sun.COM 	}
721*11042SErik.Nordmark@Sun.COM 
722*11042SErik.Nordmark@Sun.COM 	return (B_TRUE);
723*11042SErik.Nordmark@Sun.COM }
724*11042SErik.Nordmark@Sun.COM 
725*11042SErik.Nordmark@Sun.COM 
726*11042SErik.Nordmark@Sun.COM /*
727*11042SErik.Nordmark@Sun.COM  * When there is no conn_t context, this will send a packet.
728*11042SErik.Nordmark@Sun.COM  * The caller must *not* have called conn_connect() or ip_attr_connect()
729*11042SErik.Nordmark@Sun.COM  * before calling ip_output_simple().
730*11042SErik.Nordmark@Sun.COM  * Handles IPv4 and IPv6. Returns zero or an errno such as ENETUNREACH.
731*11042SErik.Nordmark@Sun.COM  * Honors IXAF_SET_SOURCE.
732*11042SErik.Nordmark@Sun.COM  *
733*11042SErik.Nordmark@Sun.COM  * We acquire the ire and after calling ire_sendfn we release
734*11042SErik.Nordmark@Sun.COM  * the hold on the ire. Ditto for the nce and dce.
735*11042SErik.Nordmark@Sun.COM  *
736*11042SErik.Nordmark@Sun.COM  * This assumes that the caller has set the following in ip_xmit_attr_t:
737*11042SErik.Nordmark@Sun.COM  *	ixa_tsl, ixa_zoneid, and ixa_ipst must always be set.
738*11042SErik.Nordmark@Sun.COM  *	If ixa_ifindex is non-zero it means send out that ill. (If it is
739*11042SErik.Nordmark@Sun.COM  *	an upper IPMP ill we load balance across the group; if a lower we send
740*11042SErik.Nordmark@Sun.COM  *	on that lower ill without load balancing.)
741*11042SErik.Nordmark@Sun.COM  *	IXAF_IS_IPV4 must be set correctly.
742*11042SErik.Nordmark@Sun.COM  *	If IXAF_IPSEC_SECURE is set then the ixa_ipsec_* fields must be set.
743*11042SErik.Nordmark@Sun.COM  *	If IXAF_NO_IPSEC is set we'd skip IPsec policy lookup.
744*11042SErik.Nordmark@Sun.COM  *	If neither of those two are set we do an IPsec policy lookup.
745*11042SErik.Nordmark@Sun.COM  *
746*11042SErik.Nordmark@Sun.COM  * We handle setting things like
747*11042SErik.Nordmark@Sun.COM  *	ixa_pktlen
748*11042SErik.Nordmark@Sun.COM  *	ixa_ip_hdr_length
749*11042SErik.Nordmark@Sun.COM  *	ixa->ixa_protocol
750*11042SErik.Nordmark@Sun.COM  *
751*11042SErik.Nordmark@Sun.COM  * The caller may set ixa_xmit_hint, which is used for ECMP selection and
752*11042SErik.Nordmark@Sun.COM  * transmit ring selecting in GLD.
753*11042SErik.Nordmark@Sun.COM  *
754*11042SErik.Nordmark@Sun.COM  * The caller must do an ixa_cleanup() to release any IPsec references
755*11042SErik.Nordmark@Sun.COM  * after we return.
756*11042SErik.Nordmark@Sun.COM  */
757*11042SErik.Nordmark@Sun.COM int
758*11042SErik.Nordmark@Sun.COM ip_output_simple(mblk_t *mp, ip_xmit_attr_t *ixa)
759*11042SErik.Nordmark@Sun.COM {
760*11042SErik.Nordmark@Sun.COM 	ts_label_t	*effective_tsl = NULL;
761*11042SErik.Nordmark@Sun.COM 	int		err;
762*11042SErik.Nordmark@Sun.COM 
763*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_ipst != NULL);
764*11042SErik.Nordmark@Sun.COM 
765*11042SErik.Nordmark@Sun.COM 	if (is_system_labeled()) {
766*11042SErik.Nordmark@Sun.COM 		ip_stack_t *ipst = ixa->ixa_ipst;
767*11042SErik.Nordmark@Sun.COM 
768*11042SErik.Nordmark@Sun.COM 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
769*11042SErik.Nordmark@Sun.COM 			err = tsol_check_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
770*11042SErik.Nordmark@Sun.COM 			    &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst,
771*11042SErik.Nordmark@Sun.COM 			    &effective_tsl);
772*11042SErik.Nordmark@Sun.COM 		} else {
773*11042SErik.Nordmark@Sun.COM 			err = tsol_check_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
774*11042SErik.Nordmark@Sun.COM 			    &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst,
775*11042SErik.Nordmark@Sun.COM 			    &effective_tsl);
776*11042SErik.Nordmark@Sun.COM 		}
777*11042SErik.Nordmark@Sun.COM 		if (err != 0) {
778*11042SErik.Nordmark@Sun.COM 			ip2dbg(("tsol_check: label check failed (%d)\n", err));
779*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
780*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
781*11042SErik.Nordmark@Sun.COM 			ip_drop_output("tsol_check_label", mp, NULL);
782*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
783*11042SErik.Nordmark@Sun.COM 			return (err);
784*11042SErik.Nordmark@Sun.COM 		}
785*11042SErik.Nordmark@Sun.COM 		if (effective_tsl != NULL) {
786*11042SErik.Nordmark@Sun.COM 			/* Update the label */
787*11042SErik.Nordmark@Sun.COM 			ip_xmit_attr_replace_tsl(ixa, effective_tsl);
788*11042SErik.Nordmark@Sun.COM 		}
789*11042SErik.Nordmark@Sun.COM 	}
790*11042SErik.Nordmark@Sun.COM 
791*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_flags & IXAF_IS_IPV4)
792*11042SErik.Nordmark@Sun.COM 		return (ip_output_simple_v4(mp, ixa));
793*11042SErik.Nordmark@Sun.COM 	else
794*11042SErik.Nordmark@Sun.COM 		return (ip_output_simple_v6(mp, ixa));
795*11042SErik.Nordmark@Sun.COM }
796*11042SErik.Nordmark@Sun.COM 
797*11042SErik.Nordmark@Sun.COM int
798*11042SErik.Nordmark@Sun.COM ip_output_simple_v4(mblk_t *mp, ip_xmit_attr_t *ixa)
799*11042SErik.Nordmark@Sun.COM {
800*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha;
801*11042SErik.Nordmark@Sun.COM 	ipaddr_t	firsthop; /* In IP header */
802*11042SErik.Nordmark@Sun.COM 	ipaddr_t	dst;	/* End of source route, or ipha_dst if none */
803*11042SErik.Nordmark@Sun.COM 	ire_t		*ire;
804*11042SErik.Nordmark@Sun.COM 	ipaddr_t	setsrc;	/* RTF_SETSRC */
805*11042SErik.Nordmark@Sun.COM 	int		error;
806*11042SErik.Nordmark@Sun.COM 	ill_t		*ill = NULL;
807*11042SErik.Nordmark@Sun.COM 	dce_t		*dce = NULL;
808*11042SErik.Nordmark@Sun.COM 	nce_t		*nce;
809*11042SErik.Nordmark@Sun.COM 	iaflags_t	ixaflags = ixa->ixa_flags;
810*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
811*11042SErik.Nordmark@Sun.COM 	boolean_t	repeat = B_FALSE;
812*11042SErik.Nordmark@Sun.COM 	boolean_t	multirt = B_FALSE;
813*11042SErik.Nordmark@Sun.COM 
814*11042SErik.Nordmark@Sun.COM 	ipha = (ipha_t *)mp->b_rptr;
815*11042SErik.Nordmark@Sun.COM 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
816*11042SErik.Nordmark@Sun.COM 
817*11042SErik.Nordmark@Sun.COM 	/*
818*11042SErik.Nordmark@Sun.COM 	 * Even on labeled systems we can have a NULL ixa_tsl e.g.,
819*11042SErik.Nordmark@Sun.COM 	 * for IGMP/MLD traffic.
820*11042SErik.Nordmark@Sun.COM 	 */
821*11042SErik.Nordmark@Sun.COM 
822*11042SErik.Nordmark@Sun.COM 	/* Caller already set flags */
823*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
824*11042SErik.Nordmark@Sun.COM 
825*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_nce == NULL);
826*11042SErik.Nordmark@Sun.COM 
827*11042SErik.Nordmark@Sun.COM 	ixa->ixa_pktlen = ntohs(ipha->ipha_length);
828*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_pktlen == msgdsize(mp));
829*11042SErik.Nordmark@Sun.COM 	ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha);
830*11042SErik.Nordmark@Sun.COM 	ixa->ixa_protocol = ipha->ipha_protocol;
831*11042SErik.Nordmark@Sun.COM 
832*11042SErik.Nordmark@Sun.COM 	/*
833*11042SErik.Nordmark@Sun.COM 	 * Assumes that source routed packets have already been massaged by
834*11042SErik.Nordmark@Sun.COM 	 * the ULP (ip_massage_options) and as a result ipha_dst is the next
835*11042SErik.Nordmark@Sun.COM 	 * hop in the source route. The final destination is used for IPsec
836*11042SErik.Nordmark@Sun.COM 	 * policy and DCE lookup.
837*11042SErik.Nordmark@Sun.COM 	 */
838*11042SErik.Nordmark@Sun.COM 	firsthop = ipha->ipha_dst;
839*11042SErik.Nordmark@Sun.COM 	dst = ip_get_dst(ipha);
840*11042SErik.Nordmark@Sun.COM 
841*11042SErik.Nordmark@Sun.COM repeat_ire:
842*11042SErik.Nordmark@Sun.COM 	error = 0;
843*11042SErik.Nordmark@Sun.COM 	setsrc = INADDR_ANY;
844*11042SErik.Nordmark@Sun.COM 	ire = ip_select_route_v4(firsthop, ixa, NULL, &setsrc, &error,
845*11042SErik.Nordmark@Sun.COM 	    &multirt);
846*11042SErik.Nordmark@Sun.COM 	ASSERT(ire != NULL);	/* IRE_NOROUTE if none found */
847*11042SErik.Nordmark@Sun.COM 	if (error != 0) {
848*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
849*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
850*11042SErik.Nordmark@Sun.COM 		ip_drop_output("ipIfStatsOutDiscards - select route", mp, NULL);
851*11042SErik.Nordmark@Sun.COM 		freemsg(mp);
852*11042SErik.Nordmark@Sun.COM 		goto done;
853*11042SErik.Nordmark@Sun.COM 	}
854*11042SErik.Nordmark@Sun.COM 
855*11042SErik.Nordmark@Sun.COM 	if (ire->ire_flags & (RTF_BLACKHOLE|RTF_REJECT)) {
856*11042SErik.Nordmark@Sun.COM 		/* ire_ill might be NULL hence need to skip some code */
857*11042SErik.Nordmark@Sun.COM 		if (ixaflags & IXAF_SET_SOURCE)
858*11042SErik.Nordmark@Sun.COM 			ipha->ipha_src = htonl(INADDR_LOOPBACK);
859*11042SErik.Nordmark@Sun.COM 		ixa->ixa_fragsize = IP_MAXPACKET;
860*11042SErik.Nordmark@Sun.COM 		ill = NULL;
861*11042SErik.Nordmark@Sun.COM 		nce = NULL;
862*11042SErik.Nordmark@Sun.COM 		ire->ire_ob_pkt_count++;
863*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
864*11042SErik.Nordmark@Sun.COM 		/* No dce yet; use default one */
865*11042SErik.Nordmark@Sun.COM 		error = (ire->ire_sendfn)(ire, mp, ipha, ixa,
866*11042SErik.Nordmark@Sun.COM 		    &ipst->ips_dce_default->dce_ident);
867*11042SErik.Nordmark@Sun.COM 		goto done;
868*11042SErik.Nordmark@Sun.COM 	}
869*11042SErik.Nordmark@Sun.COM 
870*11042SErik.Nordmark@Sun.COM 	/* Note that ipha_dst is only used for IRE_MULTICAST */
871*11042SErik.Nordmark@Sun.COM 	nce = ire_to_nce(ire, ipha->ipha_dst, NULL);
872*11042SErik.Nordmark@Sun.COM 	if (nce == NULL) {
873*11042SErik.Nordmark@Sun.COM 		/* Allocation failure? */
874*11042SErik.Nordmark@Sun.COM 		ip_drop_output("ire_to_nce", mp, ill);
875*11042SErik.Nordmark@Sun.COM 		freemsg(mp);
876*11042SErik.Nordmark@Sun.COM 		error = ENOBUFS;
877*11042SErik.Nordmark@Sun.COM 		goto done;
878*11042SErik.Nordmark@Sun.COM 	}
879*11042SErik.Nordmark@Sun.COM 	if (nce->nce_is_condemned) {
880*11042SErik.Nordmark@Sun.COM 		nce_t *nce1;
881*11042SErik.Nordmark@Sun.COM 
882*11042SErik.Nordmark@Sun.COM 		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_TRUE);
883*11042SErik.Nordmark@Sun.COM 		nce_refrele(nce);
884*11042SErik.Nordmark@Sun.COM 		if (nce1 == NULL) {
885*11042SErik.Nordmark@Sun.COM 			if (!repeat) {
886*11042SErik.Nordmark@Sun.COM 				/* Try finding a better IRE */
887*11042SErik.Nordmark@Sun.COM 				repeat = B_TRUE;
888*11042SErik.Nordmark@Sun.COM 				ire_refrele(ire);
889*11042SErik.Nordmark@Sun.COM 				goto repeat_ire;
890*11042SErik.Nordmark@Sun.COM 			}
891*11042SErik.Nordmark@Sun.COM 			/* Tried twice - drop packet */
892*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
893*11042SErik.Nordmark@Sun.COM 			ip_drop_output("No nce", mp, ill);
894*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
895*11042SErik.Nordmark@Sun.COM 			error = ENOBUFS;
896*11042SErik.Nordmark@Sun.COM 			goto done;
897*11042SErik.Nordmark@Sun.COM 		}
898*11042SErik.Nordmark@Sun.COM 		nce = nce1;
899*11042SErik.Nordmark@Sun.COM 	}
900*11042SErik.Nordmark@Sun.COM 
901*11042SErik.Nordmark@Sun.COM 	/*
902*11042SErik.Nordmark@Sun.COM 	 * For multicast with multirt we have a flag passed back from
903*11042SErik.Nordmark@Sun.COM 	 * ire_lookup_multi_ill_v4 since we don't have an IRE for each
904*11042SErik.Nordmark@Sun.COM 	 * possible multicast address.
905*11042SErik.Nordmark@Sun.COM 	 * We also need a flag for multicast since we can't check
906*11042SErik.Nordmark@Sun.COM 	 * whether RTF_MULTIRT is set in ixa_ire for multicast.
907*11042SErik.Nordmark@Sun.COM 	 */
908*11042SErik.Nordmark@Sun.COM 	if (multirt) {
909*11042SErik.Nordmark@Sun.COM 		ixa->ixa_postfragfn = ip_postfrag_multirt_v4;
910*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
911*11042SErik.Nordmark@Sun.COM 	} else {
912*11042SErik.Nordmark@Sun.COM 		ixa->ixa_postfragfn = ire->ire_postfragfn;
913*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
914*11042SErik.Nordmark@Sun.COM 	}
915*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_nce == NULL);
916*11042SErik.Nordmark@Sun.COM 	ixa->ixa_nce = nce;
917*11042SErik.Nordmark@Sun.COM 
918*11042SErik.Nordmark@Sun.COM 	/*
919*11042SErik.Nordmark@Sun.COM 	 * Check for a dce_t with a path mtu.
920*11042SErik.Nordmark@Sun.COM 	 */
921*11042SErik.Nordmark@Sun.COM 	dce = dce_lookup_v4(dst, ipst, NULL);
922*11042SErik.Nordmark@Sun.COM 	ASSERT(dce != NULL);
923*11042SErik.Nordmark@Sun.COM 
924*11042SErik.Nordmark@Sun.COM 	if (!(ixaflags & IXAF_PMTU_DISCOVERY)) {
925*11042SErik.Nordmark@Sun.COM 		ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
926*11042SErik.Nordmark@Sun.COM 	} else if (dce->dce_flags & DCEF_PMTU) {
927*11042SErik.Nordmark@Sun.COM 		/*
928*11042SErik.Nordmark@Sun.COM 		 * To avoid a periodic timer to increase the path MTU we
929*11042SErik.Nordmark@Sun.COM 		 * look at dce_last_change_time each time we send a packet.
930*11042SErik.Nordmark@Sun.COM 		 */
931*11042SErik.Nordmark@Sun.COM 		if (TICK_TO_SEC(lbolt64) - dce->dce_last_change_time >
932*11042SErik.Nordmark@Sun.COM 		    ipst->ips_ip_pathmtu_interval) {
933*11042SErik.Nordmark@Sun.COM 			/*
934*11042SErik.Nordmark@Sun.COM 			 * Older than 20 minutes. Drop the path MTU information.
935*11042SErik.Nordmark@Sun.COM 			 */
936*11042SErik.Nordmark@Sun.COM 			mutex_enter(&dce->dce_lock);
937*11042SErik.Nordmark@Sun.COM 			dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU);
938*11042SErik.Nordmark@Sun.COM 			dce->dce_last_change_time = TICK_TO_SEC(lbolt64);
939*11042SErik.Nordmark@Sun.COM 			mutex_exit(&dce->dce_lock);
940*11042SErik.Nordmark@Sun.COM 			dce_increment_generation(dce);
941*11042SErik.Nordmark@Sun.COM 			ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
942*11042SErik.Nordmark@Sun.COM 		} else {
943*11042SErik.Nordmark@Sun.COM 			uint_t fragsize;
944*11042SErik.Nordmark@Sun.COM 
945*11042SErik.Nordmark@Sun.COM 			fragsize = ip_get_base_mtu(nce->nce_ill, ire);
946*11042SErik.Nordmark@Sun.COM 			if (fragsize > dce->dce_pmtu)
947*11042SErik.Nordmark@Sun.COM 				fragsize = dce->dce_pmtu;
948*11042SErik.Nordmark@Sun.COM 			ixa->ixa_fragsize = fragsize;
949*11042SErik.Nordmark@Sun.COM 		}
950*11042SErik.Nordmark@Sun.COM 	} else {
951*11042SErik.Nordmark@Sun.COM 		ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire);
952*11042SErik.Nordmark@Sun.COM 	}
953*11042SErik.Nordmark@Sun.COM 
954*11042SErik.Nordmark@Sun.COM 	/*
955*11042SErik.Nordmark@Sun.COM 	 * We use use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp
956*11042SErik.Nordmark@Sun.COM 	 * interface for source address selection.
957*11042SErik.Nordmark@Sun.COM 	 */
958*11042SErik.Nordmark@Sun.COM 	ill = ire_nexthop_ill(ire);
959*11042SErik.Nordmark@Sun.COM 
960*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_SET_SOURCE) {
961*11042SErik.Nordmark@Sun.COM 		ipaddr_t	src;
962*11042SErik.Nordmark@Sun.COM 
963*11042SErik.Nordmark@Sun.COM 		/*
964*11042SErik.Nordmark@Sun.COM 		 * We use the final destination to get
965*11042SErik.Nordmark@Sun.COM 		 * correct selection for source routed packets
966*11042SErik.Nordmark@Sun.COM 		 */
967*11042SErik.Nordmark@Sun.COM 
968*11042SErik.Nordmark@Sun.COM 		/* If unreachable we have no ill but need some source */
969*11042SErik.Nordmark@Sun.COM 		if (ill == NULL) {
970*11042SErik.Nordmark@Sun.COM 			src = htonl(INADDR_LOOPBACK);
971*11042SErik.Nordmark@Sun.COM 			error = 0;
972*11042SErik.Nordmark@Sun.COM 		} else {
973*11042SErik.Nordmark@Sun.COM 			error = ip_select_source_v4(ill, setsrc, dst,
974*11042SErik.Nordmark@Sun.COM 			    ixa->ixa_multicast_ifaddr, ixa->ixa_zoneid, ipst,
975*11042SErik.Nordmark@Sun.COM 			    &src, NULL, NULL);
976*11042SErik.Nordmark@Sun.COM 		}
977*11042SErik.Nordmark@Sun.COM 		if (error != 0) {
978*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
979*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
980*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - no source",
981*11042SErik.Nordmark@Sun.COM 			    mp, ill);
982*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
983*11042SErik.Nordmark@Sun.COM 			goto done;
984*11042SErik.Nordmark@Sun.COM 		}
985*11042SErik.Nordmark@Sun.COM 		ipha->ipha_src = src;
986*11042SErik.Nordmark@Sun.COM 	} else if (ixaflags & IXAF_VERIFY_SOURCE) {
987*11042SErik.Nordmark@Sun.COM 		/* Check if the IP source is assigned to the host. */
988*11042SErik.Nordmark@Sun.COM 		if (!ip_verify_src(mp, ixa, NULL)) {
989*11042SErik.Nordmark@Sun.COM 			/* Don't send a packet with a source that isn't ours */
990*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
991*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
992*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - invalid source",
993*11042SErik.Nordmark@Sun.COM 			    mp, ill);
994*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
995*11042SErik.Nordmark@Sun.COM 			error = EADDRNOTAVAIL;
996*11042SErik.Nordmark@Sun.COM 			goto done;
997*11042SErik.Nordmark@Sun.COM 		}
998*11042SErik.Nordmark@Sun.COM 	}
999*11042SErik.Nordmark@Sun.COM 
1000*11042SErik.Nordmark@Sun.COM 
1001*11042SErik.Nordmark@Sun.COM 	/*
1002*11042SErik.Nordmark@Sun.COM 	 * Check against global IPsec policy to set the AH/ESP attributes.
1003*11042SErik.Nordmark@Sun.COM 	 * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate.
1004*11042SErik.Nordmark@Sun.COM 	 */
1005*11042SErik.Nordmark@Sun.COM 	if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) {
1006*11042SErik.Nordmark@Sun.COM 		ASSERT(ixa->ixa_ipsec_policy == NULL);
1007*11042SErik.Nordmark@Sun.COM 		mp = ip_output_attach_policy(mp, ipha, NULL, NULL, ixa);
1008*11042SErik.Nordmark@Sun.COM 		if (mp == NULL) {
1009*11042SErik.Nordmark@Sun.COM 			/* MIB and ip_drop_packet already done */
1010*11042SErik.Nordmark@Sun.COM 			return (EHOSTUNREACH);	/* IPsec policy failure */
1011*11042SErik.Nordmark@Sun.COM 		}
1012*11042SErik.Nordmark@Sun.COM 	}
1013*11042SErik.Nordmark@Sun.COM 
1014*11042SErik.Nordmark@Sun.COM 	if (ill != NULL) {
1015*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);
1016*11042SErik.Nordmark@Sun.COM 	} else {
1017*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
1018*11042SErik.Nordmark@Sun.COM 	}
1019*11042SErik.Nordmark@Sun.COM 
1020*11042SErik.Nordmark@Sun.COM 	/*
1021*11042SErik.Nordmark@Sun.COM 	 * We update the statistics on the most specific IRE i.e., the first
1022*11042SErik.Nordmark@Sun.COM 	 * one we found.
1023*11042SErik.Nordmark@Sun.COM 	 * We don't have an IRE when we fragment, hence ire_ob_pkt_count
1024*11042SErik.Nordmark@Sun.COM 	 * can only count the use prior to fragmentation. However the MIB
1025*11042SErik.Nordmark@Sun.COM 	 * counters on the ill will be incremented in post fragmentation.
1026*11042SErik.Nordmark@Sun.COM 	 */
1027*11042SErik.Nordmark@Sun.COM 	ire->ire_ob_pkt_count++;
1028*11042SErik.Nordmark@Sun.COM 
1029*11042SErik.Nordmark@Sun.COM 	/*
1030*11042SErik.Nordmark@Sun.COM 	 * Based on ire_type and ire_flags call one of:
1031*11042SErik.Nordmark@Sun.COM 	 *	ire_send_local_v4 - for IRE_LOCAL and IRE_LOOPBACK
1032*11042SErik.Nordmark@Sun.COM 	 *	ire_send_multirt_v4 - if RTF_MULTIRT
1033*11042SErik.Nordmark@Sun.COM 	 *	ire_send_noroute_v4 - if RTF_REJECT or RTF_BLACHOLE
1034*11042SErik.Nordmark@Sun.COM 	 *	ire_send_multicast_v4 - for IRE_MULTICAST
1035*11042SErik.Nordmark@Sun.COM 	 *	ire_send_broadcast_v4 - for IRE_BROADCAST
1036*11042SErik.Nordmark@Sun.COM 	 *	ire_send_wire_v4 - for the rest.
1037*11042SErik.Nordmark@Sun.COM 	 */
1038*11042SErik.Nordmark@Sun.COM 	error = (ire->ire_sendfn)(ire, mp, ipha, ixa, &dce->dce_ident);
1039*11042SErik.Nordmark@Sun.COM done:
1040*11042SErik.Nordmark@Sun.COM 	ire_refrele(ire);
1041*11042SErik.Nordmark@Sun.COM 	if (dce != NULL)
1042*11042SErik.Nordmark@Sun.COM 		dce_refrele(dce);
1043*11042SErik.Nordmark@Sun.COM 	if (ill != NULL)
1044*11042SErik.Nordmark@Sun.COM 		ill_refrele(ill);
1045*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_nce != NULL)
1046*11042SErik.Nordmark@Sun.COM 		nce_refrele(ixa->ixa_nce);
1047*11042SErik.Nordmark@Sun.COM 	ixa->ixa_nce = NULL;
1048*11042SErik.Nordmark@Sun.COM 	return (error);
1049*11042SErik.Nordmark@Sun.COM }
1050*11042SErik.Nordmark@Sun.COM 
1051*11042SErik.Nordmark@Sun.COM /*
1052*11042SErik.Nordmark@Sun.COM  * ire_sendfn() functions.
1053*11042SErik.Nordmark@Sun.COM  * These functions use the following xmit_attr:
1054*11042SErik.Nordmark@Sun.COM  *  - ixa_fragsize - read to determine whether or not to fragment
1055*11042SErik.Nordmark@Sun.COM  *  - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec
1056*11042SErik.Nordmark@Sun.COM  *  - ixa_ipsec_*  are used inside IPsec
1057*11042SErik.Nordmark@Sun.COM  *  - IXAF_SET_SOURCE - replace IP source in broadcast case.
1058*11042SErik.Nordmark@Sun.COM  *  - IXAF_LOOPBACK_COPY - for multicast and broadcast
1059*11042SErik.Nordmark@Sun.COM  */
1060*11042SErik.Nordmark@Sun.COM 
1061*11042SErik.Nordmark@Sun.COM 
1062*11042SErik.Nordmark@Sun.COM /*
1063*11042SErik.Nordmark@Sun.COM  * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK
1064*11042SErik.Nordmark@Sun.COM  *
1065*11042SErik.Nordmark@Sun.COM  * The checks for restrict_interzone_loopback are done in ire_route_recursive.
1066*11042SErik.Nordmark@Sun.COM  */
1067*11042SErik.Nordmark@Sun.COM /* ARGSUSED4 */
1068*11042SErik.Nordmark@Sun.COM int
1069*11042SErik.Nordmark@Sun.COM ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1070*11042SErik.Nordmark@Sun.COM     ip_xmit_attr_t *ixa, uint32_t *identp)
1071*11042SErik.Nordmark@Sun.COM {
1072*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = (ipha_t *)iph_arg;
1073*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
1074*11042SErik.Nordmark@Sun.COM 	ill_t		*ill = ire->ire_ill;
1075*11042SErik.Nordmark@Sun.COM 	ip_recv_attr_t	iras;	/* NOTE: No bzero for performance */
1076*11042SErik.Nordmark@Sun.COM 	uint_t		pktlen = ixa->ixa_pktlen;
1077*11042SErik.Nordmark@Sun.COM 
1078*11042SErik.Nordmark@Sun.COM 	/*
1079*11042SErik.Nordmark@Sun.COM 	 * No fragmentation, no nce, no application of IPsec,
1080*11042SErik.Nordmark@Sun.COM 	 * and no ipha_ident assignment.
1081*11042SErik.Nordmark@Sun.COM 	 *
1082*11042SErik.Nordmark@Sun.COM 	 * Note different order between IP provider and FW_HOOKS than in
1083*11042SErik.Nordmark@Sun.COM 	 * send_wire case.
1084*11042SErik.Nordmark@Sun.COM 	 */
1085*11042SErik.Nordmark@Sun.COM 
1086*11042SErik.Nordmark@Sun.COM 	/*
1087*11042SErik.Nordmark@Sun.COM 	 * DTrace this as ip:::send.  A packet blocked by FW_HOOKS will fire the
1088*11042SErik.Nordmark@Sun.COM 	 * send probe, but not the receive probe.
1089*11042SErik.Nordmark@Sun.COM 	 */
1090*11042SErik.Nordmark@Sun.COM 	DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
1091*11042SErik.Nordmark@Sun.COM 	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
1092*11042SErik.Nordmark@Sun.COM 	    int, 1);
1093*11042SErik.Nordmark@Sun.COM 
1094*11042SErik.Nordmark@Sun.COM 	if (HOOKS4_INTERESTED_LOOPBACK_OUT(ipst)) {
1095*11042SErik.Nordmark@Sun.COM 		int error;
1096*11042SErik.Nordmark@Sun.COM 
1097*11042SErik.Nordmark@Sun.COM 		DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL,
1098*11042SErik.Nordmark@Sun.COM 		    ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
1099*11042SErik.Nordmark@Sun.COM 		FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1100*11042SErik.Nordmark@Sun.COM 		    ipst->ips_ipv4firewall_loopback_out,
1101*11042SErik.Nordmark@Sun.COM 		    NULL, ill, ipha, mp, mp, 0, ipst, error);
1102*11042SErik.Nordmark@Sun.COM 		DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp);
1103*11042SErik.Nordmark@Sun.COM 		if (mp == NULL)
1104*11042SErik.Nordmark@Sun.COM 			return (error);
1105*11042SErik.Nordmark@Sun.COM 
1106*11042SErik.Nordmark@Sun.COM 		/*
1107*11042SErik.Nordmark@Sun.COM 		 * Even if the destination was changed by the filter we use the
1108*11042SErik.Nordmark@Sun.COM 		 * forwarding decision that was made based on the address
1109*11042SErik.Nordmark@Sun.COM 		 * in ip_output/ip_set_destination.
1110*11042SErik.Nordmark@Sun.COM 		 */
1111*11042SErik.Nordmark@Sun.COM 		/* Length could be different */
1112*11042SErik.Nordmark@Sun.COM 		ipha = (ipha_t *)mp->b_rptr;
1113*11042SErik.Nordmark@Sun.COM 		pktlen = ntohs(ipha->ipha_length);
1114*11042SErik.Nordmark@Sun.COM 	}
1115*11042SErik.Nordmark@Sun.COM 
1116*11042SErik.Nordmark@Sun.COM 	/*
1117*11042SErik.Nordmark@Sun.COM 	 * If a callback is enabled then we need to know the
1118*11042SErik.Nordmark@Sun.COM 	 * source and destination zoneids for the packet. We already
1119*11042SErik.Nordmark@Sun.COM 	 * have those handy.
1120*11042SErik.Nordmark@Sun.COM 	 */
1121*11042SErik.Nordmark@Sun.COM 	if (ipst->ips_ip4_observe.he_interested) {
1122*11042SErik.Nordmark@Sun.COM 		zoneid_t szone, dzone;
1123*11042SErik.Nordmark@Sun.COM 		zoneid_t stackzoneid;
1124*11042SErik.Nordmark@Sun.COM 
1125*11042SErik.Nordmark@Sun.COM 		stackzoneid = netstackid_to_zoneid(
1126*11042SErik.Nordmark@Sun.COM 		    ipst->ips_netstack->netstack_stackid);
1127*11042SErik.Nordmark@Sun.COM 
1128*11042SErik.Nordmark@Sun.COM 		if (stackzoneid == GLOBAL_ZONEID) {
1129*11042SErik.Nordmark@Sun.COM 			/* Shared-IP zone */
1130*11042SErik.Nordmark@Sun.COM 			dzone = ire->ire_zoneid;
1131*11042SErik.Nordmark@Sun.COM 			szone = ixa->ixa_zoneid;
1132*11042SErik.Nordmark@Sun.COM 		} else {
1133*11042SErik.Nordmark@Sun.COM 			szone = dzone = stackzoneid;
1134*11042SErik.Nordmark@Sun.COM 		}
1135*11042SErik.Nordmark@Sun.COM 		ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst);
1136*11042SErik.Nordmark@Sun.COM 	}
1137*11042SErik.Nordmark@Sun.COM 
1138*11042SErik.Nordmark@Sun.COM 	/* Handle lo0 stats */
1139*11042SErik.Nordmark@Sun.COM 	ipst->ips_loopback_packets++;
1140*11042SErik.Nordmark@Sun.COM 
1141*11042SErik.Nordmark@Sun.COM 	/* Map ixa to ira including IPsec policies */
1142*11042SErik.Nordmark@Sun.COM 	ipsec_out_to_in(ixa, ill, &iras);
1143*11042SErik.Nordmark@Sun.COM 	iras.ira_pktlen = pktlen;
1144*11042SErik.Nordmark@Sun.COM 
1145*11042SErik.Nordmark@Sun.COM 	if (!IS_SIMPLE_IPH(ipha)) {
1146*11042SErik.Nordmark@Sun.COM 		ip_output_local_options(ipha, ipst);
1147*11042SErik.Nordmark@Sun.COM 		iras.ira_flags |= IRAF_IPV4_OPTIONS;
1148*11042SErik.Nordmark@Sun.COM 	}
1149*11042SErik.Nordmark@Sun.COM 
1150*11042SErik.Nordmark@Sun.COM 	if (HOOKS4_INTERESTED_LOOPBACK_IN(ipst)) {
1151*11042SErik.Nordmark@Sun.COM 		int error;
1152*11042SErik.Nordmark@Sun.COM 
1153*11042SErik.Nordmark@Sun.COM 		DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill,
1154*11042SErik.Nordmark@Sun.COM 		    ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp);
1155*11042SErik.Nordmark@Sun.COM 		FW_HOOKS(ipst->ips_ip4_loopback_in_event,
1156*11042SErik.Nordmark@Sun.COM 		    ipst->ips_ipv4firewall_loopback_in,
1157*11042SErik.Nordmark@Sun.COM 		    ill, NULL, ipha, mp, mp, 0, ipst, error);
1158*11042SErik.Nordmark@Sun.COM 
1159*11042SErik.Nordmark@Sun.COM 		DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp);
1160*11042SErik.Nordmark@Sun.COM 		if (mp == NULL) {
1161*11042SErik.Nordmark@Sun.COM 			ira_cleanup(&iras, B_FALSE);
1162*11042SErik.Nordmark@Sun.COM 			return (error);
1163*11042SErik.Nordmark@Sun.COM 		}
1164*11042SErik.Nordmark@Sun.COM 		/*
1165*11042SErik.Nordmark@Sun.COM 		 * Even if the destination was changed by the filter we use the
1166*11042SErik.Nordmark@Sun.COM 		 * forwarding decision that was made based on the address
1167*11042SErik.Nordmark@Sun.COM 		 * in ip_output/ip_set_destination.
1168*11042SErik.Nordmark@Sun.COM 		 */
1169*11042SErik.Nordmark@Sun.COM 		/* Length could be different */
1170*11042SErik.Nordmark@Sun.COM 		ipha = (ipha_t *)mp->b_rptr;
1171*11042SErik.Nordmark@Sun.COM 		pktlen = iras.ira_pktlen = ntohs(ipha->ipha_length);
1172*11042SErik.Nordmark@Sun.COM 	}
1173*11042SErik.Nordmark@Sun.COM 
1174*11042SErik.Nordmark@Sun.COM 	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
1175*11042SErik.Nordmark@Sun.COM 	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
1176*11042SErik.Nordmark@Sun.COM 	    int, 1);
1177*11042SErik.Nordmark@Sun.COM 
1178*11042SErik.Nordmark@Sun.COM 	ire->ire_ib_pkt_count++;
1179*11042SErik.Nordmark@Sun.COM 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
1180*11042SErik.Nordmark@Sun.COM 	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, pktlen);
1181*11042SErik.Nordmark@Sun.COM 
1182*11042SErik.Nordmark@Sun.COM 	/* Destined to ire_zoneid - use that for fanout */
1183*11042SErik.Nordmark@Sun.COM 	iras.ira_zoneid = ire->ire_zoneid;
1184*11042SErik.Nordmark@Sun.COM 
1185*11042SErik.Nordmark@Sun.COM 	if (is_system_labeled()) {
1186*11042SErik.Nordmark@Sun.COM 		iras.ira_flags |= IRAF_SYSTEM_LABELED;
1187*11042SErik.Nordmark@Sun.COM 
1188*11042SErik.Nordmark@Sun.COM 		/*
1189*11042SErik.Nordmark@Sun.COM 		 * This updates ira_cred, ira_tsl and ira_free_flags based
1190*11042SErik.Nordmark@Sun.COM 		 * on the label. We don't expect this to ever fail for
1191*11042SErik.Nordmark@Sun.COM 		 * loopback packets, so we silently drop the packet should it
1192*11042SErik.Nordmark@Sun.COM 		 * fail.
1193*11042SErik.Nordmark@Sun.COM 		 */
1194*11042SErik.Nordmark@Sun.COM 		if (!tsol_get_pkt_label(mp, IPV4_VERSION, &iras)) {
1195*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1196*11042SErik.Nordmark@Sun.COM 			ip_drop_input("tsol_get_pkt_label", mp, ill);
1197*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
1198*11042SErik.Nordmark@Sun.COM 			return (0);
1199*11042SErik.Nordmark@Sun.COM 		}
1200*11042SErik.Nordmark@Sun.COM 		ASSERT(iras.ira_tsl != NULL);
1201*11042SErik.Nordmark@Sun.COM 
1202*11042SErik.Nordmark@Sun.COM 		/* tsol_get_pkt_label sometimes does pullupmsg */
1203*11042SErik.Nordmark@Sun.COM 		ipha = (ipha_t *)mp->b_rptr;
1204*11042SErik.Nordmark@Sun.COM 	}
1205*11042SErik.Nordmark@Sun.COM 
1206*11042SErik.Nordmark@Sun.COM 	ip_fanout_v4(mp, ipha, &iras);
1207*11042SErik.Nordmark@Sun.COM 
1208*11042SErik.Nordmark@Sun.COM 	/* We moved any IPsec refs from ixa to iras */
1209*11042SErik.Nordmark@Sun.COM 	ira_cleanup(&iras, B_FALSE);
1210*11042SErik.Nordmark@Sun.COM 	return (0);
1211*11042SErik.Nordmark@Sun.COM }
1212*11042SErik.Nordmark@Sun.COM 
1213*11042SErik.Nordmark@Sun.COM /*
1214*11042SErik.Nordmark@Sun.COM  * ire_sendfn for IRE_BROADCAST
1215*11042SErik.Nordmark@Sun.COM  * If the broadcast address is present on multiple ills and ixa_ifindex
1216*11042SErik.Nordmark@Sun.COM  * isn't set, then we generate
1217*11042SErik.Nordmark@Sun.COM  * a separate datagram (potentially with different source address) for
1218*11042SErik.Nordmark@Sun.COM  * those ills. In any case, only one copy is looped back to ip_input_v4.
1219*11042SErik.Nordmark@Sun.COM  */
1220*11042SErik.Nordmark@Sun.COM int
1221*11042SErik.Nordmark@Sun.COM ire_send_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1222*11042SErik.Nordmark@Sun.COM     ip_xmit_attr_t *ixa, uint32_t *identp)
1223*11042SErik.Nordmark@Sun.COM {
1224*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = (ipha_t *)iph_arg;
1225*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
1226*11042SErik.Nordmark@Sun.COM 	irb_t		*irb = ire->ire_bucket;
1227*11042SErik.Nordmark@Sun.COM 	ire_t		*ire1;
1228*11042SErik.Nordmark@Sun.COM 	mblk_t		*mp1;
1229*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha1;
1230*11042SErik.Nordmark@Sun.COM 	iaflags_t	ixaflags = ixa->ixa_flags;
1231*11042SErik.Nordmark@Sun.COM 	nce_t		*nce1, *nce_orig;
1232*11042SErik.Nordmark@Sun.COM 
1233*11042SErik.Nordmark@Sun.COM 	/*
1234*11042SErik.Nordmark@Sun.COM 	 * Unless ire_send_multirt_v4 already set a ttl, force the
1235*11042SErik.Nordmark@Sun.COM 	 * ttl to a smallish value.
1236*11042SErik.Nordmark@Sun.COM 	 */
1237*11042SErik.Nordmark@Sun.COM 	if (!(ixa->ixa_flags & IXAF_NO_TTL_CHANGE)) {
1238*11042SErik.Nordmark@Sun.COM 		/*
1239*11042SErik.Nordmark@Sun.COM 		 * To avoid broadcast storms, we usually set the TTL to 1 for
1240*11042SErik.Nordmark@Sun.COM 		 * broadcasts.  This can
1241*11042SErik.Nordmark@Sun.COM 		 * be overridden stack-wide through the ip_broadcast_ttl
1242*11042SErik.Nordmark@Sun.COM 		 * ndd tunable, or on a per-connection basis through the
1243*11042SErik.Nordmark@Sun.COM 		 * IP_BROADCAST_TTL socket option.
1244*11042SErik.Nordmark@Sun.COM 		 *
1245*11042SErik.Nordmark@Sun.COM 		 * If SO_DONTROUTE/IXAF_DONTROUTE is set, then ire_send_wire_v4
1246*11042SErik.Nordmark@Sun.COM 		 * will force ttl to one after we've set this.
1247*11042SErik.Nordmark@Sun.COM 		 */
1248*11042SErik.Nordmark@Sun.COM 		if (ixaflags & IXAF_BROADCAST_TTL_SET)
1249*11042SErik.Nordmark@Sun.COM 			ipha->ipha_ttl = ixa->ixa_broadcast_ttl;
1250*11042SErik.Nordmark@Sun.COM 		else
1251*11042SErik.Nordmark@Sun.COM 			ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl;
1252*11042SErik.Nordmark@Sun.COM 	}
1253*11042SErik.Nordmark@Sun.COM 	/*
1254*11042SErik.Nordmark@Sun.COM 	 * Make sure we get a loopback copy (after IPsec and frag)
1255*11042SErik.Nordmark@Sun.COM 	 * Skip hardware checksum so that loopback copy is checksumed.
1256*11042SErik.Nordmark@Sun.COM 	 */
1257*11042SErik.Nordmark@Sun.COM 	ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1258*11042SErik.Nordmark@Sun.COM 
1259*11042SErik.Nordmark@Sun.COM 	/* Do we need to potentially generate multiple copies? */
1260*11042SErik.Nordmark@Sun.COM 	if (irb->irb_ire_cnt == 1 || ixa->ixa_ifindex != 0)
1261*11042SErik.Nordmark@Sun.COM 		return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1262*11042SErik.Nordmark@Sun.COM 
1263*11042SErik.Nordmark@Sun.COM 	/*
1264*11042SErik.Nordmark@Sun.COM 	 * Loop over all IRE_BROADCAST in the bucket (might only be one).
1265*11042SErik.Nordmark@Sun.COM 	 * Note that everything in the bucket has the same destination address.
1266*11042SErik.Nordmark@Sun.COM 	 */
1267*11042SErik.Nordmark@Sun.COM 	irb_refhold(irb);
1268*11042SErik.Nordmark@Sun.COM 	for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
1269*11042SErik.Nordmark@Sun.COM 		/* We do the main IRE after the end of the loop */
1270*11042SErik.Nordmark@Sun.COM 		if (ire1 == ire)
1271*11042SErik.Nordmark@Sun.COM 			continue;
1272*11042SErik.Nordmark@Sun.COM 
1273*11042SErik.Nordmark@Sun.COM 		/*
1274*11042SErik.Nordmark@Sun.COM 		 * Only IREs for the same IP address should be in the same
1275*11042SErik.Nordmark@Sun.COM 		 * bucket.
1276*11042SErik.Nordmark@Sun.COM 		 * But could have IRE_HOSTs in the case of CGTP.
1277*11042SErik.Nordmark@Sun.COM 		 * If we find any multirt routes we bail out of the loop
1278*11042SErik.Nordmark@Sun.COM 		 * and just do the single packet at the end; ip_postfrag_multirt
1279*11042SErik.Nordmark@Sun.COM 		 * will duplicate the packet.
1280*11042SErik.Nordmark@Sun.COM 		 */
1281*11042SErik.Nordmark@Sun.COM 		ASSERT(ire1->ire_addr == ire->ire_addr);
1282*11042SErik.Nordmark@Sun.COM 		if (!(ire1->ire_type & IRE_BROADCAST))
1283*11042SErik.Nordmark@Sun.COM 			continue;
1284*11042SErik.Nordmark@Sun.COM 
1285*11042SErik.Nordmark@Sun.COM 		if (IRE_IS_CONDEMNED(ire1))
1286*11042SErik.Nordmark@Sun.COM 			continue;
1287*11042SErik.Nordmark@Sun.COM 
1288*11042SErik.Nordmark@Sun.COM 		if (ixa->ixa_zoneid != ALL_ZONES &&
1289*11042SErik.Nordmark@Sun.COM 		    ire->ire_zoneid != ire1->ire_zoneid)
1290*11042SErik.Nordmark@Sun.COM 			continue;
1291*11042SErik.Nordmark@Sun.COM 
1292*11042SErik.Nordmark@Sun.COM 		ASSERT(ire->ire_ill != ire1->ire_ill && ire1->ire_ill != NULL);
1293*11042SErik.Nordmark@Sun.COM 
1294*11042SErik.Nordmark@Sun.COM 		if (ire1->ire_flags & RTF_MULTIRT)
1295*11042SErik.Nordmark@Sun.COM 			break;
1296*11042SErik.Nordmark@Sun.COM 
1297*11042SErik.Nordmark@Sun.COM 		/*
1298*11042SErik.Nordmark@Sun.COM 		 * For IPMP we only send for the ipmp_ill. arp_nce_init() will
1299*11042SErik.Nordmark@Sun.COM 		 * ensure that this goes out on the cast_ill.
1300*11042SErik.Nordmark@Sun.COM 		 */
1301*11042SErik.Nordmark@Sun.COM 		if (IS_UNDER_IPMP(ire1->ire_ill))
1302*11042SErik.Nordmark@Sun.COM 			continue;
1303*11042SErik.Nordmark@Sun.COM 
1304*11042SErik.Nordmark@Sun.COM 		mp1 = copymsg(mp);
1305*11042SErik.Nordmark@Sun.COM 		if (mp1 == NULL) {
1306*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ire1->ire_ill->ill_ip_mib,
1307*11042SErik.Nordmark@Sun.COM 			    ipIfStatsOutDiscards);
1308*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards",
1309*11042SErik.Nordmark@Sun.COM 			    mp, ire1->ire_ill);
1310*11042SErik.Nordmark@Sun.COM 			continue;
1311*11042SErik.Nordmark@Sun.COM 		}
1312*11042SErik.Nordmark@Sun.COM 
1313*11042SErik.Nordmark@Sun.COM 		ipha1 = (ipha_t *)mp1->b_rptr;
1314*11042SErik.Nordmark@Sun.COM 		if (ixa->ixa_flags & IXAF_SET_SOURCE) {
1315*11042SErik.Nordmark@Sun.COM 			/*
1316*11042SErik.Nordmark@Sun.COM 			 * Need to pick a different source address for each
1317*11042SErik.Nordmark@Sun.COM 			 * interface. If we have a global IPsec policy and
1318*11042SErik.Nordmark@Sun.COM 			 * no per-socket policy then we punt to
1319*11042SErik.Nordmark@Sun.COM 			 * ip_output_simple_v4 using a separate ip_xmit_attr_t.
1320*11042SErik.Nordmark@Sun.COM 			 */
1321*11042SErik.Nordmark@Sun.COM 			if (ixaflags & IXAF_IPSEC_GLOBAL_POLICY) {
1322*11042SErik.Nordmark@Sun.COM 				ip_output_simple_broadcast(ixa, mp1);
1323*11042SErik.Nordmark@Sun.COM 				continue;
1324*11042SErik.Nordmark@Sun.COM 			}
1325*11042SErik.Nordmark@Sun.COM 			/* Pick a new source address for each interface */
1326*11042SErik.Nordmark@Sun.COM 			if (ip_select_source_v4(ire1->ire_ill, INADDR_ANY,
1327*11042SErik.Nordmark@Sun.COM 			    ipha1->ipha_dst, INADDR_ANY, ixa->ixa_zoneid, ipst,
1328*11042SErik.Nordmark@Sun.COM 			    &ipha1->ipha_src, NULL, NULL) != 0) {
1329*11042SErik.Nordmark@Sun.COM 				BUMP_MIB(ire1->ire_ill->ill_ip_mib,
1330*11042SErik.Nordmark@Sun.COM 				    ipIfStatsOutDiscards);
1331*11042SErik.Nordmark@Sun.COM 				ip_drop_output("ipIfStatsOutDiscards - select "
1332*11042SErik.Nordmark@Sun.COM 				    "broadcast source", mp1, ire1->ire_ill);
1333*11042SErik.Nordmark@Sun.COM 				freemsg(mp1);
1334*11042SErik.Nordmark@Sun.COM 				continue;
1335*11042SErik.Nordmark@Sun.COM 			}
1336*11042SErik.Nordmark@Sun.COM 			/*
1337*11042SErik.Nordmark@Sun.COM 			 * Check against global IPsec policy to set the AH/ESP
1338*11042SErik.Nordmark@Sun.COM 			 * attributes. IPsec will set IXAF_IPSEC_* and
1339*11042SErik.Nordmark@Sun.COM 			 * ixa_ipsec_* as appropriate.
1340*11042SErik.Nordmark@Sun.COM 			 */
1341*11042SErik.Nordmark@Sun.COM 			if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) {
1342*11042SErik.Nordmark@Sun.COM 				ASSERT(ixa->ixa_ipsec_policy == NULL);
1343*11042SErik.Nordmark@Sun.COM 				mp1 = ip_output_attach_policy(mp1, ipha, NULL,
1344*11042SErik.Nordmark@Sun.COM 				    NULL, ixa);
1345*11042SErik.Nordmark@Sun.COM 				if (mp1 == NULL) {
1346*11042SErik.Nordmark@Sun.COM 					/*
1347*11042SErik.Nordmark@Sun.COM 					 * MIB and ip_drop_packet already
1348*11042SErik.Nordmark@Sun.COM 					 * done
1349*11042SErik.Nordmark@Sun.COM 					 */
1350*11042SErik.Nordmark@Sun.COM 					continue;
1351*11042SErik.Nordmark@Sun.COM 				}
1352*11042SErik.Nordmark@Sun.COM 			}
1353*11042SErik.Nordmark@Sun.COM 		}
1354*11042SErik.Nordmark@Sun.COM 		/* Make sure we have an NCE on this ill */
1355*11042SErik.Nordmark@Sun.COM 		nce1 = arp_nce_init(ire1->ire_ill, ire1->ire_addr,
1356*11042SErik.Nordmark@Sun.COM 		    ire1->ire_type);
1357*11042SErik.Nordmark@Sun.COM 		if (nce1 == NULL) {
1358*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ire1->ire_ill->ill_ip_mib,
1359*11042SErik.Nordmark@Sun.COM 			    ipIfStatsOutDiscards);
1360*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - broadcast nce",
1361*11042SErik.Nordmark@Sun.COM 			    mp1, ire1->ire_ill);
1362*11042SErik.Nordmark@Sun.COM 			freemsg(mp1);
1363*11042SErik.Nordmark@Sun.COM 			continue;
1364*11042SErik.Nordmark@Sun.COM 		}
1365*11042SErik.Nordmark@Sun.COM 		nce_orig = ixa->ixa_nce;
1366*11042SErik.Nordmark@Sun.COM 		ixa->ixa_nce = nce1;
1367*11042SErik.Nordmark@Sun.COM 
1368*11042SErik.Nordmark@Sun.COM 		ire_refhold(ire1);
1369*11042SErik.Nordmark@Sun.COM 		/*
1370*11042SErik.Nordmark@Sun.COM 		 * Ignore any errors here. We just collect the errno for
1371*11042SErik.Nordmark@Sun.COM 		 * the main ire below
1372*11042SErik.Nordmark@Sun.COM 		 */
1373*11042SErik.Nordmark@Sun.COM 		(void) ire_send_wire_v4(ire1, mp1, ipha1, ixa, identp);
1374*11042SErik.Nordmark@Sun.COM 		ire_refrele(ire1);
1375*11042SErik.Nordmark@Sun.COM 
1376*11042SErik.Nordmark@Sun.COM 		ixa->ixa_nce = nce_orig;
1377*11042SErik.Nordmark@Sun.COM 		nce_refrele(nce1);
1378*11042SErik.Nordmark@Sun.COM 
1379*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags &= ~IXAF_LOOPBACK_COPY;
1380*11042SErik.Nordmark@Sun.COM 	}
1381*11042SErik.Nordmark@Sun.COM 	irb_refrele(irb);
1382*11042SErik.Nordmark@Sun.COM 	/* Finally, the main one */
1383*11042SErik.Nordmark@Sun.COM 
1384*11042SErik.Nordmark@Sun.COM 	/*
1385*11042SErik.Nordmark@Sun.COM 	 * For IPMP we only send broadcasts on the ipmp_ill.
1386*11042SErik.Nordmark@Sun.COM 	 */
1387*11042SErik.Nordmark@Sun.COM 	if (IS_UNDER_IPMP(ire->ire_ill)) {
1388*11042SErik.Nordmark@Sun.COM 		freemsg(mp);
1389*11042SErik.Nordmark@Sun.COM 		return (0);
1390*11042SErik.Nordmark@Sun.COM 	}
1391*11042SErik.Nordmark@Sun.COM 
1392*11042SErik.Nordmark@Sun.COM 	return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1393*11042SErik.Nordmark@Sun.COM }
1394*11042SErik.Nordmark@Sun.COM 
1395*11042SErik.Nordmark@Sun.COM /*
1396*11042SErik.Nordmark@Sun.COM  * Send a packet using a different source address and different
1397*11042SErik.Nordmark@Sun.COM  * IPsec policy.
1398*11042SErik.Nordmark@Sun.COM  */
1399*11042SErik.Nordmark@Sun.COM static void
1400*11042SErik.Nordmark@Sun.COM ip_output_simple_broadcast(ip_xmit_attr_t *ixa, mblk_t *mp)
1401*11042SErik.Nordmark@Sun.COM {
1402*11042SErik.Nordmark@Sun.COM 	ip_xmit_attr_t ixas;
1403*11042SErik.Nordmark@Sun.COM 
1404*11042SErik.Nordmark@Sun.COM 	bzero(&ixas, sizeof (ixas));
1405*11042SErik.Nordmark@Sun.COM 	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
1406*11042SErik.Nordmark@Sun.COM 	ixas.ixa_zoneid = ixa->ixa_zoneid;
1407*11042SErik.Nordmark@Sun.COM 	ixas.ixa_ifindex = 0;
1408*11042SErik.Nordmark@Sun.COM 	ixas.ixa_ipst = ixa->ixa_ipst;
1409*11042SErik.Nordmark@Sun.COM 	ixas.ixa_cred = ixa->ixa_cred;
1410*11042SErik.Nordmark@Sun.COM 	ixas.ixa_cpid = ixa->ixa_cpid;
1411*11042SErik.Nordmark@Sun.COM 	ixas.ixa_tsl = ixa->ixa_tsl;
1412*11042SErik.Nordmark@Sun.COM 	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1413*11042SErik.Nordmark@Sun.COM 
1414*11042SErik.Nordmark@Sun.COM 	(void) ip_output_simple(mp, &ixas);
1415*11042SErik.Nordmark@Sun.COM 	ixa_cleanup(&ixas);
1416*11042SErik.Nordmark@Sun.COM }
1417*11042SErik.Nordmark@Sun.COM 
1418*11042SErik.Nordmark@Sun.COM 
1419*11042SErik.Nordmark@Sun.COM static void
1420*11042SErik.Nordmark@Sun.COM multirt_check_v4(ire_t *ire, ipha_t *ipha, ip_xmit_attr_t *ixa)
1421*11042SErik.Nordmark@Sun.COM {
1422*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
1423*11042SErik.Nordmark@Sun.COM 
1424*11042SErik.Nordmark@Sun.COM 	/* Limit the TTL on multirt packets */
1425*11042SErik.Nordmark@Sun.COM 	if (ire->ire_type & IRE_MULTICAST) {
1426*11042SErik.Nordmark@Sun.COM 		if (ipha->ipha_ttl > 1) {
1427*11042SErik.Nordmark@Sun.COM 			ip2dbg(("ire_send_multirt_v4: forcing multicast "
1428*11042SErik.Nordmark@Sun.COM 			    "multirt TTL to 1 (was %d), dst 0x%08x\n",
1429*11042SErik.Nordmark@Sun.COM 			    ipha->ipha_ttl, ntohl(ire->ire_addr)));
1430*11042SErik.Nordmark@Sun.COM 			ipha->ipha_ttl = 1;
1431*11042SErik.Nordmark@Sun.COM 		}
1432*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1433*11042SErik.Nordmark@Sun.COM 	} else if ((ipst->ips_ip_multirt_ttl > 0) &&
1434*11042SErik.Nordmark@Sun.COM 	    (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) {
1435*11042SErik.Nordmark@Sun.COM 		ipha->ipha_ttl = ipst->ips_ip_multirt_ttl;
1436*11042SErik.Nordmark@Sun.COM 		/*
1437*11042SErik.Nordmark@Sun.COM 		 * Need to ensure we don't increase the ttl should we go through
1438*11042SErik.Nordmark@Sun.COM 		 * ire_send_broadcast or multicast.
1439*11042SErik.Nordmark@Sun.COM 		 */
1440*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1441*11042SErik.Nordmark@Sun.COM 	}
1442*11042SErik.Nordmark@Sun.COM }
1443*11042SErik.Nordmark@Sun.COM 
1444*11042SErik.Nordmark@Sun.COM /*
1445*11042SErik.Nordmark@Sun.COM  * ire_sendfn for IRE_MULTICAST
1446*11042SErik.Nordmark@Sun.COM  */
1447*11042SErik.Nordmark@Sun.COM int
1448*11042SErik.Nordmark@Sun.COM ire_send_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1449*11042SErik.Nordmark@Sun.COM     ip_xmit_attr_t *ixa, uint32_t *identp)
1450*11042SErik.Nordmark@Sun.COM {
1451*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = (ipha_t *)iph_arg;
1452*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
1453*11042SErik.Nordmark@Sun.COM 	ill_t		*ill = ire->ire_ill;
1454*11042SErik.Nordmark@Sun.COM 	iaflags_t	ixaflags = ixa->ixa_flags;
1455*11042SErik.Nordmark@Sun.COM 
1456*11042SErik.Nordmark@Sun.COM 	/*
1457*11042SErik.Nordmark@Sun.COM 	 * The IRE_MULTICAST is the same whether or not multirt is in use.
1458*11042SErik.Nordmark@Sun.COM 	 * Hence we need special-case code.
1459*11042SErik.Nordmark@Sun.COM 	 */
1460*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_MULTIRT_MULTICAST)
1461*11042SErik.Nordmark@Sun.COM 		multirt_check_v4(ire, ipha, ixa);
1462*11042SErik.Nordmark@Sun.COM 
1463*11042SErik.Nordmark@Sun.COM 	/*
1464*11042SErik.Nordmark@Sun.COM 	 * Check if anything in ip_input_v4 wants a copy of the transmitted
1465*11042SErik.Nordmark@Sun.COM 	 * packet (after IPsec and fragmentation)
1466*11042SErik.Nordmark@Sun.COM 	 *
1467*11042SErik.Nordmark@Sun.COM 	 * 1. Multicast routers always need a copy unless SO_DONTROUTE is set
1468*11042SErik.Nordmark@Sun.COM 	 *    RSVP and the rsvp daemon is an example of a
1469*11042SErik.Nordmark@Sun.COM 	 *    protocol and user level process that
1470*11042SErik.Nordmark@Sun.COM 	 *    handles it's own routing. Hence, it uses the
1471*11042SErik.Nordmark@Sun.COM 	 *    SO_DONTROUTE option to accomplish this.
1472*11042SErik.Nordmark@Sun.COM 	 * 2. If the sender has set IP_MULTICAST_LOOP, then we just
1473*11042SErik.Nordmark@Sun.COM 	 *    check whether there are any receivers for the group on the ill
1474*11042SErik.Nordmark@Sun.COM 	 *    (ignoring the zoneid).
1475*11042SErik.Nordmark@Sun.COM 	 * 3. If IP_MULTICAST_LOOP is not set, then we check if there are
1476*11042SErik.Nordmark@Sun.COM 	 *    any members in other shared-IP zones.
1477*11042SErik.Nordmark@Sun.COM 	 *    If such members exist, then we indicate that the sending zone
1478*11042SErik.Nordmark@Sun.COM 	 *    shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP
1479*11042SErik.Nordmark@Sun.COM 	 *    behavior.
1480*11042SErik.Nordmark@Sun.COM 	 *
1481*11042SErik.Nordmark@Sun.COM 	 * When we loopback we skip hardware checksum to make sure loopback
1482*11042SErik.Nordmark@Sun.COM 	 * copy is checksumed.
1483*11042SErik.Nordmark@Sun.COM 	 *
1484*11042SErik.Nordmark@Sun.COM 	 * Note that ire_ill is the upper in the case of IPMP.
1485*11042SErik.Nordmark@Sun.COM 	 */
1486*11042SErik.Nordmark@Sun.COM 	ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM);
1487*11042SErik.Nordmark@Sun.COM 	if (ipst->ips_ip_g_mrouter && ill->ill_mrouter_cnt > 0 &&
1488*11042SErik.Nordmark@Sun.COM 	    !(ixaflags & IXAF_DONTROUTE)) {
1489*11042SErik.Nordmark@Sun.COM 		ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1490*11042SErik.Nordmark@Sun.COM 	} else if (ixaflags & IXAF_MULTICAST_LOOP) {
1491*11042SErik.Nordmark@Sun.COM 		/*
1492*11042SErik.Nordmark@Sun.COM 		 * If this zone or any other zone has members then loopback
1493*11042SErik.Nordmark@Sun.COM 		 * a copy.
1494*11042SErik.Nordmark@Sun.COM 		 */
1495*11042SErik.Nordmark@Sun.COM 		if (ill_hasmembers_v4(ill, ipha->ipha_dst))
1496*11042SErik.Nordmark@Sun.COM 			ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1497*11042SErik.Nordmark@Sun.COM 	} else if (ipst->ips_netstack->netstack_numzones > 1) {
1498*11042SErik.Nordmark@Sun.COM 		/*
1499*11042SErik.Nordmark@Sun.COM 		 * This zone should not have a copy. But there are some other
1500*11042SErik.Nordmark@Sun.COM 		 * zones which might have members.
1501*11042SErik.Nordmark@Sun.COM 		 */
1502*11042SErik.Nordmark@Sun.COM 		if (ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst,
1503*11042SErik.Nordmark@Sun.COM 		    ixa->ixa_zoneid)) {
1504*11042SErik.Nordmark@Sun.COM 			ixa->ixa_flags |= IXAF_NO_LOOP_ZONEID_SET;
1505*11042SErik.Nordmark@Sun.COM 			ixa->ixa_no_loop_zoneid = ixa->ixa_zoneid;
1506*11042SErik.Nordmark@Sun.COM 			ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM;
1507*11042SErik.Nordmark@Sun.COM 		}
1508*11042SErik.Nordmark@Sun.COM 	}
1509*11042SErik.Nordmark@Sun.COM 
1510*11042SErik.Nordmark@Sun.COM 	/*
1511*11042SErik.Nordmark@Sun.COM 	 * Unless ire_send_multirt_v4 or icmp_output_hdrincl already set a ttl,
1512*11042SErik.Nordmark@Sun.COM 	 * force the ttl to the IP_MULTICAST_TTL value
1513*11042SErik.Nordmark@Sun.COM 	 */
1514*11042SErik.Nordmark@Sun.COM 	if (!(ixaflags & IXAF_NO_TTL_CHANGE)) {
1515*11042SErik.Nordmark@Sun.COM 		ipha->ipha_ttl = ixa->ixa_multicast_ttl;
1516*11042SErik.Nordmark@Sun.COM 	}
1517*11042SErik.Nordmark@Sun.COM 
1518*11042SErik.Nordmark@Sun.COM 	return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1519*11042SErik.Nordmark@Sun.COM }
1520*11042SErik.Nordmark@Sun.COM 
1521*11042SErik.Nordmark@Sun.COM /*
1522*11042SErik.Nordmark@Sun.COM  * ire_sendfn for IREs with RTF_MULTIRT
1523*11042SErik.Nordmark@Sun.COM  */
1524*11042SErik.Nordmark@Sun.COM int
1525*11042SErik.Nordmark@Sun.COM ire_send_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1526*11042SErik.Nordmark@Sun.COM     ip_xmit_attr_t *ixa, uint32_t *identp)
1527*11042SErik.Nordmark@Sun.COM {
1528*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = (ipha_t *)iph_arg;
1529*11042SErik.Nordmark@Sun.COM 
1530*11042SErik.Nordmark@Sun.COM 	multirt_check_v4(ire, ipha, ixa);
1531*11042SErik.Nordmark@Sun.COM 
1532*11042SErik.Nordmark@Sun.COM 	if (ire->ire_type & IRE_MULTICAST)
1533*11042SErik.Nordmark@Sun.COM 		return (ire_send_multicast_v4(ire, mp, ipha, ixa, identp));
1534*11042SErik.Nordmark@Sun.COM 	else if (ire->ire_type & IRE_BROADCAST)
1535*11042SErik.Nordmark@Sun.COM 		return (ire_send_broadcast_v4(ire, mp, ipha, ixa, identp));
1536*11042SErik.Nordmark@Sun.COM 	else
1537*11042SErik.Nordmark@Sun.COM 		return (ire_send_wire_v4(ire, mp, ipha, ixa, identp));
1538*11042SErik.Nordmark@Sun.COM }
1539*11042SErik.Nordmark@Sun.COM 
1540*11042SErik.Nordmark@Sun.COM /*
1541*11042SErik.Nordmark@Sun.COM  * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE
1542*11042SErik.Nordmark@Sun.COM  */
1543*11042SErik.Nordmark@Sun.COM int
1544*11042SErik.Nordmark@Sun.COM ire_send_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1545*11042SErik.Nordmark@Sun.COM     ip_xmit_attr_t *ixa, uint32_t *identp)
1546*11042SErik.Nordmark@Sun.COM {
1547*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
1548*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = (ipha_t *)iph_arg;
1549*11042SErik.Nordmark@Sun.COM 	ill_t		*ill;
1550*11042SErik.Nordmark@Sun.COM 	ip_recv_attr_t	iras;
1551*11042SErik.Nordmark@Sun.COM 	boolean_t	dummy;
1552*11042SErik.Nordmark@Sun.COM 
1553*11042SErik.Nordmark@Sun.COM 	/* We assign an IP ident for nice errors */
1554*11042SErik.Nordmark@Sun.COM 	ipha->ipha_ident = atomic_add_32_nv(identp, 1);
1555*11042SErik.Nordmark@Sun.COM 
1556*11042SErik.Nordmark@Sun.COM 	BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes);
1557*11042SErik.Nordmark@Sun.COM 
1558*11042SErik.Nordmark@Sun.COM 	if (ire->ire_type & IRE_NOROUTE) {
1559*11042SErik.Nordmark@Sun.COM 		/* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */
1560*11042SErik.Nordmark@Sun.COM 		ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0,
1561*11042SErik.Nordmark@Sun.COM 		    RTA_DST, ipst);
1562*11042SErik.Nordmark@Sun.COM 	}
1563*11042SErik.Nordmark@Sun.COM 
1564*11042SErik.Nordmark@Sun.COM 	if (ire->ire_flags & RTF_BLACKHOLE) {
1565*11042SErik.Nordmark@Sun.COM 		ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp, NULL);
1566*11042SErik.Nordmark@Sun.COM 		freemsg(mp);
1567*11042SErik.Nordmark@Sun.COM 		/* No error even for local senders - silent blackhole */
1568*11042SErik.Nordmark@Sun.COM 		return (0);
1569*11042SErik.Nordmark@Sun.COM 	}
1570*11042SErik.Nordmark@Sun.COM 	ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp, NULL);
1571*11042SErik.Nordmark@Sun.COM 
1572*11042SErik.Nordmark@Sun.COM 	/*
1573*11042SErik.Nordmark@Sun.COM 	 * We need an ill_t for the ip_recv_attr_t even though this packet
1574*11042SErik.Nordmark@Sun.COM 	 * was never received and icmp_unreachable doesn't currently use
1575*11042SErik.Nordmark@Sun.COM 	 * ira_ill.
1576*11042SErik.Nordmark@Sun.COM 	 */
1577*11042SErik.Nordmark@Sun.COM 	ill = ill_lookup_on_name("lo0", B_FALSE,
1578*11042SErik.Nordmark@Sun.COM 	    !(ixa->ixa_flags & IRAF_IS_IPV4), &dummy, ipst);
1579*11042SErik.Nordmark@Sun.COM 	if (ill == NULL) {
1580*11042SErik.Nordmark@Sun.COM 		freemsg(mp);
1581*11042SErik.Nordmark@Sun.COM 		return (EHOSTUNREACH);
1582*11042SErik.Nordmark@Sun.COM 	}
1583*11042SErik.Nordmark@Sun.COM 
1584*11042SErik.Nordmark@Sun.COM 	bzero(&iras, sizeof (iras));
1585*11042SErik.Nordmark@Sun.COM 	/* Map ixa to ira including IPsec policies */
1586*11042SErik.Nordmark@Sun.COM 	ipsec_out_to_in(ixa, ill, &iras);
1587*11042SErik.Nordmark@Sun.COM 
1588*11042SErik.Nordmark@Sun.COM 	if (ip_source_routed(ipha, ipst)) {
1589*11042SErik.Nordmark@Sun.COM 		icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, &iras);
1590*11042SErik.Nordmark@Sun.COM 	} else {
1591*11042SErik.Nordmark@Sun.COM 		icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, &iras);
1592*11042SErik.Nordmark@Sun.COM 	}
1593*11042SErik.Nordmark@Sun.COM 	/* We moved any IPsec refs from ixa to iras */
1594*11042SErik.Nordmark@Sun.COM 	ira_cleanup(&iras, B_FALSE);
1595*11042SErik.Nordmark@Sun.COM 	ill_refrele(ill);
1596*11042SErik.Nordmark@Sun.COM 	return (EHOSTUNREACH);
1597*11042SErik.Nordmark@Sun.COM }
1598*11042SErik.Nordmark@Sun.COM 
1599*11042SErik.Nordmark@Sun.COM /*
1600*11042SErik.Nordmark@Sun.COM  * Calculate a checksum ignoring any hardware capabilities
1601*11042SErik.Nordmark@Sun.COM  *
1602*11042SErik.Nordmark@Sun.COM  * Returns B_FALSE if the packet was too short for the checksum. Caller
1603*11042SErik.Nordmark@Sun.COM  * should free and do stats.
1604*11042SErik.Nordmark@Sun.COM  */
1605*11042SErik.Nordmark@Sun.COM static boolean_t
1606*11042SErik.Nordmark@Sun.COM ip_output_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_xmit_attr_t *ixa)
1607*11042SErik.Nordmark@Sun.COM {
1608*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
1609*11042SErik.Nordmark@Sun.COM 	uint_t		pktlen = ixa->ixa_pktlen;
1610*11042SErik.Nordmark@Sun.COM 	uint16_t	*cksump;
1611*11042SErik.Nordmark@Sun.COM 	uint32_t	cksum;
1612*11042SErik.Nordmark@Sun.COM 	uint8_t		protocol = ixa->ixa_protocol;
1613*11042SErik.Nordmark@Sun.COM 	uint16_t	ip_hdr_length = ixa->ixa_ip_hdr_length;
1614*11042SErik.Nordmark@Sun.COM 	ipaddr_t	dst = ipha->ipha_dst;
1615*11042SErik.Nordmark@Sun.COM 	ipaddr_t	src = ipha->ipha_src;
1616*11042SErik.Nordmark@Sun.COM 
1617*11042SErik.Nordmark@Sun.COM 	/* Just in case it contained garbage */
1618*11042SErik.Nordmark@Sun.COM 	DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS;
1619*11042SErik.Nordmark@Sun.COM 
1620*11042SErik.Nordmark@Sun.COM 	/*
1621*11042SErik.Nordmark@Sun.COM 	 * Calculate ULP checksum
1622*11042SErik.Nordmark@Sun.COM 	 */
1623*11042SErik.Nordmark@Sun.COM 	if (protocol == IPPROTO_TCP) {
1624*11042SErik.Nordmark@Sun.COM 		cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length);
1625*11042SErik.Nordmark@Sun.COM 		cksum = IP_TCP_CSUM_COMP;
1626*11042SErik.Nordmark@Sun.COM 	} else if (protocol == IPPROTO_UDP) {
1627*11042SErik.Nordmark@Sun.COM 		cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length);
1628*11042SErik.Nordmark@Sun.COM 		cksum = IP_UDP_CSUM_COMP;
1629*11042SErik.Nordmark@Sun.COM 	} else if (protocol == IPPROTO_SCTP) {
1630*11042SErik.Nordmark@Sun.COM 		sctp_hdr_t	*sctph;
1631*11042SErik.Nordmark@Sun.COM 
1632*11042SErik.Nordmark@Sun.COM 		ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph)));
1633*11042SErik.Nordmark@Sun.COM 		sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length);
1634*11042SErik.Nordmark@Sun.COM 		/*
1635*11042SErik.Nordmark@Sun.COM 		 * Zero out the checksum field to ensure proper
1636*11042SErik.Nordmark@Sun.COM 		 * checksum calculation.
1637*11042SErik.Nordmark@Sun.COM 		 */
1638*11042SErik.Nordmark@Sun.COM 		sctph->sh_chksum = 0;
1639*11042SErik.Nordmark@Sun.COM #ifdef	DEBUG
1640*11042SErik.Nordmark@Sun.COM 		if (!skip_sctp_cksum)
1641*11042SErik.Nordmark@Sun.COM #endif
1642*11042SErik.Nordmark@Sun.COM 			sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length);
1643*11042SErik.Nordmark@Sun.COM 		goto ip_hdr_cksum;
1644*11042SErik.Nordmark@Sun.COM 	} else {
1645*11042SErik.Nordmark@Sun.COM 		goto ip_hdr_cksum;
1646*11042SErik.Nordmark@Sun.COM 	}
1647*11042SErik.Nordmark@Sun.COM 
1648*11042SErik.Nordmark@Sun.COM 	/* ULP puts the checksum field is in the first mblk */
1649*11042SErik.Nordmark@Sun.COM 	ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr);
1650*11042SErik.Nordmark@Sun.COM 
1651*11042SErik.Nordmark@Sun.COM 	/*
1652*11042SErik.Nordmark@Sun.COM 	 * We accumulate the pseudo header checksum in cksum.
1653*11042SErik.Nordmark@Sun.COM 	 * This is pretty hairy code, so watch close.  One
1654*11042SErik.Nordmark@Sun.COM 	 * thing to keep in mind is that UDP and TCP have
1655*11042SErik.Nordmark@Sun.COM 	 * stored their respective datagram lengths in their
1656*11042SErik.Nordmark@Sun.COM 	 * checksum fields.  This lines things up real nice.
1657*11042SErik.Nordmark@Sun.COM 	 */
1658*11042SErik.Nordmark@Sun.COM 	cksum += (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);
1659*11042SErik.Nordmark@Sun.COM 
1660*11042SErik.Nordmark@Sun.COM 	cksum = IP_CSUM(mp, ip_hdr_length, cksum);
1661*11042SErik.Nordmark@Sun.COM 	/*
1662*11042SErik.Nordmark@Sun.COM 	 * For UDP/IPv4 a zero means that the packets wasn't checksummed.
1663*11042SErik.Nordmark@Sun.COM 	 * Change to 0xffff
1664*11042SErik.Nordmark@Sun.COM 	 */
1665*11042SErik.Nordmark@Sun.COM 	if (protocol == IPPROTO_UDP && cksum == 0)
1666*11042SErik.Nordmark@Sun.COM 		*cksump = ~cksum;
1667*11042SErik.Nordmark@Sun.COM 	else
1668*11042SErik.Nordmark@Sun.COM 		*cksump = cksum;
1669*11042SErik.Nordmark@Sun.COM 
1670*11042SErik.Nordmark@Sun.COM 	IP_STAT(ipst, ip_out_sw_cksum);
1671*11042SErik.Nordmark@Sun.COM 	IP_STAT_UPDATE(ipst, ip_out_sw_cksum_bytes, pktlen);
1672*11042SErik.Nordmark@Sun.COM 
1673*11042SErik.Nordmark@Sun.COM ip_hdr_cksum:
1674*11042SErik.Nordmark@Sun.COM 	/* Calculate IPv4 header checksum */
1675*11042SErik.Nordmark@Sun.COM 	ipha->ipha_hdr_checksum = 0;
1676*11042SErik.Nordmark@Sun.COM 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1677*11042SErik.Nordmark@Sun.COM 	return (B_TRUE);
1678*11042SErik.Nordmark@Sun.COM }
1679*11042SErik.Nordmark@Sun.COM 
1680*11042SErik.Nordmark@Sun.COM /*
1681*11042SErik.Nordmark@Sun.COM  * Calculate the ULP checksum - try to use hardware.
1682*11042SErik.Nordmark@Sun.COM  * In the case of MULTIRT, broadcast or multicast the
1683*11042SErik.Nordmark@Sun.COM  * IXAF_NO_HW_CKSUM is set in which case we use software.
1684*11042SErik.Nordmark@Sun.COM  *
1685*11042SErik.Nordmark@Sun.COM  * If the hardware supports IP header checksum offload; then clear the
1686*11042SErik.Nordmark@Sun.COM  * contents of IP header checksum field as expected by NIC.
1687*11042SErik.Nordmark@Sun.COM  * Do this only if we offloaded either full or partial sum.
1688*11042SErik.Nordmark@Sun.COM  *
1689*11042SErik.Nordmark@Sun.COM  * Returns B_FALSE if the packet was too short for the checksum. Caller
1690*11042SErik.Nordmark@Sun.COM  * should free and do stats.
1691*11042SErik.Nordmark@Sun.COM  */
1692*11042SErik.Nordmark@Sun.COM static boolean_t
1693*11042SErik.Nordmark@Sun.COM ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha,
1694*11042SErik.Nordmark@Sun.COM     ip_xmit_attr_t *ixa, ill_t *ill)
1695*11042SErik.Nordmark@Sun.COM {
1696*11042SErik.Nordmark@Sun.COM 	uint_t		pktlen = ixa->ixa_pktlen;
1697*11042SErik.Nordmark@Sun.COM 	uint16_t	*cksump;
1698*11042SErik.Nordmark@Sun.COM 	uint16_t	hck_flags;
1699*11042SErik.Nordmark@Sun.COM 	uint32_t	cksum;
1700*11042SErik.Nordmark@Sun.COM 	uint8_t		protocol = ixa->ixa_protocol;
1701*11042SErik.Nordmark@Sun.COM 	uint16_t	ip_hdr_length = ixa->ixa_ip_hdr_length;
1702*11042SErik.Nordmark@Sun.COM 
1703*11042SErik.Nordmark@Sun.COM 	if ((ixaflags & IXAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) ||
1704*11042SErik.Nordmark@Sun.COM 	    !dohwcksum) {
1705*11042SErik.Nordmark@Sun.COM 		return (ip_output_sw_cksum_v4(mp, ipha, ixa));
1706*11042SErik.Nordmark@Sun.COM 	}
1707*11042SErik.Nordmark@Sun.COM 
1708*11042SErik.Nordmark@Sun.COM 	/*
1709*11042SErik.Nordmark@Sun.COM 	 * Calculate ULP checksum. Note that we don't use cksump and cksum
1710*11042SErik.Nordmark@Sun.COM 	 * if the ill has FULL support.
1711*11042SErik.Nordmark@Sun.COM 	 */
1712*11042SErik.Nordmark@Sun.COM 	if (protocol == IPPROTO_TCP) {
1713*11042SErik.Nordmark@Sun.COM 		cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length);
1714*11042SErik.Nordmark@Sun.COM 		cksum = IP_TCP_CSUM_COMP;	/* Pseudo-header cksum */
1715*11042SErik.Nordmark@Sun.COM 	} else if (protocol == IPPROTO_UDP) {
1716*11042SErik.Nordmark@Sun.COM 		cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length);
1717*11042SErik.Nordmark@Sun.COM 		cksum = IP_UDP_CSUM_COMP;	/* Pseudo-header cksum */
1718*11042SErik.Nordmark@Sun.COM 	} else if (protocol == IPPROTO_SCTP) {
1719*11042SErik.Nordmark@Sun.COM 		sctp_hdr_t	*sctph;
1720*11042SErik.Nordmark@Sun.COM 
1721*11042SErik.Nordmark@Sun.COM 		ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph)));
1722*11042SErik.Nordmark@Sun.COM 		sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length);
1723*11042SErik.Nordmark@Sun.COM 		/*
1724*11042SErik.Nordmark@Sun.COM 		 * Zero out the checksum field to ensure proper
1725*11042SErik.Nordmark@Sun.COM 		 * checksum calculation.
1726*11042SErik.Nordmark@Sun.COM 		 */
1727*11042SErik.Nordmark@Sun.COM 		sctph->sh_chksum = 0;
1728*11042SErik.Nordmark@Sun.COM #ifdef	DEBUG
1729*11042SErik.Nordmark@Sun.COM 		if (!skip_sctp_cksum)
1730*11042SErik.Nordmark@Sun.COM #endif
1731*11042SErik.Nordmark@Sun.COM 			sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length);
1732*11042SErik.Nordmark@Sun.COM 		goto ip_hdr_cksum;
1733*11042SErik.Nordmark@Sun.COM 	} else {
1734*11042SErik.Nordmark@Sun.COM 	ip_hdr_cksum:
1735*11042SErik.Nordmark@Sun.COM 		/* Calculate IPv4 header checksum */
1736*11042SErik.Nordmark@Sun.COM 		ipha->ipha_hdr_checksum = 0;
1737*11042SErik.Nordmark@Sun.COM 		ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1738*11042SErik.Nordmark@Sun.COM 		return (B_TRUE);
1739*11042SErik.Nordmark@Sun.COM 	}
1740*11042SErik.Nordmark@Sun.COM 
1741*11042SErik.Nordmark@Sun.COM 	/* ULP puts the checksum field is in the first mblk */
1742*11042SErik.Nordmark@Sun.COM 	ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr);
1743*11042SErik.Nordmark@Sun.COM 
1744*11042SErik.Nordmark@Sun.COM 	/*
1745*11042SErik.Nordmark@Sun.COM 	 * Underlying interface supports hardware checksum offload for
1746*11042SErik.Nordmark@Sun.COM 	 * the payload; leave the payload checksum for the hardware to
1747*11042SErik.Nordmark@Sun.COM 	 * calculate.  N.B: We only need to set up checksum info on the
1748*11042SErik.Nordmark@Sun.COM 	 * first mblk.
1749*11042SErik.Nordmark@Sun.COM 	 */
1750*11042SErik.Nordmark@Sun.COM 	hck_flags = ill->ill_hcksum_capab->ill_hcksum_txflags;
1751*11042SErik.Nordmark@Sun.COM 
1752*11042SErik.Nordmark@Sun.COM 	DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS;
1753*11042SErik.Nordmark@Sun.COM 	if (hck_flags & HCKSUM_INET_FULL_V4) {
1754*11042SErik.Nordmark@Sun.COM 		/*
1755*11042SErik.Nordmark@Sun.COM 		 * Hardware calculates pseudo-header, header and the
1756*11042SErik.Nordmark@Sun.COM 		 * payload checksums, so clear the checksum field in
1757*11042SErik.Nordmark@Sun.COM 		 * the protocol header.
1758*11042SErik.Nordmark@Sun.COM 		 */
1759*11042SErik.Nordmark@Sun.COM 		*cksump = 0;
1760*11042SErik.Nordmark@Sun.COM 		DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
1761*11042SErik.Nordmark@Sun.COM 
1762*11042SErik.Nordmark@Sun.COM 		ipha->ipha_hdr_checksum = 0;
1763*11042SErik.Nordmark@Sun.COM 		if (hck_flags & HCKSUM_IPHDRCKSUM) {
1764*11042SErik.Nordmark@Sun.COM 			DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;
1765*11042SErik.Nordmark@Sun.COM 		} else {
1766*11042SErik.Nordmark@Sun.COM 			ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1767*11042SErik.Nordmark@Sun.COM 		}
1768*11042SErik.Nordmark@Sun.COM 		return (B_TRUE);
1769*11042SErik.Nordmark@Sun.COM 	}
1770*11042SErik.Nordmark@Sun.COM 	if ((hck_flags) & HCKSUM_INET_PARTIAL)  {
1771*11042SErik.Nordmark@Sun.COM 		ipaddr_t	dst = ipha->ipha_dst;
1772*11042SErik.Nordmark@Sun.COM 		ipaddr_t	src = ipha->ipha_src;
1773*11042SErik.Nordmark@Sun.COM 		/*
1774*11042SErik.Nordmark@Sun.COM 		 * Partial checksum offload has been enabled.  Fill
1775*11042SErik.Nordmark@Sun.COM 		 * the checksum field in the protocol header with the
1776*11042SErik.Nordmark@Sun.COM 		 * pseudo-header checksum value.
1777*11042SErik.Nordmark@Sun.COM 		 *
1778*11042SErik.Nordmark@Sun.COM 		 * We accumulate the pseudo header checksum in cksum.
1779*11042SErik.Nordmark@Sun.COM 		 * This is pretty hairy code, so watch close.  One
1780*11042SErik.Nordmark@Sun.COM 		 * thing to keep in mind is that UDP and TCP have
1781*11042SErik.Nordmark@Sun.COM 		 * stored their respective datagram lengths in their
1782*11042SErik.Nordmark@Sun.COM 		 * checksum fields.  This lines things up real nice.
1783*11042SErik.Nordmark@Sun.COM 		 */
1784*11042SErik.Nordmark@Sun.COM 		cksum += (dst >> 16) + (dst & 0xFFFF) +
1785*11042SErik.Nordmark@Sun.COM 		    (src >> 16) + (src & 0xFFFF);
1786*11042SErik.Nordmark@Sun.COM 		cksum += *(cksump);
1787*11042SErik.Nordmark@Sun.COM 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
1788*11042SErik.Nordmark@Sun.COM 		*(cksump) = (cksum & 0xFFFF) + (cksum >> 16);
1789*11042SErik.Nordmark@Sun.COM 
1790*11042SErik.Nordmark@Sun.COM 		/*
1791*11042SErik.Nordmark@Sun.COM 		 * Offsets are relative to beginning of IP header.
1792*11042SErik.Nordmark@Sun.COM 		 */
1793*11042SErik.Nordmark@Sun.COM 		DB_CKSUMSTART(mp) = ip_hdr_length;
1794*11042SErik.Nordmark@Sun.COM 		DB_CKSUMSTUFF(mp) = (uint8_t *)cksump - (uint8_t *)ipha;
1795*11042SErik.Nordmark@Sun.COM 		DB_CKSUMEND(mp) = pktlen;
1796*11042SErik.Nordmark@Sun.COM 		DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM;
1797*11042SErik.Nordmark@Sun.COM 
1798*11042SErik.Nordmark@Sun.COM 		ipha->ipha_hdr_checksum = 0;
1799*11042SErik.Nordmark@Sun.COM 		if (hck_flags & HCKSUM_IPHDRCKSUM) {
1800*11042SErik.Nordmark@Sun.COM 			DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;
1801*11042SErik.Nordmark@Sun.COM 		} else {
1802*11042SErik.Nordmark@Sun.COM 			ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1803*11042SErik.Nordmark@Sun.COM 		}
1804*11042SErik.Nordmark@Sun.COM 		return (B_TRUE);
1805*11042SErik.Nordmark@Sun.COM 	}
1806*11042SErik.Nordmark@Sun.COM 	/* Hardware capabilities include neither full nor partial IPv4 */
1807*11042SErik.Nordmark@Sun.COM 	return (ip_output_sw_cksum_v4(mp, ipha, ixa));
1808*11042SErik.Nordmark@Sun.COM }
1809*11042SErik.Nordmark@Sun.COM 
1810*11042SErik.Nordmark@Sun.COM /*
1811*11042SErik.Nordmark@Sun.COM  * ire_sendfn for offlink and onlink destinations.
1812*11042SErik.Nordmark@Sun.COM  * Also called from the multicast, broadcast, multirt send functions.
1813*11042SErik.Nordmark@Sun.COM  *
1814*11042SErik.Nordmark@Sun.COM  * Assumes that the caller has a hold on the ire.
1815*11042SErik.Nordmark@Sun.COM  *
1816*11042SErik.Nordmark@Sun.COM  * This function doesn't care if the IRE just became condemned since that
1817*11042SErik.Nordmark@Sun.COM  * can happen at any time.
1818*11042SErik.Nordmark@Sun.COM  */
1819*11042SErik.Nordmark@Sun.COM /* ARGSUSED */
1820*11042SErik.Nordmark@Sun.COM int
1821*11042SErik.Nordmark@Sun.COM ire_send_wire_v4(ire_t *ire, mblk_t *mp, void *iph_arg,
1822*11042SErik.Nordmark@Sun.COM     ip_xmit_attr_t *ixa, uint32_t *identp)
1823*11042SErik.Nordmark@Sun.COM {
1824*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
1825*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = (ipha_t *)iph_arg;
1826*11042SErik.Nordmark@Sun.COM 	iaflags_t	ixaflags = ixa->ixa_flags;
1827*11042SErik.Nordmark@Sun.COM 	ill_t		*ill;
1828*11042SErik.Nordmark@Sun.COM 
1829*11042SErik.Nordmark@Sun.COM 	ASSERT(ixa->ixa_nce != NULL);
1830*11042SErik.Nordmark@Sun.COM 	ill = ixa->ixa_nce->nce_ill;
1831*11042SErik.Nordmark@Sun.COM 
1832*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_DONTROUTE)
1833*11042SErik.Nordmark@Sun.COM 		ipha->ipha_ttl = 1;
1834*11042SErik.Nordmark@Sun.COM 
1835*11042SErik.Nordmark@Sun.COM 	/*
1836*11042SErik.Nordmark@Sun.COM 	 * Assign an ident value for this packet. There could be other
1837*11042SErik.Nordmark@Sun.COM 	 * threads targeting the same destination, so we have to arrange
1838*11042SErik.Nordmark@Sun.COM 	 * for a atomic increment.  Note that we use a 32-bit atomic add
1839*11042SErik.Nordmark@Sun.COM 	 * because it has better performance than its 16-bit sibling.
1840*11042SErik.Nordmark@Sun.COM 	 *
1841*11042SErik.Nordmark@Sun.COM 	 * Normally ixa_extra_ident is 0, but in the case of LSO it will
1842*11042SErik.Nordmark@Sun.COM 	 * be the number of TCP segments  that the driver/hardware will
1843*11042SErik.Nordmark@Sun.COM 	 * extraly construct.
1844*11042SErik.Nordmark@Sun.COM 	 *
1845*11042SErik.Nordmark@Sun.COM 	 * If running in cluster mode and if the source address
1846*11042SErik.Nordmark@Sun.COM 	 * belongs to a replicated service then vector through
1847*11042SErik.Nordmark@Sun.COM 	 * cl_inet_ipident vector to allocate ip identifier
1848*11042SErik.Nordmark@Sun.COM 	 * NOTE: This is a contract private interface with the
1849*11042SErik.Nordmark@Sun.COM 	 * clustering group.
1850*11042SErik.Nordmark@Sun.COM 	 */
1851*11042SErik.Nordmark@Sun.COM 	if (cl_inet_ipident != NULL) {
1852*11042SErik.Nordmark@Sun.COM 		ipaddr_t src = ipha->ipha_src;
1853*11042SErik.Nordmark@Sun.COM 		ipaddr_t dst = ipha->ipha_dst;
1854*11042SErik.Nordmark@Sun.COM 		netstackid_t stack_id = ipst->ips_netstack->netstack_stackid;
1855*11042SErik.Nordmark@Sun.COM 
1856*11042SErik.Nordmark@Sun.COM 		ASSERT(cl_inet_isclusterwide != NULL);
1857*11042SErik.Nordmark@Sun.COM 		if ((*cl_inet_isclusterwide)(stack_id, IPPROTO_IP,
1858*11042SErik.Nordmark@Sun.COM 		    AF_INET, (uint8_t *)(uintptr_t)src, NULL)) {
1859*11042SErik.Nordmark@Sun.COM 			/*
1860*11042SErik.Nordmark@Sun.COM 			 * Note: not correct with LSO since we can't allocate
1861*11042SErik.Nordmark@Sun.COM 			 * ixa_extra_ident+1 consecutive values.
1862*11042SErik.Nordmark@Sun.COM 			 */
1863*11042SErik.Nordmark@Sun.COM 			ipha->ipha_ident = (*cl_inet_ipident)(stack_id,
1864*11042SErik.Nordmark@Sun.COM 			    IPPROTO_IP, AF_INET, (uint8_t *)(uintptr_t)src,
1865*11042SErik.Nordmark@Sun.COM 			    (uint8_t *)(uintptr_t)dst, NULL);
1866*11042SErik.Nordmark@Sun.COM 		} else {
1867*11042SErik.Nordmark@Sun.COM 			ipha->ipha_ident = atomic_add_32_nv(identp,
1868*11042SErik.Nordmark@Sun.COM 			    ixa->ixa_extra_ident + 1);
1869*11042SErik.Nordmark@Sun.COM 		}
1870*11042SErik.Nordmark@Sun.COM 	} else {
1871*11042SErik.Nordmark@Sun.COM 		ipha->ipha_ident = atomic_add_32_nv(identp,
1872*11042SErik.Nordmark@Sun.COM 		    ixa->ixa_extra_ident + 1);
1873*11042SErik.Nordmark@Sun.COM 	}
1874*11042SErik.Nordmark@Sun.COM #ifndef _BIG_ENDIAN
1875*11042SErik.Nordmark@Sun.COM 	ipha->ipha_ident = htons(ipha->ipha_ident);
1876*11042SErik.Nordmark@Sun.COM #endif
1877*11042SErik.Nordmark@Sun.COM 
1878*11042SErik.Nordmark@Sun.COM 	/*
1879*11042SErik.Nordmark@Sun.COM 	 * This might set b_band, thus the IPsec and fragmentation
1880*11042SErik.Nordmark@Sun.COM 	 * code in IP ensures that b_band is updated in the first mblk.
1881*11042SErik.Nordmark@Sun.COM 	 */
1882*11042SErik.Nordmark@Sun.COM 	if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) {
1883*11042SErik.Nordmark@Sun.COM 		/* ip_process translates an IS_UNDER_IPMP */
1884*11042SErik.Nordmark@Sun.COM 		mp = ip_process(IPP_LOCAL_OUT, mp, ill, ill);
1885*11042SErik.Nordmark@Sun.COM 		if (mp == NULL) {
1886*11042SErik.Nordmark@Sun.COM 			/* ip_drop_packet and MIB done */
1887*11042SErik.Nordmark@Sun.COM 			return (0);	/* Might just be delayed */
1888*11042SErik.Nordmark@Sun.COM 		}
1889*11042SErik.Nordmark@Sun.COM 	}
1890*11042SErik.Nordmark@Sun.COM 
1891*11042SErik.Nordmark@Sun.COM 	/*
1892*11042SErik.Nordmark@Sun.COM 	 * Verify any IPv4 options.
1893*11042SErik.Nordmark@Sun.COM 	 *
1894*11042SErik.Nordmark@Sun.COM 	 * The presense of IP options also forces the network stack to
1895*11042SErik.Nordmark@Sun.COM 	 * calculate the checksum in software.  This is because:
1896*11042SErik.Nordmark@Sun.COM 	 *
1897*11042SErik.Nordmark@Sun.COM 	 * Wrap around: certain partial-checksum NICs (eri, ce) limit
1898*11042SErik.Nordmark@Sun.COM 	 * the size of "start offset" width to 6-bit.  This effectively
1899*11042SErik.Nordmark@Sun.COM 	 * sets the largest value of the offset to 64-bytes, starting
1900*11042SErik.Nordmark@Sun.COM 	 * from the MAC header.  When the cumulative MAC and IP headers
1901*11042SErik.Nordmark@Sun.COM 	 * exceed such limit, the offset will wrap around.  This causes
1902*11042SErik.Nordmark@Sun.COM 	 * the checksum to be calculated at the wrong place.
1903*11042SErik.Nordmark@Sun.COM 	 *
1904*11042SErik.Nordmark@Sun.COM 	 * IPv4 source routing: none of the full-checksum capable NICs
1905*11042SErik.Nordmark@Sun.COM 	 * is capable of correctly handling the	IPv4 source-routing
1906*11042SErik.Nordmark@Sun.COM 	 * option for purposes of calculating the pseudo-header; the
1907*11042SErik.Nordmark@Sun.COM 	 * actual destination is different from the destination in the
1908*11042SErik.Nordmark@Sun.COM 	 * header which is that of the next-hop.  (This case may not be
1909*11042SErik.Nordmark@Sun.COM 	 * true for NICs which can parse IPv6 extension headers, but
1910*11042SErik.Nordmark@Sun.COM 	 * we choose to simplify the implementation by not offloading
1911*11042SErik.Nordmark@Sun.COM 	 * checksum when they are present.)
1912*11042SErik.Nordmark@Sun.COM 	 */
1913*11042SErik.Nordmark@Sun.COM 	if (!IS_SIMPLE_IPH(ipha)) {
1914*11042SErik.Nordmark@Sun.COM 		ixaflags = ixa->ixa_flags |= IXAF_NO_HW_CKSUM;
1915*11042SErik.Nordmark@Sun.COM 		/* An IS_UNDER_IPMP ill is ok here */
1916*11042SErik.Nordmark@Sun.COM 		if (ip_output_options(mp, ipha, ixa, ill)) {
1917*11042SErik.Nordmark@Sun.COM 			/* Packet has been consumed and ICMP error sent */
1918*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1919*11042SErik.Nordmark@Sun.COM 			return (EINVAL);
1920*11042SErik.Nordmark@Sun.COM 		}
1921*11042SErik.Nordmark@Sun.COM 	}
1922*11042SErik.Nordmark@Sun.COM 
1923*11042SErik.Nordmark@Sun.COM 	/*
1924*11042SErik.Nordmark@Sun.COM 	 * To handle IPsec/iptun's labeling needs we need to tag packets
1925*11042SErik.Nordmark@Sun.COM 	 * while we still have ixa_tsl
1926*11042SErik.Nordmark@Sun.COM 	 */
1927*11042SErik.Nordmark@Sun.COM 	if (is_system_labeled() && ixa->ixa_tsl != NULL &&
1928*11042SErik.Nordmark@Sun.COM 	    (ill->ill_mactype == DL_6TO4 || ill->ill_mactype == DL_IPV4 ||
1929*11042SErik.Nordmark@Sun.COM 	    ill->ill_mactype == DL_IPV6)) {
1930*11042SErik.Nordmark@Sun.COM 		cred_t *newcr;
1931*11042SErik.Nordmark@Sun.COM 
1932*11042SErik.Nordmark@Sun.COM 		newcr = copycred_from_tslabel(ixa->ixa_cred, ixa->ixa_tsl,
1933*11042SErik.Nordmark@Sun.COM 		    KM_NOSLEEP);
1934*11042SErik.Nordmark@Sun.COM 		if (newcr == NULL) {
1935*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1936*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - newcr",
1937*11042SErik.Nordmark@Sun.COM 			    mp, ill);
1938*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
1939*11042SErik.Nordmark@Sun.COM 			return (ENOBUFS);
1940*11042SErik.Nordmark@Sun.COM 		}
1941*11042SErik.Nordmark@Sun.COM 		mblk_setcred(mp, newcr, NOPID);
1942*11042SErik.Nordmark@Sun.COM 		crfree(newcr);	/* mblk_setcred did its own crhold */
1943*11042SErik.Nordmark@Sun.COM 	}
1944*11042SErik.Nordmark@Sun.COM 
1945*11042SErik.Nordmark@Sun.COM 	if (ixa->ixa_pktlen > ixa->ixa_fragsize ||
1946*11042SErik.Nordmark@Sun.COM 	    (ixaflags & IXAF_IPSEC_SECURE)) {
1947*11042SErik.Nordmark@Sun.COM 		uint32_t pktlen;
1948*11042SErik.Nordmark@Sun.COM 
1949*11042SErik.Nordmark@Sun.COM 		pktlen = ixa->ixa_pktlen;
1950*11042SErik.Nordmark@Sun.COM 		if (ixaflags & IXAF_IPSEC_SECURE)
1951*11042SErik.Nordmark@Sun.COM 			pktlen += ipsec_out_extra_length(ixa);
1952*11042SErik.Nordmark@Sun.COM 
1953*11042SErik.Nordmark@Sun.COM 		if (pktlen > IP_MAXPACKET)
1954*11042SErik.Nordmark@Sun.COM 			return (EMSGSIZE);
1955*11042SErik.Nordmark@Sun.COM 
1956*11042SErik.Nordmark@Sun.COM 		if (ixaflags & IXAF_SET_ULP_CKSUM) {
1957*11042SErik.Nordmark@Sun.COM 			/*
1958*11042SErik.Nordmark@Sun.COM 			 * Compute ULP checksum and IP header checksum
1959*11042SErik.Nordmark@Sun.COM 			 * using software
1960*11042SErik.Nordmark@Sun.COM 			 */
1961*11042SErik.Nordmark@Sun.COM 			if (!ip_output_sw_cksum_v4(mp, ipha, ixa)) {
1962*11042SErik.Nordmark@Sun.COM 				BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1963*11042SErik.Nordmark@Sun.COM 				ip_drop_output("ipIfStatsOutDiscards", mp, ill);
1964*11042SErik.Nordmark@Sun.COM 				freemsg(mp);
1965*11042SErik.Nordmark@Sun.COM 				return (EINVAL);
1966*11042SErik.Nordmark@Sun.COM 			}
1967*11042SErik.Nordmark@Sun.COM 		} else {
1968*11042SErik.Nordmark@Sun.COM 			/* Calculate IPv4 header checksum */
1969*11042SErik.Nordmark@Sun.COM 			ipha->ipha_hdr_checksum = 0;
1970*11042SErik.Nordmark@Sun.COM 			ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1971*11042SErik.Nordmark@Sun.COM 		}
1972*11042SErik.Nordmark@Sun.COM 
1973*11042SErik.Nordmark@Sun.COM 		/*
1974*11042SErik.Nordmark@Sun.COM 		 * If this packet would generate a icmp_frag_needed
1975*11042SErik.Nordmark@Sun.COM 		 * message, we need to handle it before we do the IPsec
1976*11042SErik.Nordmark@Sun.COM 		 * processing. Otherwise, we need to strip the IPsec
1977*11042SErik.Nordmark@Sun.COM 		 * headers before we send up the message to the ULPs
1978*11042SErik.Nordmark@Sun.COM 		 * which becomes messy and difficult.
1979*11042SErik.Nordmark@Sun.COM 		 *
1980*11042SErik.Nordmark@Sun.COM 		 * We check using IXAF_DONTFRAG. The DF bit in the header
1981*11042SErik.Nordmark@Sun.COM 		 * is not inspected - it will be copied to any generated
1982*11042SErik.Nordmark@Sun.COM 		 * fragments.
1983*11042SErik.Nordmark@Sun.COM 		 */
1984*11042SErik.Nordmark@Sun.COM 		if ((pktlen > ixa->ixa_fragsize) &&
1985*11042SErik.Nordmark@Sun.COM 		    (ixaflags & IXAF_DONTFRAG)) {
1986*11042SErik.Nordmark@Sun.COM 			/* Generate ICMP and return error */
1987*11042SErik.Nordmark@Sun.COM 			ip_recv_attr_t	iras;
1988*11042SErik.Nordmark@Sun.COM 
1989*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE4(ip4__fragsize__fail, uint_t, pktlen,
1990*11042SErik.Nordmark@Sun.COM 			    uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen,
1991*11042SErik.Nordmark@Sun.COM 			    uint_t, ixa->ixa_pmtu);
1992*11042SErik.Nordmark@Sun.COM 
1993*11042SErik.Nordmark@Sun.COM 			bzero(&iras, sizeof (iras));
1994*11042SErik.Nordmark@Sun.COM 			/* Map ixa to ira including IPsec policies */
1995*11042SErik.Nordmark@Sun.COM 			ipsec_out_to_in(ixa, ill, &iras);
1996*11042SErik.Nordmark@Sun.COM 
1997*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ICMP_FRAG_NEEDED", mp, ill);
1998*11042SErik.Nordmark@Sun.COM 			icmp_frag_needed(mp, ixa->ixa_fragsize, &iras);
1999*11042SErik.Nordmark@Sun.COM 			/* We moved any IPsec refs from ixa to iras */
2000*11042SErik.Nordmark@Sun.COM 			ira_cleanup(&iras, B_FALSE);
2001*11042SErik.Nordmark@Sun.COM 			return (EMSGSIZE);
2002*11042SErik.Nordmark@Sun.COM 		}
2003*11042SErik.Nordmark@Sun.COM 		DTRACE_PROBE4(ip4__fragsize__ok, uint_t, pktlen,
2004*11042SErik.Nordmark@Sun.COM 		    uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen,
2005*11042SErik.Nordmark@Sun.COM 		    uint_t, ixa->ixa_pmtu);
2006*11042SErik.Nordmark@Sun.COM 
2007*11042SErik.Nordmark@Sun.COM 		if (ixaflags & IXAF_IPSEC_SECURE) {
2008*11042SErik.Nordmark@Sun.COM 			/*
2009*11042SErik.Nordmark@Sun.COM 			 * Pass in sufficient information so that
2010*11042SErik.Nordmark@Sun.COM 			 * IPsec can determine whether to fragment, and
2011*11042SErik.Nordmark@Sun.COM 			 * which function to call after fragmentation.
2012*11042SErik.Nordmark@Sun.COM 			 */
2013*11042SErik.Nordmark@Sun.COM 			return (ipsec_out_process(mp, ixa));
2014*11042SErik.Nordmark@Sun.COM 		}
2015*11042SErik.Nordmark@Sun.COM 		return (ip_fragment_v4(mp, ixa->ixa_nce, ixaflags,
2016*11042SErik.Nordmark@Sun.COM 		    ixa->ixa_pktlen, ixa->ixa_fragsize, ixa->ixa_xmit_hint,
2017*11042SErik.Nordmark@Sun.COM 		    ixa->ixa_zoneid, ixa->ixa_no_loop_zoneid,
2018*11042SErik.Nordmark@Sun.COM 		    ixa->ixa_postfragfn, &ixa->ixa_cookie));
2019*11042SErik.Nordmark@Sun.COM 	}
2020*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_SET_ULP_CKSUM) {
2021*11042SErik.Nordmark@Sun.COM 		/* Compute ULP checksum and IP header checksum */
2022*11042SErik.Nordmark@Sun.COM 		/* An IS_UNDER_IPMP ill is ok here */
2023*11042SErik.Nordmark@Sun.COM 		if (!ip_output_cksum_v4(ixaflags, mp, ipha, ixa, ill)) {
2024*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2025*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards", mp, ill);
2026*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
2027*11042SErik.Nordmark@Sun.COM 			return (EINVAL);
2028*11042SErik.Nordmark@Sun.COM 		}
2029*11042SErik.Nordmark@Sun.COM 	} else {
2030*11042SErik.Nordmark@Sun.COM 		/* Calculate IPv4 header checksum */
2031*11042SErik.Nordmark@Sun.COM 		ipha->ipha_hdr_checksum = 0;
2032*11042SErik.Nordmark@Sun.COM 		ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
2033*11042SErik.Nordmark@Sun.COM 	}
2034*11042SErik.Nordmark@Sun.COM 	return ((ixa->ixa_postfragfn)(mp, ixa->ixa_nce, ixaflags,
2035*11042SErik.Nordmark@Sun.COM 	    ixa->ixa_pktlen, ixa->ixa_xmit_hint, ixa->ixa_zoneid,
2036*11042SErik.Nordmark@Sun.COM 	    ixa->ixa_no_loop_zoneid, &ixa->ixa_cookie));
2037*11042SErik.Nordmark@Sun.COM }
2038*11042SErik.Nordmark@Sun.COM 
2039*11042SErik.Nordmark@Sun.COM /*
2040*11042SErik.Nordmark@Sun.COM  * Send mp into ip_input
2041*11042SErik.Nordmark@Sun.COM  * Common for IPv4 and IPv6
2042*11042SErik.Nordmark@Sun.COM  */
2043*11042SErik.Nordmark@Sun.COM void
2044*11042SErik.Nordmark@Sun.COM ip_postfrag_loopback(mblk_t *mp, nce_t *nce, iaflags_t ixaflags,
2045*11042SErik.Nordmark@Sun.COM     uint_t pkt_len, zoneid_t nolzid)
2046*11042SErik.Nordmark@Sun.COM {
2047*11042SErik.Nordmark@Sun.COM 	rtc_t		rtc;
2048*11042SErik.Nordmark@Sun.COM 	ill_t		*ill = nce->nce_ill;
2049*11042SErik.Nordmark@Sun.COM 	ip_recv_attr_t	iras;	/* NOTE: No bzero for performance */
2050*11042SErik.Nordmark@Sun.COM 	ncec_t		*ncec;
2051*11042SErik.Nordmark@Sun.COM 
2052*11042SErik.Nordmark@Sun.COM 	ncec = nce->nce_common;
2053*11042SErik.Nordmark@Sun.COM 	iras.ira_flags = IRAF_VERIFY_IP_CKSUM | IRAF_VERIFY_ULP_CKSUM |
2054*11042SErik.Nordmark@Sun.COM 	    IRAF_LOOPBACK | IRAF_L2SRC_LOOPBACK;
2055*11042SErik.Nordmark@Sun.COM 	if (ncec->ncec_flags & NCE_F_BCAST)
2056*11042SErik.Nordmark@Sun.COM 		iras.ira_flags |= IRAF_L2DST_BROADCAST;
2057*11042SErik.Nordmark@Sun.COM 	else if (ncec->ncec_flags & NCE_F_MCAST)
2058*11042SErik.Nordmark@Sun.COM 		iras.ira_flags |= IRAF_L2DST_MULTICAST;
2059*11042SErik.Nordmark@Sun.COM 
2060*11042SErik.Nordmark@Sun.COM 	iras.ira_free_flags = 0;
2061*11042SErik.Nordmark@Sun.COM 	iras.ira_cred = NULL;
2062*11042SErik.Nordmark@Sun.COM 	iras.ira_cpid = NOPID;
2063*11042SErik.Nordmark@Sun.COM 	iras.ira_tsl = NULL;
2064*11042SErik.Nordmark@Sun.COM 	iras.ira_zoneid = ALL_ZONES;
2065*11042SErik.Nordmark@Sun.COM 	iras.ira_pktlen = pkt_len;
2066*11042SErik.Nordmark@Sun.COM 	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, iras.ira_pktlen);
2067*11042SErik.Nordmark@Sun.COM 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives);
2068*11042SErik.Nordmark@Sun.COM 
2069*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_IS_IPV4)
2070*11042SErik.Nordmark@Sun.COM 		iras.ira_flags |= IRAF_IS_IPV4;
2071*11042SErik.Nordmark@Sun.COM 
2072*11042SErik.Nordmark@Sun.COM 	iras.ira_ill = iras.ira_rill = ill;
2073*11042SErik.Nordmark@Sun.COM 	iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex;
2074*11042SErik.Nordmark@Sun.COM 	iras.ira_rifindex = iras.ira_ruifindex;
2075*11042SErik.Nordmark@Sun.COM 	iras.ira_mhip = NULL;
2076*11042SErik.Nordmark@Sun.COM 
2077*11042SErik.Nordmark@Sun.COM 	iras.ira_flags |= ixaflags & IAF_MASK;
2078*11042SErik.Nordmark@Sun.COM 	iras.ira_no_loop_zoneid = nolzid;
2079*11042SErik.Nordmark@Sun.COM 
2080*11042SErik.Nordmark@Sun.COM 	/* Broadcast and multicast doesn't care about the squeue */
2081*11042SErik.Nordmark@Sun.COM 	iras.ira_sqp = NULL;
2082*11042SErik.Nordmark@Sun.COM 
2083*11042SErik.Nordmark@Sun.COM 	rtc.rtc_ire = NULL;
2084*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_IS_IPV4) {
2085*11042SErik.Nordmark@Sun.COM 		ipha_t		*ipha = (ipha_t *)mp->b_rptr;
2086*11042SErik.Nordmark@Sun.COM 
2087*11042SErik.Nordmark@Sun.COM 		rtc.rtc_ipaddr = INADDR_ANY;
2088*11042SErik.Nordmark@Sun.COM 
2089*11042SErik.Nordmark@Sun.COM 		(*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc);
2090*11042SErik.Nordmark@Sun.COM 		if (rtc.rtc_ire != NULL) {
2091*11042SErik.Nordmark@Sun.COM 			ASSERT(rtc.rtc_ipaddr != INADDR_ANY);
2092*11042SErik.Nordmark@Sun.COM 			ire_refrele(rtc.rtc_ire);
2093*11042SErik.Nordmark@Sun.COM 		}
2094*11042SErik.Nordmark@Sun.COM 	} else {
2095*11042SErik.Nordmark@Sun.COM 		ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
2096*11042SErik.Nordmark@Sun.COM 
2097*11042SErik.Nordmark@Sun.COM 		rtc.rtc_ip6addr = ipv6_all_zeros;
2098*11042SErik.Nordmark@Sun.COM 
2099*11042SErik.Nordmark@Sun.COM 		(*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc);
2100*11042SErik.Nordmark@Sun.COM 		if (rtc.rtc_ire != NULL) {
2101*11042SErik.Nordmark@Sun.COM 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr));
2102*11042SErik.Nordmark@Sun.COM 			ire_refrele(rtc.rtc_ire);
2103*11042SErik.Nordmark@Sun.COM 		}
2104*11042SErik.Nordmark@Sun.COM 	}
2105*11042SErik.Nordmark@Sun.COM 	/* Any references to clean up? No hold on ira */
2106*11042SErik.Nordmark@Sun.COM 	if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED))
2107*11042SErik.Nordmark@Sun.COM 		ira_cleanup(&iras, B_FALSE);
2108*11042SErik.Nordmark@Sun.COM }
2109*11042SErik.Nordmark@Sun.COM 
2110*11042SErik.Nordmark@Sun.COM /*
2111*11042SErik.Nordmark@Sun.COM  * Post fragmentation function for IRE_MULTICAST and IRE_BROADCAST which
2112*11042SErik.Nordmark@Sun.COM  * looks at the IXAF_LOOPBACK_COPY flag.
2113*11042SErik.Nordmark@Sun.COM  * Common for IPv4 and IPv6.
2114*11042SErik.Nordmark@Sun.COM  *
2115*11042SErik.Nordmark@Sun.COM  * If the loopback copy fails (due to no memory) but we send the packet out
2116*11042SErik.Nordmark@Sun.COM  * on the wire we return no failure. Only in the case we supress the wire
2117*11042SErik.Nordmark@Sun.COM  * sending do we take the loopback failure into account.
2118*11042SErik.Nordmark@Sun.COM  *
2119*11042SErik.Nordmark@Sun.COM  * Note that we do not perform DTRACE_IP7 and FW_HOOKS for the looped back copy.
2120*11042SErik.Nordmark@Sun.COM  * Those operations are performed on this packet in ip_xmit() and it would
2121*11042SErik.Nordmark@Sun.COM  * be odd to do it twice for the same packet.
2122*11042SErik.Nordmark@Sun.COM  */
2123*11042SErik.Nordmark@Sun.COM int
2124*11042SErik.Nordmark@Sun.COM ip_postfrag_loopcheck(mblk_t *mp, nce_t *nce, iaflags_t ixaflags,
2125*11042SErik.Nordmark@Sun.COM     uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
2126*11042SErik.Nordmark@Sun.COM     uintptr_t *ixacookie)
2127*11042SErik.Nordmark@Sun.COM {
2128*11042SErik.Nordmark@Sun.COM 	ill_t		*ill = nce->nce_ill;
2129*11042SErik.Nordmark@Sun.COM 	int		error = 0;
2130*11042SErik.Nordmark@Sun.COM 
2131*11042SErik.Nordmark@Sun.COM 	/*
2132*11042SErik.Nordmark@Sun.COM 	 * Check for IXAF_LOOPBACK_COPY - send a copy to ip as if the driver
2133*11042SErik.Nordmark@Sun.COM 	 * had looped it back
2134*11042SErik.Nordmark@Sun.COM 	 */
2135*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_LOOPBACK_COPY) {
2136*11042SErik.Nordmark@Sun.COM 		mblk_t		*mp1;
2137*11042SErik.Nordmark@Sun.COM 
2138*11042SErik.Nordmark@Sun.COM 		mp1 = copymsg(mp);
2139*11042SErik.Nordmark@Sun.COM 		if (mp1 == NULL) {
2140*11042SErik.Nordmark@Sun.COM 			/* Failed to deliver the loopback copy. */
2141*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2142*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards", mp, ill);
2143*11042SErik.Nordmark@Sun.COM 			error = ENOBUFS;
2144*11042SErik.Nordmark@Sun.COM 		} else {
2145*11042SErik.Nordmark@Sun.COM 			ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len,
2146*11042SErik.Nordmark@Sun.COM 			    nolzid);
2147*11042SErik.Nordmark@Sun.COM 		}
2148*11042SErik.Nordmark@Sun.COM 	}
2149*11042SErik.Nordmark@Sun.COM 
2150*11042SErik.Nordmark@Sun.COM 	/*
2151*11042SErik.Nordmark@Sun.COM 	 * If TTL = 0 then only do the loopback to this host i.e. we are
2152*11042SErik.Nordmark@Sun.COM 	 * done. We are also done if this was the
2153*11042SErik.Nordmark@Sun.COM 	 * loopback interface since it is sufficient
2154*11042SErik.Nordmark@Sun.COM 	 * to loopback one copy of a multicast packet.
2155*11042SErik.Nordmark@Sun.COM 	 */
2156*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_IS_IPV4) {
2157*11042SErik.Nordmark@Sun.COM 		ipha_t *ipha = (ipha_t *)mp->b_rptr;
2158*11042SErik.Nordmark@Sun.COM 
2159*11042SErik.Nordmark@Sun.COM 		if (ipha->ipha_ttl == 0) {
2160*11042SErik.Nordmark@Sun.COM 			ip_drop_output("multicast ipha_ttl not sent to wire",
2161*11042SErik.Nordmark@Sun.COM 			    mp, ill);
2162*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
2163*11042SErik.Nordmark@Sun.COM 			return (error);
2164*11042SErik.Nordmark@Sun.COM 		}
2165*11042SErik.Nordmark@Sun.COM 	} else {
2166*11042SErik.Nordmark@Sun.COM 		ip6_t	*ip6h = (ip6_t *)mp->b_rptr;
2167*11042SErik.Nordmark@Sun.COM 
2168*11042SErik.Nordmark@Sun.COM 		if (ip6h->ip6_hops == 0) {
2169*11042SErik.Nordmark@Sun.COM 			ip_drop_output("multicast ipha_ttl not sent to wire",
2170*11042SErik.Nordmark@Sun.COM 			    mp, ill);
2171*11042SErik.Nordmark@Sun.COM 			freemsg(mp);
2172*11042SErik.Nordmark@Sun.COM 			return (error);
2173*11042SErik.Nordmark@Sun.COM 		}
2174*11042SErik.Nordmark@Sun.COM 	}
2175*11042SErik.Nordmark@Sun.COM 	if (nce->nce_ill->ill_wq == NULL) {
2176*11042SErik.Nordmark@Sun.COM 		/* Loopback interface */
2177*11042SErik.Nordmark@Sun.COM 		ip_drop_output("multicast on lo0 not sent to wire", mp, ill);
2178*11042SErik.Nordmark@Sun.COM 		freemsg(mp);
2179*11042SErik.Nordmark@Sun.COM 		return (error);
2180*11042SErik.Nordmark@Sun.COM 	}
2181*11042SErik.Nordmark@Sun.COM 
2182*11042SErik.Nordmark@Sun.COM 	return (ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0,
2183*11042SErik.Nordmark@Sun.COM 	    ixacookie));
2184*11042SErik.Nordmark@Sun.COM }
2185*11042SErik.Nordmark@Sun.COM 
2186*11042SErik.Nordmark@Sun.COM /*
2187*11042SErik.Nordmark@Sun.COM  * Post fragmentation function for RTF_MULTIRT routes.
2188*11042SErik.Nordmark@Sun.COM  * Since IRE_BROADCASTs can have RTF_MULTIRT, this function
2189*11042SErik.Nordmark@Sun.COM  * checks IXAF_LOOPBACK_COPY.
2190*11042SErik.Nordmark@Sun.COM  *
2191*11042SErik.Nordmark@Sun.COM  * If no packet is sent due to failures then we return an errno, but if at
2192*11042SErik.Nordmark@Sun.COM  * least one succeeded we return zero.
2193*11042SErik.Nordmark@Sun.COM  */
2194*11042SErik.Nordmark@Sun.COM int
2195*11042SErik.Nordmark@Sun.COM ip_postfrag_multirt_v4(mblk_t *mp, nce_t *nce, iaflags_t ixaflags,
2196*11042SErik.Nordmark@Sun.COM     uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
2197*11042SErik.Nordmark@Sun.COM     uintptr_t *ixacookie)
2198*11042SErik.Nordmark@Sun.COM {
2199*11042SErik.Nordmark@Sun.COM 	irb_t		*irb;
2200*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
2201*11042SErik.Nordmark@Sun.COM 	ire_t		*ire;
2202*11042SErik.Nordmark@Sun.COM 	ire_t		*ire1;
2203*11042SErik.Nordmark@Sun.COM 	mblk_t		*mp1;
2204*11042SErik.Nordmark@Sun.COM 	nce_t		*nce1;
2205*11042SErik.Nordmark@Sun.COM 	ill_t		*ill = nce->nce_ill;
2206*11042SErik.Nordmark@Sun.COM 	ill_t		*ill1;
2207*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ill->ill_ipst;
2208*11042SErik.Nordmark@Sun.COM 	int		error = 0;
2209*11042SErik.Nordmark@Sun.COM 	int		num_sent = 0;
2210*11042SErik.Nordmark@Sun.COM 	int		err;
2211*11042SErik.Nordmark@Sun.COM 	uint_t		ire_type;
2212*11042SErik.Nordmark@Sun.COM 	ipaddr_t	nexthop;
2213*11042SErik.Nordmark@Sun.COM 
2214*11042SErik.Nordmark@Sun.COM 	ASSERT(ixaflags & IXAF_IS_IPV4);
2215*11042SErik.Nordmark@Sun.COM 
2216*11042SErik.Nordmark@Sun.COM 	/* Check for IXAF_LOOPBACK_COPY */
2217*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_LOOPBACK_COPY) {
2218*11042SErik.Nordmark@Sun.COM 		mblk_t *mp1;
2219*11042SErik.Nordmark@Sun.COM 
2220*11042SErik.Nordmark@Sun.COM 		mp1 = copymsg(mp);
2221*11042SErik.Nordmark@Sun.COM 		if (mp1 == NULL) {
2222*11042SErik.Nordmark@Sun.COM 			/* Failed to deliver the loopback copy. */
2223*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2224*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards", mp, ill);
2225*11042SErik.Nordmark@Sun.COM 			error = ENOBUFS;
2226*11042SErik.Nordmark@Sun.COM 		} else {
2227*11042SErik.Nordmark@Sun.COM 			ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len,
2228*11042SErik.Nordmark@Sun.COM 			    nolzid);
2229*11042SErik.Nordmark@Sun.COM 		}
2230*11042SErik.Nordmark@Sun.COM 	}
2231*11042SErik.Nordmark@Sun.COM 
2232*11042SErik.Nordmark@Sun.COM 	/*
2233*11042SErik.Nordmark@Sun.COM 	 * Loop over RTF_MULTIRT for ipha_dst in the same bucket. Send
2234*11042SErik.Nordmark@Sun.COM 	 * a copy to each one.
2235*11042SErik.Nordmark@Sun.COM 	 * Use the nce (nexthop) and ipha_dst to find the ire.
2236*11042SErik.Nordmark@Sun.COM 	 *
2237*11042SErik.Nordmark@Sun.COM 	 * MULTIRT is not designed to work with shared-IP zones thus we don't
2238*11042SErik.Nordmark@Sun.COM 	 * need to pass a zoneid or a label to the IRE lookup.
2239*11042SErik.Nordmark@Sun.COM 	 */
2240*11042SErik.Nordmark@Sun.COM 	if (V4_PART_OF_V6(nce->nce_addr) == ipha->ipha_dst) {
2241*11042SErik.Nordmark@Sun.COM 		/* Broadcast and multicast case */
2242*11042SErik.Nordmark@Sun.COM 		ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, 0, 0,
2243*11042SErik.Nordmark@Sun.COM 		    NULL, ALL_ZONES, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL);
2244*11042SErik.Nordmark@Sun.COM 	} else {
2245*11042SErik.Nordmark@Sun.COM 		ipaddr_t v4addr = V4_PART_OF_V6(nce->nce_addr);
2246*11042SErik.Nordmark@Sun.COM 
2247*11042SErik.Nordmark@Sun.COM 		/* Unicast case */
2248*11042SErik.Nordmark@Sun.COM 		ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, v4addr, 0,
2249*11042SErik.Nordmark@Sun.COM 		    NULL, ALL_ZONES, NULL, MATCH_IRE_GW, 0, ipst, NULL);
2250*11042SErik.Nordmark@Sun.COM 	}
2251*11042SErik.Nordmark@Sun.COM 
2252*11042SErik.Nordmark@Sun.COM 	if (ire == NULL ||
2253*11042SErik.Nordmark@Sun.COM 	    (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
2254*11042SErik.Nordmark@Sun.COM 	    !(ire->ire_flags & RTF_MULTIRT)) {
2255*11042SErik.Nordmark@Sun.COM 		/* Drop */
2256*11042SErik.Nordmark@Sun.COM 		ip_drop_output("ip_postfrag_multirt didn't find route",
2257*11042SErik.Nordmark@Sun.COM 		    mp, nce->nce_ill);
2258*11042SErik.Nordmark@Sun.COM 		if (ire != NULL)
2259*11042SErik.Nordmark@Sun.COM 			ire_refrele(ire);
2260*11042SErik.Nordmark@Sun.COM 		return (ENETUNREACH);
2261*11042SErik.Nordmark@Sun.COM 	}
2262*11042SErik.Nordmark@Sun.COM 
2263*11042SErik.Nordmark@Sun.COM 	irb = ire->ire_bucket;
2264*11042SErik.Nordmark@Sun.COM 	irb_refhold(irb);
2265*11042SErik.Nordmark@Sun.COM 	for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
2266*11042SErik.Nordmark@Sun.COM 		/*
2267*11042SErik.Nordmark@Sun.COM 		 * For broadcast we can have a mixture of IRE_BROADCAST and
2268*11042SErik.Nordmark@Sun.COM 		 * IRE_HOST due to the manually added IRE_HOSTs that are used
2269*11042SErik.Nordmark@Sun.COM 		 * to trigger the creation of the special CGTP broadcast routes.
2270*11042SErik.Nordmark@Sun.COM 		 * Thus we have to skip if ire_type doesn't match the original.
2271*11042SErik.Nordmark@Sun.COM 		 */
2272*11042SErik.Nordmark@Sun.COM 		if (IRE_IS_CONDEMNED(ire1) ||
2273*11042SErik.Nordmark@Sun.COM 		    !(ire1->ire_flags & RTF_MULTIRT) ||
2274*11042SErik.Nordmark@Sun.COM 		    ire1->ire_type != ire->ire_type)
2275*11042SErik.Nordmark@Sun.COM 			continue;
2276*11042SErik.Nordmark@Sun.COM 
2277*11042SErik.Nordmark@Sun.COM 		/* Do the ire argument one after the loop */
2278*11042SErik.Nordmark@Sun.COM 		if (ire1 == ire)
2279*11042SErik.Nordmark@Sun.COM 			continue;
2280*11042SErik.Nordmark@Sun.COM 
2281*11042SErik.Nordmark@Sun.COM 		ill1 = ire_nexthop_ill(ire1);
2282*11042SErik.Nordmark@Sun.COM 		if (ill1 == NULL) {
2283*11042SErik.Nordmark@Sun.COM 			/*
2284*11042SErik.Nordmark@Sun.COM 			 * This ire might not have been picked by
2285*11042SErik.Nordmark@Sun.COM 			 * ire_route_recursive, in which case ire_dep might
2286*11042SErik.Nordmark@Sun.COM 			 * not have been setup yet.
2287*11042SErik.Nordmark@Sun.COM 			 * We kick ire_route_recursive to try to resolve
2288*11042SErik.Nordmark@Sun.COM 			 * starting at ire1.
2289*11042SErik.Nordmark@Sun.COM 			 */
2290*11042SErik.Nordmark@Sun.COM 			ire_t *ire2;
2291*11042SErik.Nordmark@Sun.COM 
2292*11042SErik.Nordmark@Sun.COM 			ire2 = ire_route_recursive_impl_v4(ire1,
2293*11042SErik.Nordmark@Sun.COM 			    ire1->ire_addr, ire1->ire_type, ire1->ire_ill,
2294*11042SErik.Nordmark@Sun.COM 			    ire1->ire_zoneid, NULL, MATCH_IRE_DSTONLY,
2295*11042SErik.Nordmark@Sun.COM 			    B_TRUE, 0, ipst, NULL, NULL, NULL);
2296*11042SErik.Nordmark@Sun.COM 			if (ire2 != NULL)
2297*11042SErik.Nordmark@Sun.COM 				ire_refrele(ire2);
2298*11042SErik.Nordmark@Sun.COM 			ill1 = ire_nexthop_ill(ire1);
2299*11042SErik.Nordmark@Sun.COM 		}
2300*11042SErik.Nordmark@Sun.COM 
2301*11042SErik.Nordmark@Sun.COM 		if (ill1 == NULL) {
2302*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2303*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - no ill",
2304*11042SErik.Nordmark@Sun.COM 			    mp, ill);
2305*11042SErik.Nordmark@Sun.COM 			error = ENETUNREACH;
2306*11042SErik.Nordmark@Sun.COM 			continue;
2307*11042SErik.Nordmark@Sun.COM 		}
2308*11042SErik.Nordmark@Sun.COM 
2309*11042SErik.Nordmark@Sun.COM 		/* Pick the addr and type to use for arp_nce_init */
2310*11042SErik.Nordmark@Sun.COM 		if (nce->nce_common->ncec_flags & NCE_F_BCAST) {
2311*11042SErik.Nordmark@Sun.COM 			ire_type = IRE_BROADCAST;
2312*11042SErik.Nordmark@Sun.COM 			nexthop = ire1->ire_gateway_addr;
2313*11042SErik.Nordmark@Sun.COM 		} else if (nce->nce_common->ncec_flags & NCE_F_MCAST) {
2314*11042SErik.Nordmark@Sun.COM 			ire_type = IRE_MULTICAST;
2315*11042SErik.Nordmark@Sun.COM 			nexthop = ipha->ipha_dst;
2316*11042SErik.Nordmark@Sun.COM 		} else {
2317*11042SErik.Nordmark@Sun.COM 			ire_type = ire1->ire_type;	/* Doesn't matter */
2318*11042SErik.Nordmark@Sun.COM 			nexthop = ire1->ire_gateway_addr;
2319*11042SErik.Nordmark@Sun.COM 		}
2320*11042SErik.Nordmark@Sun.COM 
2321*11042SErik.Nordmark@Sun.COM 		/* If IPMP meta or under, then we just drop */
2322*11042SErik.Nordmark@Sun.COM 		if (ill1->ill_grp != NULL) {
2323*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
2324*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - IPMP",
2325*11042SErik.Nordmark@Sun.COM 			    mp, ill1);
2326*11042SErik.Nordmark@Sun.COM 			ill_refrele(ill1);
2327*11042SErik.Nordmark@Sun.COM 			error = ENETUNREACH;
2328*11042SErik.Nordmark@Sun.COM 			continue;
2329*11042SErik.Nordmark@Sun.COM 		}
2330*11042SErik.Nordmark@Sun.COM 
2331*11042SErik.Nordmark@Sun.COM 		nce1 = arp_nce_init(ill1, nexthop, ire_type);
2332*11042SErik.Nordmark@Sun.COM 		if (nce1 == NULL) {
2333*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
2334*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards - no nce",
2335*11042SErik.Nordmark@Sun.COM 			    mp, ill1);
2336*11042SErik.Nordmark@Sun.COM 			ill_refrele(ill1);
2337*11042SErik.Nordmark@Sun.COM 			error = ENETUNREACH;
2338*11042SErik.Nordmark@Sun.COM 			continue;
2339*11042SErik.Nordmark@Sun.COM 		}
2340*11042SErik.Nordmark@Sun.COM 		mp1 = copymsg(mp);
2341*11042SErik.Nordmark@Sun.COM 		if (mp1 == NULL) {
2342*11042SErik.Nordmark@Sun.COM 			BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards);
2343*11042SErik.Nordmark@Sun.COM 			ip_drop_output("ipIfStatsOutDiscards", mp, ill1);
2344*11042SErik.Nordmark@Sun.COM 			nce_refrele(nce1);
2345*11042SErik.Nordmark@Sun.COM 			ill_refrele(ill1);
2346*11042SErik.Nordmark@Sun.COM 			error = ENOBUFS;
2347*11042SErik.Nordmark@Sun.COM 			continue;
2348*11042SErik.Nordmark@Sun.COM 		}
2349*11042SErik.Nordmark@Sun.COM 		/* Preserve HW checksum for this copy */
2350*11042SErik.Nordmark@Sun.COM 		DB_CKSUMSTART(mp1) = DB_CKSUMSTART(mp);
2351*11042SErik.Nordmark@Sun.COM 		DB_CKSUMSTUFF(mp1) = DB_CKSUMSTUFF(mp);
2352*11042SErik.Nordmark@Sun.COM 		DB_CKSUMEND(mp1) = DB_CKSUMEND(mp);
2353*11042SErik.Nordmark@Sun.COM 		DB_CKSUMFLAGS(mp1) = DB_CKSUMFLAGS(mp);
2354*11042SErik.Nordmark@Sun.COM 		DB_LSOMSS(mp1) = DB_LSOMSS(mp);
2355*11042SErik.Nordmark@Sun.COM 
2356*11042SErik.Nordmark@Sun.COM 		ire1->ire_ob_pkt_count++;
2357*11042SErik.Nordmark@Sun.COM 		err = ip_xmit(mp1, nce1, ixaflags, pkt_len, xmit_hint, szone,
2358*11042SErik.Nordmark@Sun.COM 		    0, ixacookie);
2359*11042SErik.Nordmark@Sun.COM 		if (err == 0)
2360*11042SErik.Nordmark@Sun.COM 			num_sent++;
2361*11042SErik.Nordmark@Sun.COM 		else
2362*11042SErik.Nordmark@Sun.COM 			error = err;
2363*11042SErik.Nordmark@Sun.COM 		nce_refrele(nce1);
2364*11042SErik.Nordmark@Sun.COM 		ill_refrele(ill1);
2365*11042SErik.Nordmark@Sun.COM 	}
2366*11042SErik.Nordmark@Sun.COM 	irb_refrele(irb);
2367*11042SErik.Nordmark@Sun.COM 	ire_refrele(ire);
2368*11042SErik.Nordmark@Sun.COM 	/* Finally, the main one */
2369*11042SErik.Nordmark@Sun.COM 	err = ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0,
2370*11042SErik.Nordmark@Sun.COM 	    ixacookie);
2371*11042SErik.Nordmark@Sun.COM 	if (err == 0)
2372*11042SErik.Nordmark@Sun.COM 		num_sent++;
2373*11042SErik.Nordmark@Sun.COM 	else
2374*11042SErik.Nordmark@Sun.COM 		error = err;
2375*11042SErik.Nordmark@Sun.COM 	if (num_sent > 0)
2376*11042SErik.Nordmark@Sun.COM 		return (0);
2377*11042SErik.Nordmark@Sun.COM 	else
2378*11042SErik.Nordmark@Sun.COM 		return (error);
2379*11042SErik.Nordmark@Sun.COM }
2380*11042SErik.Nordmark@Sun.COM 
2381*11042SErik.Nordmark@Sun.COM /*
2382*11042SErik.Nordmark@Sun.COM  * Verify local connectivity. This check is called by ULP fusion code.
2383*11042SErik.Nordmark@Sun.COM  * The generation number on an IRE_LOCAL or IRE_LOOPBACK only changes if
2384*11042SErik.Nordmark@Sun.COM  * the interface is brought down and back up. So we simply fail the local
2385*11042SErik.Nordmark@Sun.COM  * process. The caller, TCP Fusion, should unfuse the connection.
2386*11042SErik.Nordmark@Sun.COM  */
2387*11042SErik.Nordmark@Sun.COM boolean_t
2388*11042SErik.Nordmark@Sun.COM ip_output_verify_local(ip_xmit_attr_t *ixa)
2389*11042SErik.Nordmark@Sun.COM {
2390*11042SErik.Nordmark@Sun.COM 	ire_t		*ire = ixa->ixa_ire;
2391*11042SErik.Nordmark@Sun.COM 
2392*11042SErik.Nordmark@Sun.COM 	if (!(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)))
2393*11042SErik.Nordmark@Sun.COM 		return (B_FALSE);
2394*11042SErik.Nordmark@Sun.COM 
2395*11042SErik.Nordmark@Sun.COM 	return (ixa->ixa_ire->ire_generation == ixa->ixa_ire_generation);
2396*11042SErik.Nordmark@Sun.COM }
2397*11042SErik.Nordmark@Sun.COM 
2398*11042SErik.Nordmark@Sun.COM /*
2399*11042SErik.Nordmark@Sun.COM  * Local process for ULP loopback, TCP Fusion. Handle both IPv4 and IPv6.
2400*11042SErik.Nordmark@Sun.COM  *
2401*11042SErik.Nordmark@Sun.COM  * The caller must call ip_output_verify_local() first. This function handles
2402*11042SErik.Nordmark@Sun.COM  * IPobs, FW_HOOKS, and/or IPsec cases sequentially.
2403*11042SErik.Nordmark@Sun.COM  */
2404*11042SErik.Nordmark@Sun.COM mblk_t *
2405*11042SErik.Nordmark@Sun.COM ip_output_process_local(mblk_t *mp, ip_xmit_attr_t *ixa, boolean_t hooks_out,
2406*11042SErik.Nordmark@Sun.COM     boolean_t hooks_in, conn_t *peer_connp)
2407*11042SErik.Nordmark@Sun.COM {
2408*11042SErik.Nordmark@Sun.COM 	ill_t		*ill = ixa->ixa_ire->ire_ill;
2409*11042SErik.Nordmark@Sun.COM 	ipha_t		*ipha = NULL;
2410*11042SErik.Nordmark@Sun.COM 	ip6_t		*ip6h = NULL;
2411*11042SErik.Nordmark@Sun.COM 	ip_stack_t	*ipst = ixa->ixa_ipst;
2412*11042SErik.Nordmark@Sun.COM 	iaflags_t	ixaflags = ixa->ixa_flags;
2413*11042SErik.Nordmark@Sun.COM 	ip_recv_attr_t	iras;
2414*11042SErik.Nordmark@Sun.COM 	int		error;
2415*11042SErik.Nordmark@Sun.COM 
2416*11042SErik.Nordmark@Sun.COM 	ASSERT(mp != NULL);
2417*11042SErik.Nordmark@Sun.COM 
2418*11042SErik.Nordmark@Sun.COM 	if (ixaflags & IXAF_IS_IPV4) {
2419*11042SErik.Nordmark@Sun.COM 		ipha = (ipha_t *)mp->b_rptr;
2420*11042SErik.Nordmark@Sun.COM 
2421*11042SErik.Nordmark@Sun.COM 		/*
2422*11042SErik.Nordmark@Sun.COM 		 * If a callback is enabled then we need to know the
2423*11042SErik.Nordmark@Sun.COM 		 * source and destination zoneids for the packet. We already
2424*11042SErik.Nordmark@Sun.COM 		 * have those handy.
2425*11042SErik.Nordmark@Sun.COM 		 */
2426*11042SErik.Nordmark@Sun.COM 		if (ipst->ips_ip4_observe.he_interested) {
2427*11042SErik.Nordmark@Sun.COM 			zoneid_t szone, dzone;
2428*11042SErik.Nordmark@Sun.COM 			zoneid_t stackzoneid;
2429*11042SErik.Nordmark@Sun.COM 
2430*11042SErik.Nordmark@Sun.COM 			stackzoneid = netstackid_to_zoneid(
2431*11042SErik.Nordmark@Sun.COM 			    ipst->ips_netstack->netstack_stackid);
2432*11042SErik.Nordmark@Sun.COM 
2433*11042SErik.Nordmark@Sun.COM 			if (stackzoneid == GLOBAL_ZONEID) {
2434*11042SErik.Nordmark@Sun.COM 				/* Shared-IP zone */
2435*11042SErik.Nordmark@Sun.COM 				dzone = ixa->ixa_ire->ire_zoneid;
2436*11042SErik.Nordmark@Sun.COM 				szone = ixa->ixa_zoneid;
2437*11042SErik.Nordmark@Sun.COM 			} else {
2438*11042SErik.Nordmark@Sun.COM 				szone = dzone = stackzoneid;
2439*11042SErik.Nordmark@Sun.COM 			}
2440*11042SErik.Nordmark@Sun.COM 			ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
2441*11042SErik.Nordmark@Sun.COM 			    ipst);
2442*11042SErik.Nordmark@Sun.COM 		}
2443*11042SErik.Nordmark@Sun.COM 		DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2444*11042SErik.Nordmark@Sun.COM 		    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *,
2445*11042SErik.Nordmark@Sun.COM 		    NULL, int, 1);
2446*11042SErik.Nordmark@Sun.COM 
2447*11042SErik.Nordmark@Sun.COM 		/* FW_HOOKS: LOOPBACK_OUT */
2448*11042SErik.Nordmark@Sun.COM 		if (hooks_out) {
2449*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL,
2450*11042SErik.Nordmark@Sun.COM 			    ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
2451*11042SErik.Nordmark@Sun.COM 			FW_HOOKS(ipst->ips_ip4_loopback_out_event,
2452*11042SErik.Nordmark@Sun.COM 			    ipst->ips_ipv4firewall_loopback_out,
2453*11042SErik.Nordmark@Sun.COM 			    NULL, ill, ipha, mp, mp, 0, ipst, error);
2454*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp);
2455*11042SErik.Nordmark@Sun.COM 		}
2456*11042SErik.Nordmark@Sun.COM 		if (mp == NULL)
2457*11042SErik.Nordmark@Sun.COM 			return (NULL);
2458*11042SErik.Nordmark@Sun.COM 
2459*11042SErik.Nordmark@Sun.COM 		/* FW_HOOKS: LOOPBACK_IN */
2460*11042SErik.Nordmark@Sun.COM 		if (hooks_in) {
2461*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill,
2462*11042SErik.Nordmark@Sun.COM 			    ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp);
2463*11042SErik.Nordmark@Sun.COM 			FW_HOOKS(ipst->ips_ip4_loopback_in_event,
2464*11042SErik.Nordmark@Sun.COM 			    ipst->ips_ipv4firewall_loopback_in,
2465*11042SErik.Nordmark@Sun.COM 			    ill, NULL, ipha, mp, mp, 0, ipst, error);
2466*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp);
2467*11042SErik.Nordmark@Sun.COM 		}
2468*11042SErik.Nordmark@Sun.COM 		if (mp == NULL)
2469*11042SErik.Nordmark@Sun.COM 			return (NULL);
2470*11042SErik.Nordmark@Sun.COM 
2471*11042SErik.Nordmark@Sun.COM 		DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2472*11042SErik.Nordmark@Sun.COM 		    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *,
2473*11042SErik.Nordmark@Sun.COM 		    NULL, int, 1);
2474*11042SErik.Nordmark@Sun.COM 
2475*11042SErik.Nordmark@Sun.COM 		/* Inbound IPsec polocies */
2476*11042SErik.Nordmark@Sun.COM 		if (peer_connp != NULL) {
2477*11042SErik.Nordmark@Sun.COM 			/* Map ixa to ira including IPsec policies. */
2478*11042SErik.Nordmark@Sun.COM 			ipsec_out_to_in(ixa, ill, &iras);
2479*11042SErik.Nordmark@Sun.COM 			mp = ipsec_check_inbound_policy(mp, peer_connp, ipha,
2480*11042SErik.Nordmark@Sun.COM 			    NULL, &iras);
2481*11042SErik.Nordmark@Sun.COM 		}
2482*11042SErik.Nordmark@Sun.COM 	} else {
2483*11042SErik.Nordmark@Sun.COM 		ip6h = (ip6_t *)mp->b_rptr;
2484*11042SErik.Nordmark@Sun.COM 
2485*11042SErik.Nordmark@Sun.COM 		/*
2486*11042SErik.Nordmark@Sun.COM 		 * If a callback is enabled then we need to know the
2487*11042SErik.Nordmark@Sun.COM 		 * source and destination zoneids for the packet. We already
2488*11042SErik.Nordmark@Sun.COM 		 * have those handy.
2489*11042SErik.Nordmark@Sun.COM 		 */
2490*11042SErik.Nordmark@Sun.COM 		if (ipst->ips_ip6_observe.he_interested) {
2491*11042SErik.Nordmark@Sun.COM 			zoneid_t szone, dzone;
2492*11042SErik.Nordmark@Sun.COM 			zoneid_t stackzoneid;
2493*11042SErik.Nordmark@Sun.COM 
2494*11042SErik.Nordmark@Sun.COM 			stackzoneid = netstackid_to_zoneid(
2495*11042SErik.Nordmark@Sun.COM 			    ipst->ips_netstack->netstack_stackid);
2496*11042SErik.Nordmark@Sun.COM 
2497*11042SErik.Nordmark@Sun.COM 			if (stackzoneid == GLOBAL_ZONEID) {
2498*11042SErik.Nordmark@Sun.COM 				/* Shared-IP zone */
2499*11042SErik.Nordmark@Sun.COM 				dzone = ixa->ixa_ire->ire_zoneid;
2500*11042SErik.Nordmark@Sun.COM 				szone = ixa->ixa_zoneid;
2501*11042SErik.Nordmark@Sun.COM 			} else {
2502*11042SErik.Nordmark@Sun.COM 				szone = dzone = stackzoneid;
2503*11042SErik.Nordmark@Sun.COM 			}
2504*11042SErik.Nordmark@Sun.COM 			ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill,
2505*11042SErik.Nordmark@Sun.COM 			    ipst);
2506*11042SErik.Nordmark@Sun.COM 		}
2507*11042SErik.Nordmark@Sun.COM 		DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2508*11042SErik.Nordmark@Sun.COM 		    ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *,
2509*11042SErik.Nordmark@Sun.COM 		    ip6h, int, 1);
2510*11042SErik.Nordmark@Sun.COM 
2511*11042SErik.Nordmark@Sun.COM 		/* FW_HOOKS: LOOPBACK_OUT */
2512*11042SErik.Nordmark@Sun.COM 		if (hooks_out) {
2513*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE4(ip6__loopback__out__start, ill_t *, NULL,
2514*11042SErik.Nordmark@Sun.COM 			    ill_t *, ill, ip6_t *, ip6h, mblk_t *, mp);
2515*11042SErik.Nordmark@Sun.COM 			FW_HOOKS6(ipst->ips_ip6_loopback_out_event,
2516*11042SErik.Nordmark@Sun.COM 			    ipst->ips_ipv6firewall_loopback_out,
2517*11042SErik.Nordmark@Sun.COM 			    NULL, ill, ip6h, mp, mp, 0, ipst, error);
2518*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, mp);
2519*11042SErik.Nordmark@Sun.COM 		}
2520*11042SErik.Nordmark@Sun.COM 		if (mp == NULL)
2521*11042SErik.Nordmark@Sun.COM 			return (NULL);
2522*11042SErik.Nordmark@Sun.COM 
2523*11042SErik.Nordmark@Sun.COM 		/* FW_HOOKS: LOOPBACK_IN */
2524*11042SErik.Nordmark@Sun.COM 		if (hooks_in) {
2525*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE4(ip6__loopback__in__start, ill_t *, ill,
2526*11042SErik.Nordmark@Sun.COM 			    ill_t *, NULL, ip6_t *, ip6h, mblk_t *, mp);
2527*11042SErik.Nordmark@Sun.COM 			FW_HOOKS6(ipst->ips_ip6_loopback_in_event,
2528*11042SErik.Nordmark@Sun.COM 			    ipst->ips_ipv6firewall_loopback_in,
2529*11042SErik.Nordmark@Sun.COM 			    ill, NULL, ip6h, mp, mp, 0, ipst, error);
2530*11042SErik.Nordmark@Sun.COM 			DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, mp);
2531*11042SErik.Nordmark@Sun.COM 		}
2532*11042SErik.Nordmark@Sun.COM 		if (mp == NULL)
2533*11042SErik.Nordmark@Sun.COM 			return (NULL);
2534*11042SErik.Nordmark@Sun.COM 
2535*11042SErik.Nordmark@Sun.COM 		DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
2536*11042SErik.Nordmark@Sun.COM 		    ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *,
2537*11042SErik.Nordmark@Sun.COM 		    ip6h, int, 1);
2538*11042SErik.Nordmark@Sun.COM 
2539*11042SErik.Nordmark@Sun.COM 		/* Inbound IPsec polocies */
2540*11042SErik.Nordmark@Sun.COM 		if (peer_connp != NULL) {
2541*11042SErik.Nordmark@Sun.COM 			/* Map ixa to ira including IPsec policies. */
2542*11042SErik.Nordmark@Sun.COM 			ipsec_out_to_in(ixa, ill, &iras);
2543*11042SErik.Nordmark@Sun.COM 			mp = ipsec_check_inbound_policy(mp, peer_connp, NULL,
2544*11042SErik.Nordmark@Sun.COM 			    ip6h, &iras);
2545*11042SErik.Nordmark@Sun.COM 		}
2546*11042SErik.Nordmark@Sun.COM 	}
2547*11042SErik.Nordmark@Sun.COM 
2548*11042SErik.Nordmark@Sun.COM 	if (mp == NULL) {
2549*11042SErik.Nordmark@Sun.COM 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2550*11042SErik.Nordmark@Sun.COM 		ip_drop_input("ipIfStatsInDiscards", NULL, ill);
2551*11042SErik.Nordmark@Sun.COM 	}
2552*11042SErik.Nordmark@Sun.COM 
2553*11042SErik.Nordmark@Sun.COM 	return (mp);
2554*11042SErik.Nordmark@Sun.COM }
2555