xref: /onnv-gate/usr/src/uts/common/inet/ip/ip_ndp.c (revision 9175:3d80e1dd8316)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51676Sjpk  * Common Development and Distribution License (the "License").
61676Sjpk  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
228485SPeter.Memishian@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #include <sys/types.h>
270Sstevel@tonic-gate #include <sys/stream.h>
280Sstevel@tonic-gate #include <sys/stropts.h>
292546Scarlsonj #include <sys/strsun.h>
300Sstevel@tonic-gate #include <sys/sysmacros.h>
310Sstevel@tonic-gate #include <sys/errno.h>
320Sstevel@tonic-gate #include <sys/dlpi.h>
330Sstevel@tonic-gate #include <sys/socket.h>
340Sstevel@tonic-gate #include <sys/ddi.h>
352546Scarlsonj #include <sys/sunddi.h>
360Sstevel@tonic-gate #include <sys/cmn_err.h>
370Sstevel@tonic-gate #include <sys/debug.h>
380Sstevel@tonic-gate #include <sys/vtrace.h>
390Sstevel@tonic-gate #include <sys/kmem.h>
400Sstevel@tonic-gate #include <sys/zone.h>
412546Scarlsonj #include <sys/ethernet.h>
422546Scarlsonj #include <sys/sdt.h>
430Sstevel@tonic-gate 
440Sstevel@tonic-gate #include <net/if.h>
452546Scarlsonj #include <net/if_types.h>
460Sstevel@tonic-gate #include <net/if_dl.h>
470Sstevel@tonic-gate #include <net/route.h>
480Sstevel@tonic-gate #include <netinet/in.h>
490Sstevel@tonic-gate #include <netinet/ip6.h>
500Sstevel@tonic-gate #include <netinet/icmp6.h>
510Sstevel@tonic-gate 
520Sstevel@tonic-gate #include <inet/common.h>
530Sstevel@tonic-gate #include <inet/mi.h>
540Sstevel@tonic-gate #include <inet/mib2.h>
550Sstevel@tonic-gate #include <inet/nd.h>
560Sstevel@tonic-gate #include <inet/ip.h>
572733Snordmark #include <inet/ip_impl.h>
583448Sdh155122 #include <inet/ipclassifier.h>
590Sstevel@tonic-gate #include <inet/ip_if.h>
600Sstevel@tonic-gate #include <inet/ip_ire.h>
610Sstevel@tonic-gate #include <inet/ip_rts.h>
620Sstevel@tonic-gate #include <inet/ip6.h>
630Sstevel@tonic-gate #include <inet/ip_ndp.h>
640Sstevel@tonic-gate #include <inet/ipsec_impl.h>
650Sstevel@tonic-gate #include <inet/ipsec_info.h>
662546Scarlsonj #include <inet/sctp_ip.h>
67*9175SSowmini.Varadhan@Sun.COM #include <inet/ip2mac_impl.h>
680Sstevel@tonic-gate 
/*
 * Functions with the nce_ prefix are static to this file, while functions
 * with the ndp_ prefix are used by the rest of IP.
 *
 * Lock ordering:
 *
 *	ndp_g_lock -> ill_lock -> nce_lock
 *
 * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and
 * nce_next.  Nce_lock protects the contents of the NCE (particularly
 * nce_refcnt).
 */
810Sstevel@tonic-gate 
822546Scarlsonj static	boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr,
830Sstevel@tonic-gate     uint32_t ll_addr_len);
840Sstevel@tonic-gate static	void	nce_ire_delete(nce_t *nce);
850Sstevel@tonic-gate static	void	nce_ire_delete1(ire_t *ire, char *nce_arg);
860Sstevel@tonic-gate static	void 	nce_set_ll(nce_t *nce, uchar_t *ll_addr);
878485SPeter.Memishian@Sun.COM static	nce_t	*nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *,
888485SPeter.Memishian@Sun.COM     nce_t *);
898485SPeter.Memishian@Sun.COM static	nce_t	*nce_lookup_mapping(ill_t *, const in6_addr_t *);
900Sstevel@tonic-gate static	void	nce_make_mapping(nce_t *nce, uchar_t *addrpos,
910Sstevel@tonic-gate     uchar_t *addr);
920Sstevel@tonic-gate static	int	nce_set_multicast(ill_t *ill, const in6_addr_t *addr);
930Sstevel@tonic-gate static	void	nce_queue_mp(nce_t *nce, mblk_t *mp);
940Sstevel@tonic-gate static	mblk_t	*nce_udreq_alloc(ill_t *ill);
950Sstevel@tonic-gate static	void	nce_update(nce_t *nce, uint16_t new_state,
960Sstevel@tonic-gate     uchar_t *new_ll_addr);
97*9175SSowmini.Varadhan@Sun.COM static	uint32_t	nce_solicit(nce_t *nce, in6_addr_t src);
988485SPeter.Memishian@Sun.COM static	boolean_t	nce_xmit(ill_t *ill, uint8_t type,
998485SPeter.Memishian@Sun.COM     boolean_t use_lla_addr, const in6_addr_t *sender,
1000Sstevel@tonic-gate     const in6_addr_t *target, int flag);
1018485SPeter.Memishian@Sun.COM static boolean_t	nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla,
1028485SPeter.Memishian@Sun.COM     const in6_addr_t *target, uint_t flags);
1038485SPeter.Memishian@Sun.COM static boolean_t	nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla,
1048485SPeter.Memishian@Sun.COM     const in6_addr_t *src, uint_t flags);
1054714Ssowmini static int	ndp_add_v4(ill_t *, const in_addr_t *, uint16_t,
1064714Ssowmini     nce_t **, nce_t *);
1078485SPeter.Memishian@Sun.COM static ipif_t	*ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill);
1084714Ssowmini 
1095023Scarlsonj #ifdef DEBUG
1105023Scarlsonj static void	nce_trace_cleanup(const nce_t *);
1110Sstevel@tonic-gate #endif
1120Sstevel@tonic-gate 
/*
 * Address of the hash bucket head, in the per-stack IPv4 NCE table, that
 * `addr' hashes to.
 */
#define	NCE_HASH_PTR_V4(ipst, addr)					\
	(&((ipst)->ips_ndp4->nce_hash_tbl[				\
	    IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)]))

/*
 * Address of the hash bucket head, in the per-stack IPv6 NCE table, that
 * `addr' hashes to.
 */
#define	NCE_HASH_PTR_V6(ipst, addr)					\
	(&((ipst)->ips_ndp6->nce_hash_tbl[				\
	    NCE_ADDR_HASH_V6(addr, NCE_TABLE_SIZE)]))

/*
 * Non-tunable probe interval, based on link capabilities: the shorter
 * value is used when ill_note_link is set on the ill.
 * NOTE(review): the unit is presumably milliseconds, as this is only ever
 * handed to NDP_RESTART_TIMER -- confirm against that macro.
 */
#define	ILL_PROBE_INTERVAL(ill)	((ill)->ill_note_link ? 150 : 1500)
1222546Scarlsonj 
1230Sstevel@tonic-gate /*
1240Sstevel@tonic-gate  * NDP Cache Entry creation routine.
1250Sstevel@tonic-gate  * Mapped entries will never do NUD .
1263448Sdh155122  * This routine must always be called with ndp6->ndp_g_lock held.
1270Sstevel@tonic-gate  * Prior to return, nce_refcnt is incremented.
1280Sstevel@tonic-gate  */
1294714Ssowmini int
1302535Ssangeeta ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr,
1310Sstevel@tonic-gate     const in6_addr_t *mask, const in6_addr_t *extract_mask,
1320Sstevel@tonic-gate     uint32_t hw_extract_start, uint16_t flags, uint16_t state,
1330Sstevel@tonic-gate     nce_t **newnce)
1340Sstevel@tonic-gate {
1352535Ssangeeta 	static	nce_t		nce_nil;
1360Sstevel@tonic-gate 	nce_t		*nce;
1370Sstevel@tonic-gate 	mblk_t		*mp;
1380Sstevel@tonic-gate 	mblk_t		*template;
1390Sstevel@tonic-gate 	nce_t		**ncep;
1402546Scarlsonj 	int		err;
1410Sstevel@tonic-gate 	boolean_t	dropped = B_FALSE;
1423448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
1433448Sdh155122 
1443448Sdh155122 	ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
1452535Ssangeeta 	ASSERT(ill != NULL && ill->ill_isv6);
1460Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(addr)) {
1474714Ssowmini 		ip0dbg(("ndp_add_v6: no addr\n"));
1480Sstevel@tonic-gate 		return (EINVAL);
1490Sstevel@tonic-gate 	}
1500Sstevel@tonic-gate 	if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) {
1514714Ssowmini 		ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags));
1520Sstevel@tonic-gate 		return (EINVAL);
1530Sstevel@tonic-gate 	}
1540Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) &&
1550Sstevel@tonic-gate 	    (flags & NCE_F_MAPPING)) {
1564714Ssowmini 		ip0dbg(("ndp_add_v6: extract mask zero for mapping"));
1570Sstevel@tonic-gate 		return (EINVAL);
1580Sstevel@tonic-gate 	}
1590Sstevel@tonic-gate 	/*
1600Sstevel@tonic-gate 	 * Allocate the mblk to hold the nce.
1610Sstevel@tonic-gate 	 *
1620Sstevel@tonic-gate 	 * XXX This can come out of a separate cache - nce_cache.
1630Sstevel@tonic-gate 	 * We don't need the mp anymore as there are no more
1640Sstevel@tonic-gate 	 * "qwriter"s
1650Sstevel@tonic-gate 	 */
1660Sstevel@tonic-gate 	mp = allocb(sizeof (nce_t), BPRI_MED);
1670Sstevel@tonic-gate 	if (mp == NULL)
1680Sstevel@tonic-gate 		return (ENOMEM);
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	nce = (nce_t *)mp->b_rptr;
1710Sstevel@tonic-gate 	mp->b_wptr = (uchar_t *)&nce[1];
1720Sstevel@tonic-gate 	*nce = nce_nil;
1730Sstevel@tonic-gate 
1740Sstevel@tonic-gate 	/*
1750Sstevel@tonic-gate 	 * This one holds link layer address
1760Sstevel@tonic-gate 	 */
1770Sstevel@tonic-gate 	if (ill->ill_net_type == IRE_IF_RESOLVER) {
1780Sstevel@tonic-gate 		template = nce_udreq_alloc(ill);
1790Sstevel@tonic-gate 	} else {
1803150Ssowmini 		if (ill->ill_resolver_mp == NULL) {
1813150Ssowmini 			freeb(mp);
1823150Ssowmini 			return (EINVAL);
1833150Ssowmini 		}
1840Sstevel@tonic-gate 		ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER));
1850Sstevel@tonic-gate 		template = copyb(ill->ill_resolver_mp);
1860Sstevel@tonic-gate 	}
1870Sstevel@tonic-gate 	if (template == NULL) {
1880Sstevel@tonic-gate 		freeb(mp);
1890Sstevel@tonic-gate 		return (ENOMEM);
1900Sstevel@tonic-gate 	}
1910Sstevel@tonic-gate 	nce->nce_ill = ill;
1922535Ssangeeta 	nce->nce_ipversion = IPV6_VERSION;
1930Sstevel@tonic-gate 	nce->nce_flags = flags;
1940Sstevel@tonic-gate 	nce->nce_state = state;
1950Sstevel@tonic-gate 	nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
1960Sstevel@tonic-gate 	nce->nce_rcnt = ill->ill_xmit_count;
1970Sstevel@tonic-gate 	nce->nce_addr = *addr;
1980Sstevel@tonic-gate 	nce->nce_mask = *mask;
1990Sstevel@tonic-gate 	nce->nce_extract_mask = *extract_mask;
2000Sstevel@tonic-gate 	nce->nce_ll_extract_start = hw_extract_start;
2010Sstevel@tonic-gate 	nce->nce_fp_mp = NULL;
2020Sstevel@tonic-gate 	nce->nce_res_mp = template;
2030Sstevel@tonic-gate 	if (state == ND_REACHABLE)
2040Sstevel@tonic-gate 		nce->nce_last = TICK_TO_MSEC(lbolt64);
2050Sstevel@tonic-gate 	else
2060Sstevel@tonic-gate 		nce->nce_last = 0;
2070Sstevel@tonic-gate 	nce->nce_qd_mp = NULL;
2080Sstevel@tonic-gate 	nce->nce_mp = mp;
2090Sstevel@tonic-gate 	if (hw_addr != NULL)
2100Sstevel@tonic-gate 		nce_set_ll(nce, hw_addr);
2110Sstevel@tonic-gate 	/* This one is for nce getting created */
2120Sstevel@tonic-gate 	nce->nce_refcnt = 1;
2130Sstevel@tonic-gate 	mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
2140Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_MAPPING) {
2150Sstevel@tonic-gate 		ASSERT(IN6_IS_ADDR_MULTICAST(addr));
2160Sstevel@tonic-gate 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask));
2170Sstevel@tonic-gate 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
2183448Sdh155122 		ncep = &ipst->ips_ndp6->nce_mask_entries;
2190Sstevel@tonic-gate 	} else {
2203448Sdh155122 		ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
2210Sstevel@tonic-gate 	}
2220Sstevel@tonic-gate 
2235023Scarlsonj 	nce->nce_trace_disable = B_FALSE;
2245023Scarlsonj 
225*9175SSowmini.Varadhan@Sun.COM 	list_create(&nce->nce_cb, sizeof (nce_cb_t),
226*9175SSowmini.Varadhan@Sun.COM 	    offsetof(nce_cb_t, nce_cb_node));
2270Sstevel@tonic-gate 	/*
2280Sstevel@tonic-gate 	 * Atomically ensure that the ill is not CONDEMNED, before
2290Sstevel@tonic-gate 	 * adding the NCE.
2300Sstevel@tonic-gate 	 */
2310Sstevel@tonic-gate 	mutex_enter(&ill->ill_lock);
2320Sstevel@tonic-gate 	if (ill->ill_state_flags & ILL_CONDEMNED) {
2330Sstevel@tonic-gate 		mutex_exit(&ill->ill_lock);
2340Sstevel@tonic-gate 		freeb(mp);
2352546Scarlsonj 		freeb(template);
2360Sstevel@tonic-gate 		return (EINVAL);
2370Sstevel@tonic-gate 	}
2380Sstevel@tonic-gate 	if ((nce->nce_next = *ncep) != NULL)
2390Sstevel@tonic-gate 		nce->nce_next->nce_ptpn = &nce->nce_next;
2400Sstevel@tonic-gate 	*ncep = nce;
2410Sstevel@tonic-gate 	nce->nce_ptpn = ncep;
2420Sstevel@tonic-gate 	*newnce = nce;
2430Sstevel@tonic-gate 	/* This one is for nce being used by an active thread */
2440Sstevel@tonic-gate 	NCE_REFHOLD(*newnce);
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate 	/* Bump up the number of nce's referencing this ill */
2476255Ssowmini 	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
2486255Ssowmini 	    (char *), "nce", (void *), nce);
2496379Ssowmini 	ill->ill_nce_cnt++;
2500Sstevel@tonic-gate 	mutex_exit(&ill->ill_lock);
2510Sstevel@tonic-gate 
2522546Scarlsonj 	err = 0;
2532546Scarlsonj 	if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) {
2542546Scarlsonj 		mutex_enter(&nce->nce_lock);
2553448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
2562546Scarlsonj 		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
2572546Scarlsonj 		mutex_exit(&nce->nce_lock);
2588485SPeter.Memishian@Sun.COM 		dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
2592546Scarlsonj 		if (dropped) {
2602546Scarlsonj 			mutex_enter(&nce->nce_lock);
2612546Scarlsonj 			nce->nce_pcnt++;
2622546Scarlsonj 			mutex_exit(&nce->nce_lock);
2632546Scarlsonj 		}
2642546Scarlsonj 		NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
2653448Sdh155122 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
2662546Scarlsonj 		err = EINPROGRESS;
2672546Scarlsonj 	} else if (flags & NCE_F_UNSOL_ADV) {
2680Sstevel@tonic-gate 		/*
2690Sstevel@tonic-gate 		 * We account for the transmit below by assigning one
2700Sstevel@tonic-gate 		 * less than the ndd variable. Subsequent decrements
2710Sstevel@tonic-gate 		 * are done in ndp_timer.
2720Sstevel@tonic-gate 		 */
2730Sstevel@tonic-gate 		mutex_enter(&nce->nce_lock);
2743448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
2753448Sdh155122 		nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1;
2760Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
2778485SPeter.Memishian@Sun.COM 		dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast,
2788485SPeter.Memishian@Sun.COM 		    0);
2790Sstevel@tonic-gate 		mutex_enter(&nce->nce_lock);
2800Sstevel@tonic-gate 		if (dropped)
2810Sstevel@tonic-gate 			nce->nce_unsolicit_count++;
2820Sstevel@tonic-gate 		if (nce->nce_unsolicit_count != 0) {
2838485SPeter.Memishian@Sun.COM 			ASSERT(nce->nce_timeout_id == 0);
2840Sstevel@tonic-gate 			nce->nce_timeout_id = timeout(ndp_timer, nce,
2853448Sdh155122 			    MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval));
2860Sstevel@tonic-gate 		}
2870Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
2883448Sdh155122 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
2890Sstevel@tonic-gate 	}
2908485SPeter.Memishian@Sun.COM 
291741Smasputra 	/*
292741Smasputra 	 * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then
293741Smasputra 	 * we call nce_fastpath as soon as the nce is resolved in ndp_process.
294741Smasputra 	 * We call nce_fastpath from nce_update if the link layer address of
295741Smasputra 	 * the peer changes from nce_update
296741Smasputra 	 */
297741Smasputra 	if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER)
298741Smasputra 		nce_fastpath(nce);
2992546Scarlsonj 	return (err);
3000Sstevel@tonic-gate }
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate int
3038485SPeter.Memishian@Sun.COM ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr,
3048485SPeter.Memishian@Sun.COM     const in6_addr_t *addr, const in6_addr_t *mask,
3058485SPeter.Memishian@Sun.COM     const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags,
3068485SPeter.Memishian@Sun.COM     uint16_t state, nce_t **newnce)
3070Sstevel@tonic-gate {
3080Sstevel@tonic-gate 	int	err = 0;
3090Sstevel@tonic-gate 	nce_t	*nce;
3103448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
3110Sstevel@tonic-gate 
3124714Ssowmini 	ASSERT(ill->ill_isv6);
3133448Sdh155122 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
3143448Sdh155122 
3153448Sdh155122 	/* Get head of v6 hash table */
3163448Sdh155122 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
3178485SPeter.Memishian@Sun.COM 	nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
3180Sstevel@tonic-gate 	if (nce == NULL) {
3194714Ssowmini 		err = ndp_add_v6(ill,
3200Sstevel@tonic-gate 		    hw_addr,
3210Sstevel@tonic-gate 		    addr,
3220Sstevel@tonic-gate 		    mask,
3230Sstevel@tonic-gate 		    extract_mask,
3240Sstevel@tonic-gate 		    hw_extract_start,
3250Sstevel@tonic-gate 		    flags,
3260Sstevel@tonic-gate 		    state,
3274714Ssowmini 		    newnce);
3280Sstevel@tonic-gate 	} else {
3290Sstevel@tonic-gate 		*newnce = nce;
3300Sstevel@tonic-gate 		err = EEXIST;
3310Sstevel@tonic-gate 	}
3323448Sdh155122 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
3330Sstevel@tonic-gate 	return (err);
3340Sstevel@tonic-gate }
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate /*
3370Sstevel@tonic-gate  * Remove all the CONDEMNED nces from the appropriate hash table.
3380Sstevel@tonic-gate  * We create a private list of NCEs, these may have ires pointing
3390Sstevel@tonic-gate  * to them, so the list will be passed through to clean up dependent
3400Sstevel@tonic-gate  * ires and only then we can do NCE_REFRELE which can make NCE inactive.
3410Sstevel@tonic-gate  */
3420Sstevel@tonic-gate static void
3432535Ssangeeta nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list)
3440Sstevel@tonic-gate {
3450Sstevel@tonic-gate 	nce_t *nce1;
3460Sstevel@tonic-gate 	nce_t **ptpn;
3470Sstevel@tonic-gate 
3482535Ssangeeta 	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
3492535Ssangeeta 	ASSERT(ndp->ndp_g_walker == 0);
3500Sstevel@tonic-gate 	for (; nce; nce = nce1) {
3510Sstevel@tonic-gate 		nce1 = nce->nce_next;
3520Sstevel@tonic-gate 		mutex_enter(&nce->nce_lock);
3530Sstevel@tonic-gate 		if (nce->nce_flags & NCE_F_CONDEMNED) {
3540Sstevel@tonic-gate 			ptpn = nce->nce_ptpn;
3550Sstevel@tonic-gate 			nce1 = nce->nce_next;
3560Sstevel@tonic-gate 			if (nce1 != NULL)
3570Sstevel@tonic-gate 				nce1->nce_ptpn = ptpn;
3580Sstevel@tonic-gate 			*ptpn = nce1;
3590Sstevel@tonic-gate 			nce->nce_ptpn = NULL;
3600Sstevel@tonic-gate 			nce->nce_next = NULL;
3610Sstevel@tonic-gate 			nce->nce_next = *free_nce_list;
3620Sstevel@tonic-gate 			*free_nce_list = nce;
3630Sstevel@tonic-gate 		}
3640Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
3650Sstevel@tonic-gate 	}
3660Sstevel@tonic-gate }
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate /*
3690Sstevel@tonic-gate  * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup()
3700Sstevel@tonic-gate  *    will return this NCE. Also no new IREs will be created that
3710Sstevel@tonic-gate  *    point to this NCE (See ire_add_v6).  Also no new timeouts will
3720Sstevel@tonic-gate  *    be started (See NDP_RESTART_TIMER).
3730Sstevel@tonic-gate  * 2. Cancel any currently running timeouts.
3740Sstevel@tonic-gate  * 3. If there is an ndp walker, return. The walker will do the cleanup.
3750Sstevel@tonic-gate  *    This ensures that walkers see a consistent list of NCEs while walking.
3760Sstevel@tonic-gate  * 4. Otherwise remove the NCE from the list of NCEs
3770Sstevel@tonic-gate  * 5. Delete all IREs pointing to this NCE.
3780Sstevel@tonic-gate  */
3790Sstevel@tonic-gate void
3800Sstevel@tonic-gate ndp_delete(nce_t *nce)
3810Sstevel@tonic-gate {
3820Sstevel@tonic-gate 	nce_t	**ptpn;
3830Sstevel@tonic-gate 	nce_t	*nce1;
3842535Ssangeeta 	int	ipversion = nce->nce_ipversion;
3853448Sdh155122 	ndp_g_t *ndp;
3863448Sdh155122 	ip_stack_t	*ipst = nce->nce_ill->ill_ipst;
3873448Sdh155122 
3883448Sdh155122 	if (ipversion == IPV4_VERSION)
3893448Sdh155122 		ndp = ipst->ips_ndp4;
3903448Sdh155122 	else
3913448Sdh155122 		ndp = ipst->ips_ndp6;
3920Sstevel@tonic-gate 
3930Sstevel@tonic-gate 	/* Serialize deletes */
3940Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
3950Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_CONDEMNED) {
3960Sstevel@tonic-gate 		/* Some other thread is doing the delete */
3970Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
3980Sstevel@tonic-gate 		return;
3990Sstevel@tonic-gate 	}
4000Sstevel@tonic-gate 	/*
4010Sstevel@tonic-gate 	 * Caller has a refhold. Also 1 ref for being in the list. Thus
4020Sstevel@tonic-gate 	 * refcnt has to be >= 2
4030Sstevel@tonic-gate 	 */
4040Sstevel@tonic-gate 	ASSERT(nce->nce_refcnt >= 2);
4050Sstevel@tonic-gate 	nce->nce_flags |= NCE_F_CONDEMNED;
4060Sstevel@tonic-gate 	mutex_exit(&nce->nce_lock);
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate 	nce_fastpath_list_delete(nce);
4090Sstevel@tonic-gate 
410*9175SSowmini.Varadhan@Sun.COM 	/* Complete any waiting callbacks */
411*9175SSowmini.Varadhan@Sun.COM 	nce_cb_dispatch(nce);
412*9175SSowmini.Varadhan@Sun.COM 
4130Sstevel@tonic-gate 	/*
4140Sstevel@tonic-gate 	 * Cancel any running timer. Timeout can't be restarted
4150Sstevel@tonic-gate 	 * since CONDEMNED is set. Can't hold nce_lock across untimeout.
4160Sstevel@tonic-gate 	 * Passing invalid timeout id is fine.
4170Sstevel@tonic-gate 	 */
4180Sstevel@tonic-gate 	if (nce->nce_timeout_id != 0) {
4190Sstevel@tonic-gate 		(void) untimeout(nce->nce_timeout_id);
4200Sstevel@tonic-gate 		nce->nce_timeout_id = 0;
4210Sstevel@tonic-gate 	}
4220Sstevel@tonic-gate 
4232535Ssangeeta 	mutex_enter(&ndp->ndp_g_lock);
4240Sstevel@tonic-gate 	if (nce->nce_ptpn == NULL) {
4250Sstevel@tonic-gate 		/*
4260Sstevel@tonic-gate 		 * The last ndp walker has already removed this nce from
4270Sstevel@tonic-gate 		 * the list after we marked the nce CONDEMNED and before
4282535Ssangeeta 		 * we grabbed the global lock.
4290Sstevel@tonic-gate 		 */
4302535Ssangeeta 		mutex_exit(&ndp->ndp_g_lock);
4310Sstevel@tonic-gate 		return;
4320Sstevel@tonic-gate 	}
4332535Ssangeeta 	if (ndp->ndp_g_walker > 0) {
4340Sstevel@tonic-gate 		/*
4350Sstevel@tonic-gate 		 * Can't unlink. The walker will clean up
4360Sstevel@tonic-gate 		 */
4372535Ssangeeta 		ndp->ndp_g_walker_cleanup = B_TRUE;
4382535Ssangeeta 		mutex_exit(&ndp->ndp_g_lock);
4390Sstevel@tonic-gate 		return;
4400Sstevel@tonic-gate 	}
4410Sstevel@tonic-gate 
4420Sstevel@tonic-gate 	/*
4430Sstevel@tonic-gate 	 * Now remove the nce from the list. NDP_RESTART_TIMER won't restart
4440Sstevel@tonic-gate 	 * the timer since it is marked CONDEMNED.
4450Sstevel@tonic-gate 	 */
4460Sstevel@tonic-gate 	ptpn = nce->nce_ptpn;
4470Sstevel@tonic-gate 	nce1 = nce->nce_next;
4480Sstevel@tonic-gate 	if (nce1 != NULL)
4490Sstevel@tonic-gate 		nce1->nce_ptpn = ptpn;
4500Sstevel@tonic-gate 	*ptpn = nce1;
4510Sstevel@tonic-gate 	nce->nce_ptpn = NULL;
4520Sstevel@tonic-gate 	nce->nce_next = NULL;
4532535Ssangeeta 	mutex_exit(&ndp->ndp_g_lock);
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	nce_ire_delete(nce);
4560Sstevel@tonic-gate }
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate void
4590Sstevel@tonic-gate ndp_inactive(nce_t *nce)
4600Sstevel@tonic-gate {
4610Sstevel@tonic-gate 	mblk_t		**mpp;
4620Sstevel@tonic-gate 	ill_t		*ill;
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate 	ASSERT(nce->nce_refcnt == 0);
4650Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&nce->nce_lock));
4660Sstevel@tonic-gate 	ASSERT(nce->nce_fastpath == NULL);
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 	/* Free all nce allocated messages */
4690Sstevel@tonic-gate 	mpp = &nce->nce_first_mp_to_free;
4700Sstevel@tonic-gate 	do {
4710Sstevel@tonic-gate 		while (*mpp != NULL) {
4720Sstevel@tonic-gate 			mblk_t  *mp;
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate 			mp = *mpp;
4750Sstevel@tonic-gate 			*mpp = mp->b_next;
4762958Sdr146992 
4772958Sdr146992 			inet_freemsg(mp);
4780Sstevel@tonic-gate 		}
4790Sstevel@tonic-gate 	} while (mpp++ != &nce->nce_last_mp_to_free);
4800Sstevel@tonic-gate 
481*9175SSowmini.Varadhan@Sun.COM 	if (nce->nce_ipversion == IPV6_VERSION) {
482*9175SSowmini.Varadhan@Sun.COM 		/*
483*9175SSowmini.Varadhan@Sun.COM 		 * must have been cleaned up in nce_delete
484*9175SSowmini.Varadhan@Sun.COM 		 */
485*9175SSowmini.Varadhan@Sun.COM 		ASSERT(list_is_empty(&nce->nce_cb));
486*9175SSowmini.Varadhan@Sun.COM 		list_destroy(&nce->nce_cb);
487*9175SSowmini.Varadhan@Sun.COM 	}
4885023Scarlsonj #ifdef DEBUG
4895023Scarlsonj 	nce_trace_cleanup(nce);
4900Sstevel@tonic-gate #endif
4910Sstevel@tonic-gate 
4920Sstevel@tonic-gate 	ill = nce->nce_ill;
4930Sstevel@tonic-gate 	mutex_enter(&ill->ill_lock);
4946255Ssowmini 	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
4956255Ssowmini 	    (char *), "nce", (void *), nce);
4966379Ssowmini 	ill->ill_nce_cnt--;
4970Sstevel@tonic-gate 	/*
4980Sstevel@tonic-gate 	 * If the number of nce's associated with this ill have dropped
4990Sstevel@tonic-gate 	 * to zero, check whether we need to restart any operation that
5000Sstevel@tonic-gate 	 * is waiting for this to happen.
5010Sstevel@tonic-gate 	 */
5026255Ssowmini 	if (ILL_DOWN_OK(ill)) {
5030Sstevel@tonic-gate 		/* ipif_ill_refrele_tail drops the ill_lock */
5040Sstevel@tonic-gate 		ipif_ill_refrele_tail(ill);
5050Sstevel@tonic-gate 	} else {
5060Sstevel@tonic-gate 		mutex_exit(&ill->ill_lock);
5070Sstevel@tonic-gate 	}
5080Sstevel@tonic-gate 	mutex_destroy(&nce->nce_lock);
5092958Sdr146992 	if (nce->nce_mp != NULL)
5102958Sdr146992 		inet_freemsg(nce->nce_mp);
5110Sstevel@tonic-gate }
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate /*
5140Sstevel@tonic-gate  * ndp_walk routine.  Delete the nce if it is associated with the ill
5150Sstevel@tonic-gate  * that is going away.  Always called as a writer.
5160Sstevel@tonic-gate  */
5170Sstevel@tonic-gate void
5180Sstevel@tonic-gate ndp_delete_per_ill(nce_t *nce, uchar_t *arg)
5190Sstevel@tonic-gate {
5200Sstevel@tonic-gate 	if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) {
5210Sstevel@tonic-gate 		ndp_delete(nce);
5220Sstevel@tonic-gate 	}
5230Sstevel@tonic-gate }
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate /*
5260Sstevel@tonic-gate  * Walk a list of to be inactive NCEs and blow away all the ires.
5270Sstevel@tonic-gate  */
5280Sstevel@tonic-gate static void
5290Sstevel@tonic-gate nce_ire_delete_list(nce_t *nce)
5300Sstevel@tonic-gate {
5310Sstevel@tonic-gate 	nce_t *nce_next;
5320Sstevel@tonic-gate 
5330Sstevel@tonic-gate 	ASSERT(nce != NULL);
5340Sstevel@tonic-gate 	while (nce != NULL) {
5350Sstevel@tonic-gate 		nce_next = nce->nce_next;
5360Sstevel@tonic-gate 		nce->nce_next = NULL;
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate 		/*
5390Sstevel@tonic-gate 		 * It is possible for the last ndp walker (this thread)
5400Sstevel@tonic-gate 		 * to come here after ndp_delete has marked the nce CONDEMNED
5410Sstevel@tonic-gate 		 * and before it has removed the nce from the fastpath list
5420Sstevel@tonic-gate 		 * or called untimeout. So we need to do it here. It is safe
5430Sstevel@tonic-gate 		 * for both ndp_delete and this thread to do it twice or
5440Sstevel@tonic-gate 		 * even simultaneously since each of the threads has a
5450Sstevel@tonic-gate 		 * reference on the nce.
5460Sstevel@tonic-gate 		 */
5470Sstevel@tonic-gate 		nce_fastpath_list_delete(nce);
5480Sstevel@tonic-gate 		/*
5490Sstevel@tonic-gate 		 * Cancel any running timer. Timeout can't be restarted
5500Sstevel@tonic-gate 		 * since CONDEMNED is set. Can't hold nce_lock across untimeout.
5510Sstevel@tonic-gate 		 * Passing invalid timeout id is fine.
5520Sstevel@tonic-gate 		 */
5530Sstevel@tonic-gate 		if (nce->nce_timeout_id != 0) {
5540Sstevel@tonic-gate 			(void) untimeout(nce->nce_timeout_id);
5550Sstevel@tonic-gate 			nce->nce_timeout_id = 0;
5560Sstevel@tonic-gate 		}
5572535Ssangeeta 		/*
5582535Ssangeeta 		 * We might hit this func thus in the v4 case:
5592535Ssangeeta 		 * ipif_down->ipif_ndp_down->ndp_walk
5602535Ssangeeta 		 */
5610Sstevel@tonic-gate 
5622535Ssangeeta 		if (nce->nce_ipversion == IPV4_VERSION) {
5632535Ssangeeta 			ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE,
5648485SPeter.Memishian@Sun.COM 			    IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
5652535Ssangeeta 		} else {
5662535Ssangeeta 			ASSERT(nce->nce_ipversion == IPV6_VERSION);
5672535Ssangeeta 			ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE,
5688485SPeter.Memishian@Sun.COM 			    IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill);
5692535Ssangeeta 		}
5700Sstevel@tonic-gate 		NCE_REFRELE_NOTR(nce);
5710Sstevel@tonic-gate 		nce = nce_next;
5720Sstevel@tonic-gate 	}
5730Sstevel@tonic-gate }
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate /*
5760Sstevel@tonic-gate  * Delete an ire when the nce goes away.
5770Sstevel@tonic-gate  */
5780Sstevel@tonic-gate /* ARGSUSED */
5790Sstevel@tonic-gate static void
5800Sstevel@tonic-gate nce_ire_delete(nce_t *nce)
5810Sstevel@tonic-gate {
5822535Ssangeeta 	if (nce->nce_ipversion == IPV6_VERSION) {
5832535Ssangeeta 		ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
5842535Ssangeeta 		    nce_ire_delete1, (char *)nce, nce->nce_ill);
5852535Ssangeeta 		NCE_REFRELE_NOTR(nce);
5862535Ssangeeta 	} else {
5872535Ssangeeta 		ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE,
5882535Ssangeeta 		    nce_ire_delete1, (char *)nce, nce->nce_ill);
5892535Ssangeeta 		NCE_REFRELE_NOTR(nce);
5902535Ssangeeta 	}
5910Sstevel@tonic-gate }
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate /*
5940Sstevel@tonic-gate  * ire_walk routine used to delete every IRE that shares this nce
5950Sstevel@tonic-gate  */
5960Sstevel@tonic-gate static void
5970Sstevel@tonic-gate nce_ire_delete1(ire_t *ire, char *nce_arg)
5980Sstevel@tonic-gate {
5990Sstevel@tonic-gate 	nce_t	*nce = (nce_t *)nce_arg;
6000Sstevel@tonic-gate 
6010Sstevel@tonic-gate 	ASSERT(ire->ire_type == IRE_CACHE);
6020Sstevel@tonic-gate 
6032535Ssangeeta 	if (ire->ire_nce == nce) {
6042535Ssangeeta 		ASSERT(ire->ire_ipversion == nce->nce_ipversion);
6050Sstevel@tonic-gate 		ire_delete(ire);
6062535Ssangeeta 	}
6070Sstevel@tonic-gate }
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate /*
6102546Scarlsonj  * Restart DAD on given NCE.  Returns B_TRUE if DAD has been restarted.
6112546Scarlsonj  */
6122546Scarlsonj boolean_t
6132546Scarlsonj ndp_restart_dad(nce_t *nce)
6142546Scarlsonj {
6152546Scarlsonj 	boolean_t started;
6162546Scarlsonj 	boolean_t dropped;
6172546Scarlsonj 
6182546Scarlsonj 	if (nce == NULL)
6192546Scarlsonj 		return (B_FALSE);
6202546Scarlsonj 	mutex_enter(&nce->nce_lock);
6212546Scarlsonj 	if (nce->nce_state == ND_PROBE) {
6222546Scarlsonj 		mutex_exit(&nce->nce_lock);
6232546Scarlsonj 		started = B_TRUE;
6242546Scarlsonj 	} else if (nce->nce_state == ND_REACHABLE) {
6252546Scarlsonj 		nce->nce_state = ND_PROBE;
6262546Scarlsonj 		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1;
6272546Scarlsonj 		mutex_exit(&nce->nce_lock);
6288485SPeter.Memishian@Sun.COM 		dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE);
6292546Scarlsonj 		if (dropped) {
6302546Scarlsonj 			mutex_enter(&nce->nce_lock);
6312546Scarlsonj 			nce->nce_pcnt++;
6322546Scarlsonj 			mutex_exit(&nce->nce_lock);
6332546Scarlsonj 		}
6342546Scarlsonj 		NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill));
6352546Scarlsonj 		started = B_TRUE;
6362546Scarlsonj 	} else {
6372546Scarlsonj 		mutex_exit(&nce->nce_lock);
6382546Scarlsonj 		started = B_FALSE;
6392546Scarlsonj 	}
6402546Scarlsonj 	return (started);
6412546Scarlsonj }
6422546Scarlsonj 
6432546Scarlsonj /*
6442535Ssangeeta  * IPv6 Cache entry lookup.  Try to find an nce matching the parameters passed.
6450Sstevel@tonic-gate  * If one is found, the refcnt on the nce will be incremented.
6460Sstevel@tonic-gate  */
6470Sstevel@tonic-gate nce_t *
6488485SPeter.Memishian@Sun.COM ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
6498485SPeter.Memishian@Sun.COM     boolean_t caller_holds_lock)
6500Sstevel@tonic-gate {
6510Sstevel@tonic-gate 	nce_t	*nce;
6528485SPeter.Memishian@Sun.COM 	ip_stack_t *ipst = ill->ill_ipst;
6538485SPeter.Memishian@Sun.COM 
6548485SPeter.Memishian@Sun.COM 	ASSERT(ill->ill_isv6);
6558485SPeter.Memishian@Sun.COM 	if (!caller_holds_lock)
6563448Sdh155122 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
6573448Sdh155122 
6583448Sdh155122 	/* Get head of v6 hash table */
6593448Sdh155122 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
6608485SPeter.Memishian@Sun.COM 	nce = nce_lookup_addr(ill, match_illgrp, addr, nce);
6610Sstevel@tonic-gate 	if (nce == NULL)
6620Sstevel@tonic-gate 		nce = nce_lookup_mapping(ill, addr);
6630Sstevel@tonic-gate 	if (!caller_holds_lock)
6643448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
6652535Ssangeeta 	return (nce);
6662535Ssangeeta }
6672535Ssangeeta /*
6682535Ssangeeta  * IPv4 Cache entry lookup.  Try to find an nce matching the parameters passed.
6692535Ssangeeta  * If one is found, the refcnt on the nce will be incremented.
6702535Ssangeeta  * Since multicast mappings are handled in arp, there are no nce_mcast_entries
6712535Ssangeeta  * so we skip the nce_lookup_mapping call.
6722535Ssangeeta  * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL
6732535Ssangeeta  */
6742535Ssangeeta nce_t *
6752535Ssangeeta ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock)
6762535Ssangeeta {
6772535Ssangeeta 	nce_t	*nce;
6782535Ssangeeta 	in6_addr_t addr6;
6793448Sdh155122 	ip_stack_t *ipst = ill->ill_ipst;
6802535Ssangeeta 
6818485SPeter.Memishian@Sun.COM 	if (!caller_holds_lock)
6823448Sdh155122 		mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
6833448Sdh155122 
6843448Sdh155122 	/* Get head of v4 hash table */
6853448Sdh155122 	nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
6862535Ssangeeta 	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
6878485SPeter.Memishian@Sun.COM 	/*
6888485SPeter.Memishian@Sun.COM 	 * NOTE: IPv4 never matches across the illgrp since the NCE's we're
6898485SPeter.Memishian@Sun.COM 	 * looking up have fastpath headers that are inherently per-ill.
6908485SPeter.Memishian@Sun.COM 	 */
6918485SPeter.Memishian@Sun.COM 	nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
6922535Ssangeeta 	if (!caller_holds_lock)
6933448Sdh155122 		mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
6940Sstevel@tonic-gate 	return (nce);
6950Sstevel@tonic-gate }
6960Sstevel@tonic-gate 
6970Sstevel@tonic-gate /*
6980Sstevel@tonic-gate  * Cache entry lookup.  Try to find an nce matching the parameters passed.
6990Sstevel@tonic-gate  * Look only for exact entries (no mappings).  If an nce is found, increment
7002535Ssangeeta  * the hold count on that nce. The caller passes in the start of the
7012535Ssangeeta  * appropriate hash table, and must be holding the appropriate global
7022535Ssangeeta  * lock (ndp_g_lock).
7030Sstevel@tonic-gate  */
7040Sstevel@tonic-gate static nce_t *
7058485SPeter.Memishian@Sun.COM nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr,
7068485SPeter.Memishian@Sun.COM     nce_t *nce)
7070Sstevel@tonic-gate {
7083448Sdh155122 	ndp_g_t		*ndp;
7093448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
7103448Sdh155122 
7113448Sdh155122 	if (ill->ill_isv6)
7123448Sdh155122 		ndp = ipst->ips_ndp6;
7133448Sdh155122 	else
7143448Sdh155122 		ndp = ipst->ips_ndp4;
7150Sstevel@tonic-gate 
7162535Ssangeeta 	ASSERT(MUTEX_HELD(&ndp->ndp_g_lock));
7170Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(addr))
7180Sstevel@tonic-gate 		return (NULL);
7190Sstevel@tonic-gate 	for (; nce != NULL; nce = nce->nce_next) {
7208485SPeter.Memishian@Sun.COM 		if (nce->nce_ill == ill ||
7218485SPeter.Memishian@Sun.COM 		    match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) {
7220Sstevel@tonic-gate 			if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) &&
7230Sstevel@tonic-gate 			    IN6_ARE_ADDR_EQUAL(&nce->nce_mask,
7240Sstevel@tonic-gate 			    &ipv6_all_ones)) {
7250Sstevel@tonic-gate 				mutex_enter(&nce->nce_lock);
7260Sstevel@tonic-gate 				if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
7270Sstevel@tonic-gate 					NCE_REFHOLD_LOCKED(nce);
7280Sstevel@tonic-gate 					mutex_exit(&nce->nce_lock);
7290Sstevel@tonic-gate 					break;
7300Sstevel@tonic-gate 				}
7310Sstevel@tonic-gate 				mutex_exit(&nce->nce_lock);
7320Sstevel@tonic-gate 			}
7330Sstevel@tonic-gate 		}
7340Sstevel@tonic-gate 	}
7350Sstevel@tonic-gate 	return (nce);
7360Sstevel@tonic-gate }
7370Sstevel@tonic-gate 
7380Sstevel@tonic-gate /*
7390Sstevel@tonic-gate  * Cache entry lookup.  Try to find an nce matching the parameters passed.
7400Sstevel@tonic-gate  * Look only for mappings.
7410Sstevel@tonic-gate  */
7420Sstevel@tonic-gate static nce_t *
7430Sstevel@tonic-gate nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr)
7440Sstevel@tonic-gate {
7450Sstevel@tonic-gate 	nce_t	*nce;
7463448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
7470Sstevel@tonic-gate 
7482535Ssangeeta 	ASSERT(ill != NULL && ill->ill_isv6);
7493448Sdh155122 	ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock));
7500Sstevel@tonic-gate 	if (!IN6_IS_ADDR_MULTICAST(addr))
7510Sstevel@tonic-gate 		return (NULL);
7523448Sdh155122 	nce = ipst->ips_ndp6->nce_mask_entries;
7530Sstevel@tonic-gate 	for (; nce != NULL; nce = nce->nce_next)
7540Sstevel@tonic-gate 		if (nce->nce_ill == ill &&
7550Sstevel@tonic-gate 		    (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) {
7560Sstevel@tonic-gate 			mutex_enter(&nce->nce_lock);
7570Sstevel@tonic-gate 			if (!(nce->nce_flags & NCE_F_CONDEMNED)) {
7580Sstevel@tonic-gate 				NCE_REFHOLD_LOCKED(nce);
7590Sstevel@tonic-gate 				mutex_exit(&nce->nce_lock);
7600Sstevel@tonic-gate 				break;
7610Sstevel@tonic-gate 			}
7620Sstevel@tonic-gate 			mutex_exit(&nce->nce_lock);
7630Sstevel@tonic-gate 		}
7640Sstevel@tonic-gate 	return (nce);
7650Sstevel@tonic-gate }
7660Sstevel@tonic-gate 
7670Sstevel@tonic-gate /*
7680Sstevel@tonic-gate  * Process passed in parameters either from an incoming packet or via
7690Sstevel@tonic-gate  * user ioctl.
7700Sstevel@tonic-gate  */
7718485SPeter.Memishian@Sun.COM static void
7728485SPeter.Memishian@Sun.COM nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
7730Sstevel@tonic-gate {
7740Sstevel@tonic-gate 	ill_t	*ill = nce->nce_ill;
7750Sstevel@tonic-gate 	uint32_t hw_addr_len = ill->ill_nd_lla_len;
7760Sstevel@tonic-gate 	mblk_t	*mp;
7770Sstevel@tonic-gate 	boolean_t ll_updated = B_FALSE;
7780Sstevel@tonic-gate 	boolean_t ll_changed;
7793448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
7800Sstevel@tonic-gate 
7812535Ssangeeta 	ASSERT(nce->nce_ipversion == IPV6_VERSION);
7820Sstevel@tonic-gate 	/*
7830Sstevel@tonic-gate 	 * No updates of link layer address or the neighbor state is
7840Sstevel@tonic-gate 	 * allowed, when the cache is in NONUD state.  This still
7850Sstevel@tonic-gate 	 * allows for responding to reachability solicitation.
7860Sstevel@tonic-gate 	 */
7870Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
7880Sstevel@tonic-gate 	if (nce->nce_state == ND_INCOMPLETE) {
7890Sstevel@tonic-gate 		if (hw_addr == NULL) {
7900Sstevel@tonic-gate 			mutex_exit(&nce->nce_lock);
7910Sstevel@tonic-gate 			return;
7920Sstevel@tonic-gate 		}
7930Sstevel@tonic-gate 		nce_set_ll(nce, hw_addr);
7940Sstevel@tonic-gate 		/*
7950Sstevel@tonic-gate 		 * Update nce state and send the queued packets
7960Sstevel@tonic-gate 		 * back to ip this time ire will be added.
7970Sstevel@tonic-gate 		 */
7980Sstevel@tonic-gate 		if (flag & ND_NA_FLAG_SOLICITED) {
7990Sstevel@tonic-gate 			nce_update(nce, ND_REACHABLE, NULL);
8000Sstevel@tonic-gate 		} else {
8010Sstevel@tonic-gate 			nce_update(nce, ND_STALE, NULL);
8020Sstevel@tonic-gate 		}
8030Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
8040Sstevel@tonic-gate 		nce_fastpath(nce);
805*9175SSowmini.Varadhan@Sun.COM 		nce_cb_dispatch(nce); /* complete callbacks */
8060Sstevel@tonic-gate 		mutex_enter(&nce->nce_lock);
8070Sstevel@tonic-gate 		mp = nce->nce_qd_mp;
8080Sstevel@tonic-gate 		nce->nce_qd_mp = NULL;
8090Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
8100Sstevel@tonic-gate 		while (mp != NULL) {
8112958Sdr146992 			mblk_t *nxt_mp, *data_mp;
8120Sstevel@tonic-gate 
8130Sstevel@tonic-gate 			nxt_mp = mp->b_next;
8140Sstevel@tonic-gate 			mp->b_next = NULL;
8152958Sdr146992 
8162958Sdr146992 			if (mp->b_datap->db_type == M_CTL)
8172958Sdr146992 				data_mp = mp->b_cont;
8182958Sdr146992 			else
8192958Sdr146992 				data_mp = mp;
8202958Sdr146992 			if (data_mp->b_prev != NULL) {
8210Sstevel@tonic-gate 				ill_t   *inbound_ill;
8220Sstevel@tonic-gate 				queue_t *fwdq = NULL;
8230Sstevel@tonic-gate 				uint_t ifindex;
8240Sstevel@tonic-gate 
8252958Sdr146992 				ifindex = (uint_t)(uintptr_t)data_mp->b_prev;
8260Sstevel@tonic-gate 				inbound_ill = ill_lookup_on_ifindex(ifindex,
8273448Sdh155122 				    B_TRUE, NULL, NULL, NULL, NULL, ipst);
8280Sstevel@tonic-gate 				if (inbound_ill == NULL) {
8292958Sdr146992 					data_mp->b_prev = NULL;
8300Sstevel@tonic-gate 					freemsg(mp);
8310Sstevel@tonic-gate 					return;
8320Sstevel@tonic-gate 				} else {
8330Sstevel@tonic-gate 					fwdq = inbound_ill->ill_rq;
8340Sstevel@tonic-gate 				}
8352958Sdr146992 				data_mp->b_prev = NULL;
8360Sstevel@tonic-gate 				/*
8370Sstevel@tonic-gate 				 * Send a forwarded packet back into ip_rput_v6
8380Sstevel@tonic-gate 				 * just as in ire_send_v6().
8390Sstevel@tonic-gate 				 * Extract the queue from b_prev (set in
8400Sstevel@tonic-gate 				 * ip_rput_data_v6).
8410Sstevel@tonic-gate 				 */
8420Sstevel@tonic-gate 				if (fwdq != NULL) {
8430Sstevel@tonic-gate 					/*
8440Sstevel@tonic-gate 					 * Forwarded packets hop count will
8450Sstevel@tonic-gate 					 * get decremented in ip_rput_data_v6
8460Sstevel@tonic-gate 					 */
8472958Sdr146992 					if (data_mp != mp)
8482958Sdr146992 						freeb(mp);
8492958Sdr146992 					put(fwdq, data_mp);
8500Sstevel@tonic-gate 				} else {
8510Sstevel@tonic-gate 					/*
8520Sstevel@tonic-gate 					 * Send locally originated packets back
8538485SPeter.Memishian@Sun.COM 					 * into ip_wput_v6.
8540Sstevel@tonic-gate 					 */
8550Sstevel@tonic-gate 					put(ill->ill_wq, mp);
8560Sstevel@tonic-gate 				}
8570Sstevel@tonic-gate 				ill_refrele(inbound_ill);
8580Sstevel@tonic-gate 			} else {
8590Sstevel@tonic-gate 				put(ill->ill_wq, mp);
8600Sstevel@tonic-gate 			}
8610Sstevel@tonic-gate 			mp = nxt_mp;
8620Sstevel@tonic-gate 		}
8630Sstevel@tonic-gate 		return;
8640Sstevel@tonic-gate 	}
8652546Scarlsonj 	ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len);
8660Sstevel@tonic-gate 	if (!is_adv) {
8670Sstevel@tonic-gate 		/* If this is a SOLICITATION request only */
8680Sstevel@tonic-gate 		if (ll_changed)
8690Sstevel@tonic-gate 			nce_update(nce, ND_STALE, hw_addr);
8700Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
871*9175SSowmini.Varadhan@Sun.COM 		nce_cb_dispatch(nce);
8720Sstevel@tonic-gate 		return;
8730Sstevel@tonic-gate 	}
8740Sstevel@tonic-gate 	if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) {
8750Sstevel@tonic-gate 		/* If in any other state than REACHABLE, ignore */
8760Sstevel@tonic-gate 		if (nce->nce_state == ND_REACHABLE) {
8770Sstevel@tonic-gate 			nce_update(nce, ND_STALE, NULL);
8780Sstevel@tonic-gate 		}
8790Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
880*9175SSowmini.Varadhan@Sun.COM 		nce_cb_dispatch(nce);
8810Sstevel@tonic-gate 		return;
8820Sstevel@tonic-gate 	} else {
8830Sstevel@tonic-gate 		if (ll_changed) {
8840Sstevel@tonic-gate 			nce_update(nce, ND_UNCHANGED, hw_addr);
8850Sstevel@tonic-gate 			ll_updated = B_TRUE;
8860Sstevel@tonic-gate 		}
8870Sstevel@tonic-gate 		if (flag & ND_NA_FLAG_SOLICITED) {
8880Sstevel@tonic-gate 			nce_update(nce, ND_REACHABLE, NULL);
8890Sstevel@tonic-gate 		} else {
8900Sstevel@tonic-gate 			if (ll_updated) {
8910Sstevel@tonic-gate 				nce_update(nce, ND_STALE, NULL);
8920Sstevel@tonic-gate 			}
8930Sstevel@tonic-gate 		}
8940Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
8950Sstevel@tonic-gate 		if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags &
8960Sstevel@tonic-gate 		    NCE_F_ISROUTER)) {
8970Sstevel@tonic-gate 			ire_t *ire;
8980Sstevel@tonic-gate 
8990Sstevel@tonic-gate 			/*
9000Sstevel@tonic-gate 			 * Router turned to host.  We need to remove the
9010Sstevel@tonic-gate 			 * entry as well as any default route that may be
9020Sstevel@tonic-gate 			 * using this as a next hop.  This is required by
9030Sstevel@tonic-gate 			 * section 7.2.5 of RFC 2461.
9040Sstevel@tonic-gate 			 */
9050Sstevel@tonic-gate 			ire = ire_ftable_lookup_v6(&ipv6_all_zeros,
9060Sstevel@tonic-gate 			    &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT,
9071676Sjpk 			    nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL,
9080Sstevel@tonic-gate 			    MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW |
9093448Sdh155122 			    MATCH_IRE_DEFAULT, ipst);
9100Sstevel@tonic-gate 			if (ire != NULL) {
9113448Sdh155122 				ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst);
9120Sstevel@tonic-gate 				ire_delete(ire);
9130Sstevel@tonic-gate 				ire_refrele(ire);
9140Sstevel@tonic-gate 			}
915*9175SSowmini.Varadhan@Sun.COM 			ndp_delete(nce); /* will do nce_cb_dispatch */
916*9175SSowmini.Varadhan@Sun.COM 		} else {
917*9175SSowmini.Varadhan@Sun.COM 			nce_cb_dispatch(nce);
9180Sstevel@tonic-gate 		}
9190Sstevel@tonic-gate 	}
9200Sstevel@tonic-gate }
9210Sstevel@tonic-gate 
9220Sstevel@tonic-gate /*
9238485SPeter.Memishian@Sun.COM  * Walker state structure used by ndp_process() / ndp_process_entry().
9248485SPeter.Memishian@Sun.COM  */
9258485SPeter.Memishian@Sun.COM typedef struct ndp_process_data {
9268485SPeter.Memishian@Sun.COM 	ill_t		*np_ill; 	/* ill/illgrp to match against */
9278485SPeter.Memishian@Sun.COM 	const in6_addr_t *np_addr; 	/* IPv6 address to match */
9288485SPeter.Memishian@Sun.COM 	uchar_t		*np_hw_addr; 	/* passed to nce_process() */
9298485SPeter.Memishian@Sun.COM 	uint32_t	np_flag;	/* passed to nce_process() */
9308485SPeter.Memishian@Sun.COM 	boolean_t	np_is_adv;	/* passed to nce_process() */
9318485SPeter.Memishian@Sun.COM } ndp_process_data_t;
9328485SPeter.Memishian@Sun.COM 
9338485SPeter.Memishian@Sun.COM /*
9348485SPeter.Memishian@Sun.COM  * Walker callback used by ndp_process() for IPMP groups: calls nce_process()
9358485SPeter.Memishian@Sun.COM  * for each NCE with a matching address that's in the same IPMP group.
9368485SPeter.Memishian@Sun.COM  */
9378485SPeter.Memishian@Sun.COM static void
9388485SPeter.Memishian@Sun.COM ndp_process_entry(nce_t *nce, void *arg)
9398485SPeter.Memishian@Sun.COM {
9408485SPeter.Memishian@Sun.COM 	ndp_process_data_t *npp = arg;
9418485SPeter.Memishian@Sun.COM 
9428485SPeter.Memishian@Sun.COM 	if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) &&
9438485SPeter.Memishian@Sun.COM 	    IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) &&
9448485SPeter.Memishian@Sun.COM 	    IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
9458485SPeter.Memishian@Sun.COM 		nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv);
9468485SPeter.Memishian@Sun.COM 	}
9478485SPeter.Memishian@Sun.COM }
9488485SPeter.Memishian@Sun.COM 
9498485SPeter.Memishian@Sun.COM /*
9508485SPeter.Memishian@Sun.COM  * Wrapper around nce_process() that handles IPMP.  In particular, for IPMP,
9518485SPeter.Memishian@Sun.COM  * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have
9528485SPeter.Memishian@Sun.COM  * more than one NCE for a given IPv6 address to tend to.  In that case, we
9538485SPeter.Memishian@Sun.COM  * need to walk all NCEs and callback nce_process() for each one.  Since this
9548485SPeter.Memishian@Sun.COM  * is expensive, in the non-IPMP case we just directly call nce_process().
9558485SPeter.Memishian@Sun.COM  * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP
9568485SPeter.Memishian@Sun.COM  * interfaces in an IPMP group share the same NCEs -- at which point this
9578485SPeter.Memishian@Sun.COM  * function can be removed entirely.
9588485SPeter.Memishian@Sun.COM  */
9598485SPeter.Memishian@Sun.COM void
9608485SPeter.Memishian@Sun.COM ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv)
9618485SPeter.Memishian@Sun.COM {
9628485SPeter.Memishian@Sun.COM 	ill_t *ill = nce->nce_ill;
9638485SPeter.Memishian@Sun.COM 	struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6;
9648485SPeter.Memishian@Sun.COM 	ndp_process_data_t np;
9658485SPeter.Memishian@Sun.COM 
9668485SPeter.Memishian@Sun.COM 	if (ill->ill_grp == NULL) {
9678485SPeter.Memishian@Sun.COM 		nce_process(nce, hw_addr, flag, is_adv);
9688485SPeter.Memishian@Sun.COM 		return;
9698485SPeter.Memishian@Sun.COM 	}
9708485SPeter.Memishian@Sun.COM 
9718485SPeter.Memishian@Sun.COM 	/* IPMP case: walk all NCEs */
9728485SPeter.Memishian@Sun.COM 	np.np_ill = ill;
9738485SPeter.Memishian@Sun.COM 	np.np_addr = &nce->nce_addr;
9748485SPeter.Memishian@Sun.COM 	np.np_flag = flag;
9758485SPeter.Memishian@Sun.COM 	np.np_is_adv = is_adv;
9768485SPeter.Memishian@Sun.COM 	np.np_hw_addr = hw_addr;
9778485SPeter.Memishian@Sun.COM 
9788485SPeter.Memishian@Sun.COM 	ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES);
9798485SPeter.Memishian@Sun.COM }
9808485SPeter.Memishian@Sun.COM 
9818485SPeter.Memishian@Sun.COM /*
9820Sstevel@tonic-gate  * Pass arg1 to the pfi supplied, along with each nce in existence.
9830Sstevel@tonic-gate  * ndp_walk() places a REFHOLD on the nce and drops the lock when
9840Sstevel@tonic-gate  * walking the hash list.
9850Sstevel@tonic-gate  */
9860Sstevel@tonic-gate void
9872535Ssangeeta ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1,
9882535Ssangeeta     boolean_t trace)
9890Sstevel@tonic-gate {
9900Sstevel@tonic-gate 	nce_t	*nce;
9910Sstevel@tonic-gate 	nce_t	*nce1;
9920Sstevel@tonic-gate 	nce_t	**ncep;
9930Sstevel@tonic-gate 	nce_t	*free_nce_list = NULL;
9940Sstevel@tonic-gate 
9952535Ssangeeta 	mutex_enter(&ndp->ndp_g_lock);
9962535Ssangeeta 	/* Prevent ndp_delete from unlink and free of NCE */
9972535Ssangeeta 	ndp->ndp_g_walker++;
9982535Ssangeeta 	mutex_exit(&ndp->ndp_g_lock);
9992535Ssangeeta 	for (ncep = ndp->nce_hash_tbl;
10002535Ssangeeta 	    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
10012535Ssangeeta 		for (nce = *ncep; nce != NULL; nce = nce1) {
10020Sstevel@tonic-gate 			nce1 = nce->nce_next;
10030Sstevel@tonic-gate 			if (ill == NULL || nce->nce_ill == ill) {
10040Sstevel@tonic-gate 				if (trace) {
10050Sstevel@tonic-gate 					NCE_REFHOLD(nce);
10060Sstevel@tonic-gate 					(*pfi)(nce, arg1);
10070Sstevel@tonic-gate 					NCE_REFRELE(nce);
10080Sstevel@tonic-gate 				} else {
10090Sstevel@tonic-gate 					NCE_REFHOLD_NOTR(nce);
10100Sstevel@tonic-gate 					(*pfi)(nce, arg1);
10110Sstevel@tonic-gate 					NCE_REFRELE_NOTR(nce);
10120Sstevel@tonic-gate 				}
10130Sstevel@tonic-gate 			}
10140Sstevel@tonic-gate 		}
10150Sstevel@tonic-gate 	}
10162535Ssangeeta 	for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) {
10170Sstevel@tonic-gate 		nce1 = nce->nce_next;
10180Sstevel@tonic-gate 		if (ill == NULL || nce->nce_ill == ill) {
10190Sstevel@tonic-gate 			if (trace) {
10200Sstevel@tonic-gate 				NCE_REFHOLD(nce);
10210Sstevel@tonic-gate 				(*pfi)(nce, arg1);
10220Sstevel@tonic-gate 				NCE_REFRELE(nce);
10230Sstevel@tonic-gate 			} else {
10240Sstevel@tonic-gate 				NCE_REFHOLD_NOTR(nce);
10250Sstevel@tonic-gate 				(*pfi)(nce, arg1);
10260Sstevel@tonic-gate 				NCE_REFRELE_NOTR(nce);
10270Sstevel@tonic-gate 			}
10280Sstevel@tonic-gate 		}
10290Sstevel@tonic-gate 	}
10302535Ssangeeta 	mutex_enter(&ndp->ndp_g_lock);
10312535Ssangeeta 	ndp->ndp_g_walker--;
10320Sstevel@tonic-gate 	/*
10330Sstevel@tonic-gate 	 * While NCE's are removed from global list they are placed
10340Sstevel@tonic-gate 	 * in a private list, to be passed to nce_ire_delete_list().
10350Sstevel@tonic-gate 	 * The reason is, there may be ires pointing to this nce
10360Sstevel@tonic-gate 	 * which needs to cleaned up.
10370Sstevel@tonic-gate 	 */
10382535Ssangeeta 	if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) {
10390Sstevel@tonic-gate 		/* Time to delete condemned entries */
10402535Ssangeeta 		for (ncep = ndp->nce_hash_tbl;
10412535Ssangeeta 		    ncep < A_END(ndp->nce_hash_tbl); ncep++) {
10420Sstevel@tonic-gate 			nce = *ncep;
10430Sstevel@tonic-gate 			if (nce != NULL) {
10442535Ssangeeta 				nce_remove(ndp, nce, &free_nce_list);
10450Sstevel@tonic-gate 			}
10460Sstevel@tonic-gate 		}
10472535Ssangeeta 		nce = ndp->nce_mask_entries;
10480Sstevel@tonic-gate 		if (nce != NULL) {
10492535Ssangeeta 			nce_remove(ndp, nce, &free_nce_list);
10500Sstevel@tonic-gate 		}
10512535Ssangeeta 		ndp->ndp_g_walker_cleanup = B_FALSE;
10520Sstevel@tonic-gate 	}
10534714Ssowmini 
10542535Ssangeeta 	mutex_exit(&ndp->ndp_g_lock);
10550Sstevel@tonic-gate 
10560Sstevel@tonic-gate 	if (free_nce_list != NULL) {
10570Sstevel@tonic-gate 		nce_ire_delete_list(free_nce_list);
10580Sstevel@tonic-gate 	}
10590Sstevel@tonic-gate }
10600Sstevel@tonic-gate 
10613448Sdh155122 /*
10623448Sdh155122  * Walk everything.
10633448Sdh155122  * Note that ill can be NULL hence can't derive the ipst from it.
10643448Sdh155122  */
10650Sstevel@tonic-gate void
10663448Sdh155122 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst)
10670Sstevel@tonic-gate {
10683448Sdh155122 	ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE);
10693448Sdh155122 	ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE);
10700Sstevel@tonic-gate }
10710Sstevel@tonic-gate 
10720Sstevel@tonic-gate /*
10730Sstevel@tonic-gate  * Process resolve requests.  Handles both mapped entries
10740Sstevel@tonic-gate  * as well as cases that needs to be send out on the wire.
10750Sstevel@tonic-gate  * Lookup a NCE for a given IRE.  Regardless of whether one exists
10760Sstevel@tonic-gate  * or one is created, we defer making ire point to nce until the
10770Sstevel@tonic-gate  * ire is actually added at which point the nce_refcnt on the nce is
10780Sstevel@tonic-gate  * incremented.  This is done primarily to have symmetry between ire_add()
10790Sstevel@tonic-gate  * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
10800Sstevel@tonic-gate  */
10810Sstevel@tonic-gate int
10820Sstevel@tonic-gate ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid)
10830Sstevel@tonic-gate {
10848485SPeter.Memishian@Sun.COM 	nce_t		*nce, *hw_nce = NULL;
10858485SPeter.Memishian@Sun.COM 	int		err;
10868485SPeter.Memishian@Sun.COM 	ill_t		*ipmp_ill;
10878485SPeter.Memishian@Sun.COM 	uint16_t	nce_flags;
10880Sstevel@tonic-gate 	mblk_t		*mp_nce = NULL;
10893448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
10908485SPeter.Memishian@Sun.COM 	uchar_t		*hwaddr = NULL;
10910Sstevel@tonic-gate 
10922535Ssangeeta 	ASSERT(ill->ill_isv6);
10938485SPeter.Memishian@Sun.COM 
10948485SPeter.Memishian@Sun.COM 	if (IN6_IS_ADDR_MULTICAST(dst))
10958485SPeter.Memishian@Sun.COM 		return (nce_set_multicast(ill, dst));
10968485SPeter.Memishian@Sun.COM 
10978485SPeter.Memishian@Sun.COM 	nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0;
10988485SPeter.Memishian@Sun.COM 
10998485SPeter.Memishian@Sun.COM 	/*
11008485SPeter.Memishian@Sun.COM 	 * If `ill' is under IPMP, then first check to see if there's an NCE
11018485SPeter.Memishian@Sun.COM 	 * for `dst' on the IPMP meta-interface (e.g., because an application
11028485SPeter.Memishian@Sun.COM 	 * explicitly did an SIOCLIFSETND to tie a hardware address to `dst').
11038485SPeter.Memishian@Sun.COM 	 * If so, we use that hardware address when creating the NCE below.
11048485SPeter.Memishian@Sun.COM 	 * Note that we don't yet have a mechanism to remove these NCEs if the
11058485SPeter.Memishian@Sun.COM 	 * NCE for `dst' on the IPMP meta-interface is subsequently removed --
11068485SPeter.Memishian@Sun.COM 	 * but rather than build such a beast, we should fix NCEs so that they
11078485SPeter.Memishian@Sun.COM 	 * can be properly shared across an IPMP group.
11088485SPeter.Memishian@Sun.COM 	 */
11098485SPeter.Memishian@Sun.COM 	if (IS_UNDER_IPMP(ill)) {
11108485SPeter.Memishian@Sun.COM 		if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) {
11118485SPeter.Memishian@Sun.COM 			hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE);
11128485SPeter.Memishian@Sun.COM 			if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) {
11138485SPeter.Memishian@Sun.COM 				hwaddr = hw_nce->nce_res_mp->b_rptr +
11148485SPeter.Memishian@Sun.COM 				    NCE_LL_ADDR_OFFSET(ipmp_ill);
11158485SPeter.Memishian@Sun.COM 				nce_flags |= hw_nce->nce_flags;
11168485SPeter.Memishian@Sun.COM 			}
11178485SPeter.Memishian@Sun.COM 			ill_refrele(ipmp_ill);
11188485SPeter.Memishian@Sun.COM 		}
11190Sstevel@tonic-gate 	}
11208485SPeter.Memishian@Sun.COM 
11214714Ssowmini 	err = ndp_lookup_then_add_v6(ill,
11228485SPeter.Memishian@Sun.COM 	    B_FALSE,	/* NCE fastpath is per ill; don't match across group */
11238485SPeter.Memishian@Sun.COM 	    hwaddr,
11240Sstevel@tonic-gate 	    dst,
11250Sstevel@tonic-gate 	    &ipv6_all_ones,
11260Sstevel@tonic-gate 	    &ipv6_all_zeros,
11270Sstevel@tonic-gate 	    0,
11288485SPeter.Memishian@Sun.COM 	    nce_flags,
11298485SPeter.Memishian@Sun.COM 	    hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE,
11304714Ssowmini 	    &nce);
11310Sstevel@tonic-gate 
11328485SPeter.Memishian@Sun.COM 	if (hw_nce != NULL)
11338485SPeter.Memishian@Sun.COM 		NCE_REFRELE(hw_nce);
11348485SPeter.Memishian@Sun.COM 
11350Sstevel@tonic-gate 	switch (err) {
11360Sstevel@tonic-gate 	case 0:
11370Sstevel@tonic-gate 		/*
11380Sstevel@tonic-gate 		 * New cache entry was created. Make sure that the state
11390Sstevel@tonic-gate 		 * is not ND_INCOMPLETE. It can be in some other state
11400Sstevel@tonic-gate 		 * even before we send out the solicitation as we could
11410Sstevel@tonic-gate 		 * get un-solicited advertisements.
11420Sstevel@tonic-gate 		 *
11430Sstevel@tonic-gate 		 * If this is an XRESOLV interface, simply return 0,
11440Sstevel@tonic-gate 		 * since we don't want to solicit just yet.
11450Sstevel@tonic-gate 		 */
11460Sstevel@tonic-gate 		if (ill->ill_flags & ILLF_XRESOLV) {
11470Sstevel@tonic-gate 			NCE_REFRELE(nce);
11480Sstevel@tonic-gate 			return (0);
11490Sstevel@tonic-gate 		}
11508485SPeter.Memishian@Sun.COM 
11510Sstevel@tonic-gate 		mutex_enter(&nce->nce_lock);
11520Sstevel@tonic-gate 		if (nce->nce_state != ND_INCOMPLETE) {
11530Sstevel@tonic-gate 			mutex_exit(&nce->nce_lock);
11540Sstevel@tonic-gate 			NCE_REFRELE(nce);
11550Sstevel@tonic-gate 			return (0);
11560Sstevel@tonic-gate 		}
1157*9175SSowmini.Varadhan@Sun.COM 		if (nce->nce_rcnt == 0) {
1158*9175SSowmini.Varadhan@Sun.COM 			/* The caller will free mp */
1159*9175SSowmini.Varadhan@Sun.COM 			mutex_exit(&nce->nce_lock);
1160*9175SSowmini.Varadhan@Sun.COM 			ndp_delete(nce);
1161*9175SSowmini.Varadhan@Sun.COM 			NCE_REFRELE(nce);
1162*9175SSowmini.Varadhan@Sun.COM 			return (ESRCH);
1163*9175SSowmini.Varadhan@Sun.COM 		}
11643448Sdh155122 		mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
11650Sstevel@tonic-gate 		if (mp_nce == NULL) {
11660Sstevel@tonic-gate 			/* The caller will free mp */
11670Sstevel@tonic-gate 			mutex_exit(&nce->nce_lock);
11680Sstevel@tonic-gate 			ndp_delete(nce);
11690Sstevel@tonic-gate 			NCE_REFRELE(nce);
11700Sstevel@tonic-gate 			return (ENOMEM);
11710Sstevel@tonic-gate 		}
1172*9175SSowmini.Varadhan@Sun.COM 		nce_queue_mp(nce, mp_nce);
1173*9175SSowmini.Varadhan@Sun.COM 		ip_ndp_resolve(nce);
11740Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
11750Sstevel@tonic-gate 		NCE_REFRELE(nce);
11760Sstevel@tonic-gate 		return (EINPROGRESS);
11770Sstevel@tonic-gate 	case EEXIST:
11780Sstevel@tonic-gate 		/* Resolution in progress just queue the packet */
11790Sstevel@tonic-gate 		mutex_enter(&nce->nce_lock);
11800Sstevel@tonic-gate 		if (nce->nce_state == ND_INCOMPLETE) {
11813448Sdh155122 			mp_nce = ip_prepend_zoneid(mp, zoneid, ipst);
11820Sstevel@tonic-gate 			if (mp_nce == NULL) {
11830Sstevel@tonic-gate 				err = ENOMEM;
11840Sstevel@tonic-gate 			} else {
11850Sstevel@tonic-gate 				nce_queue_mp(nce, mp_nce);
11860Sstevel@tonic-gate 				err = EINPROGRESS;
11870Sstevel@tonic-gate 			}
11880Sstevel@tonic-gate 		} else {
11890Sstevel@tonic-gate 			/*
11900Sstevel@tonic-gate 			 * Any other state implies we have
11910Sstevel@tonic-gate 			 * a nce but IRE needs to be added ...
11920Sstevel@tonic-gate 			 * ire_add_v6() will take care of the
11930Sstevel@tonic-gate 			 * the case when the nce becomes CONDEMNED
11940Sstevel@tonic-gate 			 * before the ire is added to the table.
11950Sstevel@tonic-gate 			 */
11960Sstevel@tonic-gate 			err = 0;
11970Sstevel@tonic-gate 		}
11980Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
11990Sstevel@tonic-gate 		NCE_REFRELE(nce);
12000Sstevel@tonic-gate 		break;
12010Sstevel@tonic-gate 	default:
12020Sstevel@tonic-gate 		ip1dbg(("ndp_resolver: Can't create NCE %d\n", err));
12030Sstevel@tonic-gate 		break;
12040Sstevel@tonic-gate 	}
12050Sstevel@tonic-gate 	return (err);
12060Sstevel@tonic-gate }
12070Sstevel@tonic-gate 
12080Sstevel@tonic-gate /*
12090Sstevel@tonic-gate  * When there is no resolver, the link layer template is passed in
12100Sstevel@tonic-gate  * the IRE.
12110Sstevel@tonic-gate  * Lookup a NCE for a given IRE.  Regardless of whether one exists
12120Sstevel@tonic-gate  * or one is created, we defer making ire point to nce until the
12130Sstevel@tonic-gate  * ire is actually added at which point the nce_refcnt on the nce is
12140Sstevel@tonic-gate  * incremented.  This is done primarily to have symmetry between ire_add()
12150Sstevel@tonic-gate  * and ire_delete() which decrements the nce_refcnt, when an ire is deleted.
12160Sstevel@tonic-gate  */
12170Sstevel@tonic-gate int
12180Sstevel@tonic-gate ndp_noresolver(ill_t *ill, const in6_addr_t *dst)
12190Sstevel@tonic-gate {
12200Sstevel@tonic-gate 	nce_t		*nce;
12210Sstevel@tonic-gate 	int		err = 0;
12220Sstevel@tonic-gate 
12230Sstevel@tonic-gate 	ASSERT(ill != NULL);
12242535Ssangeeta 	ASSERT(ill->ill_isv6);
12250Sstevel@tonic-gate 	if (IN6_IS_ADDR_MULTICAST(dst)) {
12260Sstevel@tonic-gate 		err = nce_set_multicast(ill, dst);
12270Sstevel@tonic-gate 		return (err);
12280Sstevel@tonic-gate 	}
12290Sstevel@tonic-gate 
12304714Ssowmini 	err = ndp_lookup_then_add_v6(ill,
12318485SPeter.Memishian@Sun.COM 	    B_FALSE,	/* NCE fastpath is per ill; don't match across group */
12320Sstevel@tonic-gate 	    NULL,	/* hardware address */
12330Sstevel@tonic-gate 	    dst,
12340Sstevel@tonic-gate 	    &ipv6_all_ones,
12350Sstevel@tonic-gate 	    &ipv6_all_zeros,
12360Sstevel@tonic-gate 	    0,
12370Sstevel@tonic-gate 	    (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0,
12380Sstevel@tonic-gate 	    ND_REACHABLE,
12394714Ssowmini 	    &nce);
12400Sstevel@tonic-gate 
12410Sstevel@tonic-gate 	switch (err) {
12420Sstevel@tonic-gate 	case 0:
12430Sstevel@tonic-gate 		/*
12440Sstevel@tonic-gate 		 * Cache entry with a proper resolver cookie was
12450Sstevel@tonic-gate 		 * created.
12460Sstevel@tonic-gate 		 */
12470Sstevel@tonic-gate 		NCE_REFRELE(nce);
12480Sstevel@tonic-gate 		break;
12490Sstevel@tonic-gate 	case EEXIST:
12500Sstevel@tonic-gate 		err = 0;
12510Sstevel@tonic-gate 		NCE_REFRELE(nce);
12520Sstevel@tonic-gate 		break;
12530Sstevel@tonic-gate 	default:
12540Sstevel@tonic-gate 		ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err));
12550Sstevel@tonic-gate 		break;
12560Sstevel@tonic-gate 	}
12570Sstevel@tonic-gate 	return (err);
12580Sstevel@tonic-gate }
12590Sstevel@tonic-gate 
12600Sstevel@tonic-gate /*
12610Sstevel@tonic-gate  * For each interface an entry is added for the unspecified multicast group.
12620Sstevel@tonic-gate  * Here that mapping is used to form the multicast cache entry for a particular
12630Sstevel@tonic-gate  * multicast destination.
12640Sstevel@tonic-gate  */
12650Sstevel@tonic-gate static int
12660Sstevel@tonic-gate nce_set_multicast(ill_t *ill, const in6_addr_t *dst)
12670Sstevel@tonic-gate {
12680Sstevel@tonic-gate 	nce_t		*mnce;	/* Multicast mapping entry */
12690Sstevel@tonic-gate 	nce_t		*nce;
12700Sstevel@tonic-gate 	uchar_t		*hw_addr = NULL;
12710Sstevel@tonic-gate 	int		err = 0;
12723448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
12730Sstevel@tonic-gate 
12740Sstevel@tonic-gate 	ASSERT(ill != NULL);
12752535Ssangeeta 	ASSERT(ill->ill_isv6);
12760Sstevel@tonic-gate 	ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst)));
12770Sstevel@tonic-gate 
12783448Sdh155122 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
12793448Sdh155122 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst));
12808485SPeter.Memishian@Sun.COM 	nce = nce_lookup_addr(ill, B_FALSE, dst, nce);
12810Sstevel@tonic-gate 	if (nce != NULL) {
12823448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
12830Sstevel@tonic-gate 		NCE_REFRELE(nce);
12840Sstevel@tonic-gate 		return (0);
12850Sstevel@tonic-gate 	}
12860Sstevel@tonic-gate 	/* No entry, now lookup for a mapping this should never fail */
12870Sstevel@tonic-gate 	mnce = nce_lookup_mapping(ill, dst);
12880Sstevel@tonic-gate 	if (mnce == NULL) {
12890Sstevel@tonic-gate 		/* Something broken for the interface. */
12903448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
12910Sstevel@tonic-gate 		return (ESRCH);
12920Sstevel@tonic-gate 	}
12930Sstevel@tonic-gate 	ASSERT(mnce->nce_flags & NCE_F_MAPPING);
12940Sstevel@tonic-gate 	if (ill->ill_net_type == IRE_IF_RESOLVER) {
12950Sstevel@tonic-gate 		/*
12960Sstevel@tonic-gate 		 * For IRE_IF_RESOLVER a hardware mapping can be
12970Sstevel@tonic-gate 		 * generated, for IRE_IF_NORESOLVER, resolution cookie
12984714Ssowmini 		 * in the ill is copied in ndp_add_v6().
12990Sstevel@tonic-gate 		 */
13000Sstevel@tonic-gate 		hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
13010Sstevel@tonic-gate 		if (hw_addr == NULL) {
13023448Sdh155122 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
13030Sstevel@tonic-gate 			NCE_REFRELE(mnce);
13040Sstevel@tonic-gate 			return (ENOMEM);
13050Sstevel@tonic-gate 		}
13060Sstevel@tonic-gate 		nce_make_mapping(mnce, hw_addr, (uchar_t *)dst);
13070Sstevel@tonic-gate 	}
13080Sstevel@tonic-gate 	NCE_REFRELE(mnce);
13090Sstevel@tonic-gate 	/*
13100Sstevel@tonic-gate 	 * IRE_IF_NORESOLVER type simply copies the resolution
13110Sstevel@tonic-gate 	 * cookie passed in.  So no hw_addr is needed.
13120Sstevel@tonic-gate 	 */
13134714Ssowmini 	err = ndp_add_v6(ill,
13140Sstevel@tonic-gate 	    hw_addr,
13150Sstevel@tonic-gate 	    dst,
13160Sstevel@tonic-gate 	    &ipv6_all_ones,
13170Sstevel@tonic-gate 	    &ipv6_all_zeros,
13180Sstevel@tonic-gate 	    0,
13190Sstevel@tonic-gate 	    NCE_F_NONUD,
13200Sstevel@tonic-gate 	    ND_REACHABLE,
13214714Ssowmini 	    &nce);
13223448Sdh155122 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
13230Sstevel@tonic-gate 	if (hw_addr != NULL)
13240Sstevel@tonic-gate 		kmem_free(hw_addr, ill->ill_nd_lla_len);
13250Sstevel@tonic-gate 	if (err != 0) {
13260Sstevel@tonic-gate 		ip1dbg(("nce_set_multicast: create failed" "%d\n", err));
13270Sstevel@tonic-gate 		return (err);
13280Sstevel@tonic-gate 	}
13290Sstevel@tonic-gate 	NCE_REFRELE(nce);
13300Sstevel@tonic-gate 	return (0);
13310Sstevel@tonic-gate }
13320Sstevel@tonic-gate 
13330Sstevel@tonic-gate /*
13340Sstevel@tonic-gate  * Return the link layer address, and any flags of a nce.
13350Sstevel@tonic-gate  */
13360Sstevel@tonic-gate int
13370Sstevel@tonic-gate ndp_query(ill_t *ill, struct lif_nd_req *lnr)
13380Sstevel@tonic-gate {
13390Sstevel@tonic-gate 	nce_t		*nce;
13400Sstevel@tonic-gate 	in6_addr_t	*addr;
13410Sstevel@tonic-gate 	sin6_t		*sin6;
13420Sstevel@tonic-gate 	dl_unitdata_req_t	*dl;
13430Sstevel@tonic-gate 
13442535Ssangeeta 	ASSERT(ill != NULL && ill->ill_isv6);
13450Sstevel@tonic-gate 	sin6 = (sin6_t *)&lnr->lnr_addr;
13460Sstevel@tonic-gate 	addr =  &sin6->sin6_addr;
13470Sstevel@tonic-gate 
13488485SPeter.Memishian@Sun.COM 	/*
13498485SPeter.Memishian@Sun.COM 	 * NOTE: if the ill is an IPMP interface, then match against the whole
13508485SPeter.Memishian@Sun.COM 	 * illgrp.  This e.g. allows in.ndpd to retrieve the link layer
13518485SPeter.Memishian@Sun.COM 	 * addresses for the data addresses on an IPMP interface even though
13528485SPeter.Memishian@Sun.COM 	 * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill.
13538485SPeter.Memishian@Sun.COM 	 */
13548485SPeter.Memishian@Sun.COM 	nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE);
13550Sstevel@tonic-gate 	if (nce == NULL)
13560Sstevel@tonic-gate 		return (ESRCH);
13570Sstevel@tonic-gate 	/* If in INCOMPLETE state, no link layer address is available yet */
13580Sstevel@tonic-gate 	if (nce->nce_state == ND_INCOMPLETE)
13590Sstevel@tonic-gate 		goto done;
13600Sstevel@tonic-gate 	dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr;
13610Sstevel@tonic-gate 	if (ill->ill_flags & ILLF_XRESOLV)
13620Sstevel@tonic-gate 		lnr->lnr_hdw_len = dl->dl_dest_addr_length;
13630Sstevel@tonic-gate 	else
13640Sstevel@tonic-gate 		lnr->lnr_hdw_len = ill->ill_nd_lla_len;
13650Sstevel@tonic-gate 	ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <=
13660Sstevel@tonic-gate 	    sizeof (lnr->lnr_hdw_addr));
13670Sstevel@tonic-gate 	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
13680Sstevel@tonic-gate 	    (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len);
13690Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_ISROUTER)
13700Sstevel@tonic-gate 		lnr->lnr_flags = NDF_ISROUTER_ON;
13710Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_ANYCAST)
13720Sstevel@tonic-gate 		lnr->lnr_flags |= NDF_ANYCAST_ON;
13730Sstevel@tonic-gate done:
13740Sstevel@tonic-gate 	NCE_REFRELE(nce);
13750Sstevel@tonic-gate 	return (0);
13760Sstevel@tonic-gate }
13770Sstevel@tonic-gate 
13780Sstevel@tonic-gate /*
13790Sstevel@tonic-gate  * Send Enable/Disable multicast reqs to driver.
13800Sstevel@tonic-gate  */
13810Sstevel@tonic-gate int
13820Sstevel@tonic-gate ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len,
13830Sstevel@tonic-gate     uint32_t hw_addr_offset, mblk_t *mp)
13840Sstevel@tonic-gate {
13850Sstevel@tonic-gate 	nce_t		*nce;
13860Sstevel@tonic-gate 	uchar_t		*hw_addr;
13873448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
13880Sstevel@tonic-gate 
13892535Ssangeeta 	ASSERT(ill != NULL && ill->ill_isv6);
13900Sstevel@tonic-gate 	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
13910Sstevel@tonic-gate 	hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len);
13920Sstevel@tonic-gate 	if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) {
13930Sstevel@tonic-gate 		freemsg(mp);
13940Sstevel@tonic-gate 		return (EINVAL);
13950Sstevel@tonic-gate 	}
13963448Sdh155122 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
13970Sstevel@tonic-gate 	nce = nce_lookup_mapping(ill, addr);
13980Sstevel@tonic-gate 	if (nce == NULL) {
13993448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
14000Sstevel@tonic-gate 		freemsg(mp);
14010Sstevel@tonic-gate 		return (ESRCH);
14020Sstevel@tonic-gate 	}
14033448Sdh155122 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
14040Sstevel@tonic-gate 	/*
14050Sstevel@tonic-gate 	 * Update dl_addr_length and dl_addr_offset for primitives that
14060Sstevel@tonic-gate 	 * have physical addresses as opposed to full saps
14070Sstevel@tonic-gate 	 */
14080Sstevel@tonic-gate 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
14090Sstevel@tonic-gate 	case DL_ENABMULTI_REQ:
14100Sstevel@tonic-gate 		/* Track the state if this is the first enabmulti */
14112893Sja97890 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
14122893Sja97890 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
14130Sstevel@tonic-gate 		ip1dbg(("ndp_mcastreq: ENABMULTI\n"));
14140Sstevel@tonic-gate 		break;
14150Sstevel@tonic-gate 	case DL_DISABMULTI_REQ:
14160Sstevel@tonic-gate 		ip1dbg(("ndp_mcastreq: DISABMULTI\n"));
14170Sstevel@tonic-gate 		break;
14180Sstevel@tonic-gate 	default:
14190Sstevel@tonic-gate 		NCE_REFRELE(nce);
14200Sstevel@tonic-gate 		ip1dbg(("ndp_mcastreq: default\n"));
14210Sstevel@tonic-gate 		return (EINVAL);
14220Sstevel@tonic-gate 	}
14230Sstevel@tonic-gate 	nce_make_mapping(nce, hw_addr, (uchar_t *)addr);
14240Sstevel@tonic-gate 	NCE_REFRELE(nce);
14254360Smeem 	ill_dlpi_send(ill, mp);
14260Sstevel@tonic-gate 	return (0);
14270Sstevel@tonic-gate }
14280Sstevel@tonic-gate 
1429*9175SSowmini.Varadhan@Sun.COM 
1430*9175SSowmini.Varadhan@Sun.COM /*
1431*9175SSowmini.Varadhan@Sun.COM  * Send out a NS for resolving the ip address in nce.
1432*9175SSowmini.Varadhan@Sun.COM  */
1433*9175SSowmini.Varadhan@Sun.COM void
1434*9175SSowmini.Varadhan@Sun.COM ip_ndp_resolve(nce_t *nce)
1435*9175SSowmini.Varadhan@Sun.COM {
1436*9175SSowmini.Varadhan@Sun.COM 	in6_addr_t	sender6 = ipv6_all_zeros;
1437*9175SSowmini.Varadhan@Sun.COM 	uint32_t	ms;
1438*9175SSowmini.Varadhan@Sun.COM 	mblk_t		*mp;
1439*9175SSowmini.Varadhan@Sun.COM 	ip6_t		*ip6h;
1440*9175SSowmini.Varadhan@Sun.COM 
1441*9175SSowmini.Varadhan@Sun.COM 	ASSERT(MUTEX_HELD(&nce->nce_lock));
1442*9175SSowmini.Varadhan@Sun.COM 	/*
1443*9175SSowmini.Varadhan@Sun.COM 	 * Pick the src from outgoing packet, if one is available.
1444*9175SSowmini.Varadhan@Sun.COM 	 * Otherwise let nce_xmit figure out the src.
1445*9175SSowmini.Varadhan@Sun.COM 	 */
1446*9175SSowmini.Varadhan@Sun.COM 	if ((mp = nce->nce_qd_mp) != NULL) {
1447*9175SSowmini.Varadhan@Sun.COM 		/* Handle ip_newroute_v6 giving us IPSEC packets */
1448*9175SSowmini.Varadhan@Sun.COM 		if (mp->b_datap->db_type == M_CTL)
1449*9175SSowmini.Varadhan@Sun.COM 			mp = mp->b_cont;
1450*9175SSowmini.Varadhan@Sun.COM 		ip6h = (ip6_t *)mp->b_rptr;
1451*9175SSowmini.Varadhan@Sun.COM 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
1452*9175SSowmini.Varadhan@Sun.COM 			/*
1453*9175SSowmini.Varadhan@Sun.COM 			 * This message should have been pulled up already in
1454*9175SSowmini.Varadhan@Sun.COM 			 * ip_wput_v6. We can't do pullups here because
1455*9175SSowmini.Varadhan@Sun.COM 			 * the message could be from the nce_qd_mp which could
1456*9175SSowmini.Varadhan@Sun.COM 			 * have b_next/b_prev non-NULL.
1457*9175SSowmini.Varadhan@Sun.COM 			 */
1458*9175SSowmini.Varadhan@Sun.COM 			ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
1459*9175SSowmini.Varadhan@Sun.COM 			ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
1460*9175SSowmini.Varadhan@Sun.COM 		}
1461*9175SSowmini.Varadhan@Sun.COM 		sender6 = ip6h->ip6_src;
1462*9175SSowmini.Varadhan@Sun.COM 	}
1463*9175SSowmini.Varadhan@Sun.COM 	ms = nce_solicit(nce, sender6);
1464*9175SSowmini.Varadhan@Sun.COM 	mutex_exit(&nce->nce_lock);
1465*9175SSowmini.Varadhan@Sun.COM 	if (ms == 0) {
1466*9175SSowmini.Varadhan@Sun.COM 		if (nce->nce_state != ND_REACHABLE) {
1467*9175SSowmini.Varadhan@Sun.COM 			nce_resolv_failed(nce);
1468*9175SSowmini.Varadhan@Sun.COM 			ndp_delete(nce);
1469*9175SSowmini.Varadhan@Sun.COM 		}
1470*9175SSowmini.Varadhan@Sun.COM 	} else {
1471*9175SSowmini.Varadhan@Sun.COM 		NDP_RESTART_TIMER(nce, (clock_t)ms);
1472*9175SSowmini.Varadhan@Sun.COM 	}
1473*9175SSowmini.Varadhan@Sun.COM 	mutex_enter(&nce->nce_lock);
1474*9175SSowmini.Varadhan@Sun.COM }
1475*9175SSowmini.Varadhan@Sun.COM 
14760Sstevel@tonic-gate /*
14770Sstevel@tonic-gate  * Send a neighbor solicitation.
14780Sstevel@tonic-gate  * Returns number of milliseconds after which we should either rexmit or abort.
14790Sstevel@tonic-gate  * Return of zero means we should abort.
14800Sstevel@tonic-gate  * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt.
14810Sstevel@tonic-gate  *
14820Sstevel@tonic-gate  * NOTE: This routine drops nce_lock (and later reacquires it) when sending
14830Sstevel@tonic-gate  * the packet.
14840Sstevel@tonic-gate  */
14850Sstevel@tonic-gate uint32_t
1486*9175SSowmini.Varadhan@Sun.COM nce_solicit(nce_t *nce, in6_addr_t sender)
14870Sstevel@tonic-gate {
14888485SPeter.Memishian@Sun.COM 	boolean_t	dropped;
14898485SPeter.Memishian@Sun.COM 
1490*9175SSowmini.Varadhan@Sun.COM 	ASSERT(nce->nce_ipversion == IPV6_VERSION);
14910Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&nce->nce_lock));
14928485SPeter.Memishian@Sun.COM 
14938485SPeter.Memishian@Sun.COM 	if (nce->nce_rcnt == 0)
14940Sstevel@tonic-gate 		return (0);
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate 	nce->nce_rcnt--;
14970Sstevel@tonic-gate 	mutex_exit(&nce->nce_lock);
14988485SPeter.Memishian@Sun.COM 	dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0);
14990Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
15000Sstevel@tonic-gate 	if (dropped)
15010Sstevel@tonic-gate 		nce->nce_rcnt++;
15028485SPeter.Memishian@Sun.COM 	return (nce->nce_ill->ill_reachable_retrans_time);
15030Sstevel@tonic-gate }
15040Sstevel@tonic-gate 
15052546Scarlsonj /*
15062546Scarlsonj  * Attempt to recover an address on an interface that's been marked as a
15072546Scarlsonj  * duplicate.  Because NCEs are destroyed when the interface goes down, there's
15082546Scarlsonj  * no easy way to just probe the address and have the right thing happen if
15092546Scarlsonj  * it's no longer in use.  Instead, we just bring it up normally and allow the
15102546Scarlsonj  * regular interface start-up logic to probe for a remaining duplicate and take
15112546Scarlsonj  * us back down if necessary.
15122546Scarlsonj  * Neither DHCP nor temporary addresses arrive here; they're excluded by
15132546Scarlsonj  * ip_ndp_excl.
15142546Scarlsonj  */
15152546Scarlsonj /* ARGSUSED */
15162546Scarlsonj static void
15172546Scarlsonj ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
15182546Scarlsonj {
15192546Scarlsonj 	ill_t	*ill = rq->q_ptr;
15202546Scarlsonj 	ipif_t	*ipif;
15212546Scarlsonj 	in6_addr_t *addr = (in6_addr_t *)mp->b_rptr;
15222546Scarlsonj 
15232546Scarlsonj 	for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) {
15242546Scarlsonj 		/*
15252546Scarlsonj 		 * We do not support recovery of proxy ARP'd interfaces,
15262546Scarlsonj 		 * because the system lacks a complete proxy ARP mechanism.
15272546Scarlsonj 		 */
15282546Scarlsonj 		if ((ipif->ipif_flags & IPIF_POINTOPOINT) ||
15292546Scarlsonj 		    !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) {
15302546Scarlsonj 			continue;
15312546Scarlsonj 		}
15322546Scarlsonj 
15332546Scarlsonj 		/*
15343322Scarlsonj 		 * If we have already recovered or if the interface is going
15353322Scarlsonj 		 * away, then ignore.
15362546Scarlsonj 		 */
15372546Scarlsonj 		mutex_enter(&ill->ill_lock);
15383322Scarlsonj 		if (!(ipif->ipif_flags & IPIF_DUPLICATE) ||
15398485SPeter.Memishian@Sun.COM 		    (ipif->ipif_state_flags & IPIF_CONDEMNED)) {
15402546Scarlsonj 			mutex_exit(&ill->ill_lock);
15412546Scarlsonj 			continue;
15422546Scarlsonj 		}
15432546Scarlsonj 
15442546Scarlsonj 		ipif->ipif_flags &= ~IPIF_DUPLICATE;
15452546Scarlsonj 		ill->ill_ipif_dup_count--;
15462546Scarlsonj 		mutex_exit(&ill->ill_lock);
15472546Scarlsonj 		ipif->ipif_was_dup = B_TRUE;
15482546Scarlsonj 
15498485SPeter.Memishian@Sun.COM 		VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS);
15508485SPeter.Memishian@Sun.COM 		(void) ipif_up_done_v6(ipif);
15512546Scarlsonj 	}
15522546Scarlsonj 	freeb(mp);
15532546Scarlsonj }
15542546Scarlsonj 
15552546Scarlsonj /*
15562546Scarlsonj  * Attempt to recover an IPv6 interface that's been shut down as a duplicate.
15572546Scarlsonj  * As long as someone else holds the address, the interface will stay down.
15582546Scarlsonj  * When that conflict goes away, the interface is brought back up.  This is
15592546Scarlsonj  * done so that accidental shutdowns of addresses aren't made permanent.  Your
15602546Scarlsonj  * server will recover from a failure.
15612546Scarlsonj  *
15622546Scarlsonj  * For DHCP and temporary addresses, recovery is not done in the kernel.
15632546Scarlsonj  * Instead, it's handled by user space processes (dhcpagent and in.ndpd).
15642546Scarlsonj  *
15652546Scarlsonj  * This function is entered on a timer expiry; the ID is in ipif_recovery_id.
15662546Scarlsonj  */
15672546Scarlsonj static void
15682546Scarlsonj ipif6_dup_recovery(void *arg)
15692546Scarlsonj {
15702546Scarlsonj 	ipif_t *ipif = arg;
15712546Scarlsonj 
15722546Scarlsonj 	ipif->ipif_recovery_id = 0;
15732546Scarlsonj 	if (!(ipif->ipif_flags & IPIF_DUPLICATE))
15742546Scarlsonj 		return;
15752546Scarlsonj 
15763322Scarlsonj 	/*
15773322Scarlsonj 	 * No lock, because this is just an optimization.
15783322Scarlsonj 	 */
15798485SPeter.Memishian@Sun.COM 	if (ipif->ipif_state_flags & IPIF_CONDEMNED)
15803322Scarlsonj 		return;
15813322Scarlsonj 
15822546Scarlsonj 	/* If the link is down, we'll retry this later */
15832546Scarlsonj 	if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING))
15842546Scarlsonj 		return;
15852546Scarlsonj 
15862546Scarlsonj 	ndp_do_recovery(ipif);
15872546Scarlsonj }
15882546Scarlsonj 
15892546Scarlsonj /*
15902546Scarlsonj  * Perform interface recovery by forcing the duplicate interfaces up and
15912546Scarlsonj  * allowing the system to determine which ones should stay up.
15922546Scarlsonj  *
15932546Scarlsonj  * Called both by recovery timer expiry and link-up notification.
15942546Scarlsonj  */
15950Sstevel@tonic-gate void
15962546Scarlsonj ndp_do_recovery(ipif_t *ipif)
15972546Scarlsonj {
15982546Scarlsonj 	ill_t *ill = ipif->ipif_ill;
15992546Scarlsonj 	mblk_t *mp;
16003448Sdh155122 	ip_stack_t *ipst = ill->ill_ipst;
16012546Scarlsonj 
16022546Scarlsonj 	mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED);
16032546Scarlsonj 	if (mp == NULL) {
16043322Scarlsonj 		mutex_enter(&ill->ill_lock);
16053322Scarlsonj 		if (ipif->ipif_recovery_id == 0 &&
16068485SPeter.Memishian@Sun.COM 		    !(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
16073322Scarlsonj 			ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
16083448Sdh155122 			    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
16093322Scarlsonj 		}
16103322Scarlsonj 		mutex_exit(&ill->ill_lock);
16112546Scarlsonj 	} else {
16128485SPeter.Memishian@Sun.COM 		/*
16138485SPeter.Memishian@Sun.COM 		 * A recovery timer may still be running if we got here from
16148485SPeter.Memishian@Sun.COM 		 * ill_restart_dad(); cancel that timer.
16158485SPeter.Memishian@Sun.COM 		 */
16168485SPeter.Memishian@Sun.COM 		if (ipif->ipif_recovery_id != 0)
16178485SPeter.Memishian@Sun.COM 			(void) untimeout(ipif->ipif_recovery_id);
16188485SPeter.Memishian@Sun.COM 		ipif->ipif_recovery_id = 0;
16198485SPeter.Memishian@Sun.COM 
16202546Scarlsonj 		bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr,
16212546Scarlsonj 		    sizeof (ipif->ipif_v6lcl_addr));
16222546Scarlsonj 		ill_refhold(ill);
16234360Smeem 		qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP,
16244360Smeem 		    B_FALSE);
16252546Scarlsonj 	}
16262546Scarlsonj }
16272546Scarlsonj 
16282546Scarlsonj /*
16298485SPeter.Memishian@Sun.COM  * Find the MAC and IP addresses in an NA/NS message.
16302546Scarlsonj  */
16318485SPeter.Memishian@Sun.COM static void
16328485SPeter.Memishian@Sun.COM ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp,
16338485SPeter.Memishian@Sun.COM     uchar_t **haddr, uint_t *haddrlenp)
16342546Scarlsonj {
16358485SPeter.Memishian@Sun.COM 	ip6_t *ip6h = (ip6_t *)mp->b_rptr;
16368485SPeter.Memishian@Sun.COM 	icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
16378485SPeter.Memishian@Sun.COM 	nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
16388485SPeter.Memishian@Sun.COM 	nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
16392546Scarlsonj 	uchar_t *addr;
16408485SPeter.Memishian@Sun.COM 	int alen = 0;
16418485SPeter.Memishian@Sun.COM 
16422546Scarlsonj 	if (dl_mp == NULL) {
16438512SPeter.Memishian@Sun.COM 		nd_opt_hdr_t *opt = NULL;
16448485SPeter.Memishian@Sun.COM 		int len;
16452546Scarlsonj 
16462546Scarlsonj 		/*
16472546Scarlsonj 		 * If it's from the fast-path, then it can't be a probe
16488485SPeter.Memishian@Sun.COM 		 * message, and thus must include a linkaddr option.
16492546Scarlsonj 		 * Extract that here.
16502546Scarlsonj 		 */
16518485SPeter.Memishian@Sun.COM 		switch (icmp6->icmp6_type) {
16528485SPeter.Memishian@Sun.COM 		case ND_NEIGHBOR_SOLICIT:
16538485SPeter.Memishian@Sun.COM 			len = mp->b_wptr - (uchar_t *)ns;
16548485SPeter.Memishian@Sun.COM 			if ((len -= sizeof (*ns)) > 0) {
16558485SPeter.Memishian@Sun.COM 				opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1),
16568485SPeter.Memishian@Sun.COM 				    len, ND_OPT_SOURCE_LINKADDR);
16572546Scarlsonj 			}
16588485SPeter.Memishian@Sun.COM 			break;
16598485SPeter.Memishian@Sun.COM 		case ND_NEIGHBOR_ADVERT:
16608485SPeter.Memishian@Sun.COM 			len = mp->b_wptr - (uchar_t *)na;
16618485SPeter.Memishian@Sun.COM 			if ((len -= sizeof (*na)) > 0) {
16628485SPeter.Memishian@Sun.COM 				opt = ndp_get_option((nd_opt_hdr_t *)(na + 1),
16638485SPeter.Memishian@Sun.COM 				    len, ND_OPT_TARGET_LINKADDR);
16648485SPeter.Memishian@Sun.COM 			}
16658485SPeter.Memishian@Sun.COM 			break;
16662546Scarlsonj 		}
16678485SPeter.Memishian@Sun.COM 
16688485SPeter.Memishian@Sun.COM 		if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >=
16698485SPeter.Memishian@Sun.COM 		    ill->ill_nd_lla_len) {
16708485SPeter.Memishian@Sun.COM 			addr = (uchar_t *)(opt + 1);
16718485SPeter.Memishian@Sun.COM 			alen = ill->ill_nd_lla_len;
16728485SPeter.Memishian@Sun.COM 		}
16738485SPeter.Memishian@Sun.COM 
16742546Scarlsonj 		/*
16752546Scarlsonj 		 * We cheat a bit here for the sake of printing usable log
16762546Scarlsonj 		 * messages in the rare case where the reply we got was unicast
16772546Scarlsonj 		 * without a source linkaddr option, and the interface is in
16782546Scarlsonj 		 * fastpath mode.  (Sigh.)
16792546Scarlsonj 		 */
16802546Scarlsonj 		if (alen == 0 && ill->ill_type == IFT_ETHER &&
16812546Scarlsonj 		    MBLKHEAD(mp) >= sizeof (struct ether_header)) {
16822546Scarlsonj 			struct ether_header *pether;
16832546Scarlsonj 
16842546Scarlsonj 			pether = (struct ether_header *)((char *)ip6h -
16852546Scarlsonj 			    sizeof (*pether));
16862546Scarlsonj 			addr = pether->ether_shost.ether_addr_octet;
16872546Scarlsonj 			alen = ETHERADDRL;
16882546Scarlsonj 		}
16892546Scarlsonj 	} else {
16902546Scarlsonj 		dl_unitdata_ind_t *dlu;
16912546Scarlsonj 
16922546Scarlsonj 		dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr;
16932546Scarlsonj 		alen = dlu->dl_src_addr_length;
16942546Scarlsonj 		if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) &&
16952546Scarlsonj 		    dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) {
16962546Scarlsonj 			addr = dl_mp->b_rptr + dlu->dl_src_addr_offset;
16972546Scarlsonj 			if (ill->ill_sap_length < 0) {
16982546Scarlsonj 				alen += ill->ill_sap_length;
16992546Scarlsonj 			} else {
17002546Scarlsonj 				addr += ill->ill_sap_length;
17012546Scarlsonj 				alen -= ill->ill_sap_length;
17022546Scarlsonj 			}
17032546Scarlsonj 		}
17042546Scarlsonj 	}
17058485SPeter.Memishian@Sun.COM 
17062546Scarlsonj 	if (alen > 0) {
17072546Scarlsonj 		*haddr = addr;
17088485SPeter.Memishian@Sun.COM 		*haddrlenp = alen;
17092546Scarlsonj 	} else {
17102546Scarlsonj 		*haddr = NULL;
17118485SPeter.Memishian@Sun.COM 		*haddrlenp = 0;
17122546Scarlsonj 	}
17138485SPeter.Memishian@Sun.COM 
17148485SPeter.Memishian@Sun.COM 	/* nd_ns_target and nd_na_target are at the same offset, so we cheat */
17158485SPeter.Memishian@Sun.COM 	*targp = ns->nd_ns_target;
17162546Scarlsonj }
17172546Scarlsonj 
17182546Scarlsonj /*
17192546Scarlsonj  * This is for exclusive changes due to NDP duplicate address detection
17202546Scarlsonj  * failure.
17212546Scarlsonj  */
17222546Scarlsonj /* ARGSUSED */
17232546Scarlsonj static void
17242546Scarlsonj ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg)
17252546Scarlsonj {
17262546Scarlsonj 	ill_t	*ill = rq->q_ptr;
17272546Scarlsonj 	ipif_t	*ipif;
17288485SPeter.Memishian@Sun.COM 	mblk_t	*dl_mp = NULL;
17298485SPeter.Memishian@Sun.COM 	uchar_t	*haddr;
17308485SPeter.Memishian@Sun.COM 	uint_t	haddrlen;
17313448Sdh155122 	ip_stack_t *ipst = ill->ill_ipst;
17328485SPeter.Memishian@Sun.COM 	in6_addr_t targ;
17332546Scarlsonj 
17342546Scarlsonj 	if (DB_TYPE(mp) != M_DATA) {
17352546Scarlsonj 		dl_mp = mp;
17362546Scarlsonj 		mp = mp->b_cont;
17372546Scarlsonj 	}
17388485SPeter.Memishian@Sun.COM 
17398485SPeter.Memishian@Sun.COM 	ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
17408485SPeter.Memishian@Sun.COM 	if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) {
17412546Scarlsonj 		/*
17428485SPeter.Memishian@Sun.COM 		 * Ignore conflicts generated by misbehaving switches that
17438485SPeter.Memishian@Sun.COM 		 * just reflect our own messages back to us.  For IPMP, we may
17448485SPeter.Memishian@Sun.COM 		 * see reflections across any ill in the illgrp.
17452546Scarlsonj 		 */
17468485SPeter.Memishian@Sun.COM 		if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 ||
17478485SPeter.Memishian@Sun.COM 		    IS_UNDER_IPMP(ill) &&
17488485SPeter.Memishian@Sun.COM 		    ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL)
17498485SPeter.Memishian@Sun.COM 			goto ignore_conflict;
17508485SPeter.Memishian@Sun.COM 	}
17518485SPeter.Memishian@Sun.COM 
17528485SPeter.Memishian@Sun.COM 	/*
17538485SPeter.Memishian@Sun.COM 	 * Look up the appropriate ipif.
17548485SPeter.Memishian@Sun.COM 	 */
17558485SPeter.Memishian@Sun.COM 	ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL,
17568485SPeter.Memishian@Sun.COM 	    NULL, ipst);
17578485SPeter.Memishian@Sun.COM 	if (ipif == NULL)
17588485SPeter.Memishian@Sun.COM 		goto ignore_conflict;
17598485SPeter.Memishian@Sun.COM 
17608485SPeter.Memishian@Sun.COM 	/* Reload the ill to match the ipif */
17618485SPeter.Memishian@Sun.COM 	ill = ipif->ipif_ill;
17628485SPeter.Memishian@Sun.COM 
17638485SPeter.Memishian@Sun.COM 	/* If it's already duplicate or ineligible, then don't do anything. */
17648485SPeter.Memishian@Sun.COM 	if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) {
17658485SPeter.Memishian@Sun.COM 		ipif_refrele(ipif);
17662546Scarlsonj 		goto ignore_conflict;
17672546Scarlsonj 	}
17684972Smeem 
17698485SPeter.Memishian@Sun.COM 	/*
17708485SPeter.Memishian@Sun.COM 	 * If this is a failure during duplicate recovery, then don't
17718485SPeter.Memishian@Sun.COM 	 * complain.  It may take a long time to recover.
17728485SPeter.Memishian@Sun.COM 	 */
17738485SPeter.Memishian@Sun.COM 	if (!ipif->ipif_was_dup) {
17748485SPeter.Memishian@Sun.COM 		char ibuf[LIFNAMSIZ];
17758485SPeter.Memishian@Sun.COM 		char hbuf[MAC_STR_LEN];
17768485SPeter.Memishian@Sun.COM 		char sbuf[INET6_ADDRSTRLEN];
17778485SPeter.Memishian@Sun.COM 
17788485SPeter.Memishian@Sun.COM 		ipif_get_name(ipif, ibuf, sizeof (ibuf));
17798485SPeter.Memishian@Sun.COM 		cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);"
17808485SPeter.Memishian@Sun.COM 		    " disabled", ibuf,
17818485SPeter.Memishian@Sun.COM 		    inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
17828485SPeter.Memishian@Sun.COM 		    mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)));
17832546Scarlsonj 	}
17848485SPeter.Memishian@Sun.COM 	mutex_enter(&ill->ill_lock);
17858485SPeter.Memishian@Sun.COM 	ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE));
17868485SPeter.Memishian@Sun.COM 	ipif->ipif_flags |= IPIF_DUPLICATE;
17878485SPeter.Memishian@Sun.COM 	ill->ill_ipif_dup_count++;
17888485SPeter.Memishian@Sun.COM 	mutex_exit(&ill->ill_lock);
17898485SPeter.Memishian@Sun.COM 	(void) ipif_down(ipif, NULL, NULL);
17908485SPeter.Memishian@Sun.COM 	ipif_down_tail(ipif);
17918485SPeter.Memishian@Sun.COM 	mutex_enter(&ill->ill_lock);
17928485SPeter.Memishian@Sun.COM 	if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) &&
17938485SPeter.Memishian@Sun.COM 	    ill->ill_net_type == IRE_IF_RESOLVER &&
17948485SPeter.Memishian@Sun.COM 	    !(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
17958485SPeter.Memishian@Sun.COM 	    ipst->ips_ip_dup_recovery > 0) {
17968485SPeter.Memishian@Sun.COM 		ASSERT(ipif->ipif_recovery_id == 0);
17978485SPeter.Memishian@Sun.COM 		ipif->ipif_recovery_id = timeout(ipif6_dup_recovery,
17988485SPeter.Memishian@Sun.COM 		    ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery));
17998485SPeter.Memishian@Sun.COM 	}
18008485SPeter.Memishian@Sun.COM 	mutex_exit(&ill->ill_lock);
18018485SPeter.Memishian@Sun.COM 	ipif_refrele(ipif);
18022546Scarlsonj ignore_conflict:
18032546Scarlsonj 	if (dl_mp != NULL)
18042546Scarlsonj 		freeb(dl_mp);
18052546Scarlsonj 	freemsg(mp);
18062546Scarlsonj }
18072546Scarlsonj 
18082546Scarlsonj /*
18092546Scarlsonj  * Handle failure by tearing down the ipifs with the specified address.  Note
18102546Scarlsonj  * that tearing down the ipif also means deleting the nce through ipif_down, so
18112546Scarlsonj  * it's not possible to do recovery by just restarting the nce timer.  Instead,
18122546Scarlsonj  * we start a timer on the ipif.
18132546Scarlsonj  */
18142546Scarlsonj static void
18158485SPeter.Memishian@Sun.COM ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
18162546Scarlsonj {
18172546Scarlsonj 	if ((mp = copymsg(mp)) != NULL) {
18182546Scarlsonj 		if (dl_mp == NULL)
18192546Scarlsonj 			dl_mp = mp;
18202546Scarlsonj 		else if ((dl_mp = copyb(dl_mp)) != NULL)
18212546Scarlsonj 			dl_mp->b_cont = mp;
18222546Scarlsonj 		if (dl_mp == NULL) {
18232546Scarlsonj 			freemsg(mp);
18242546Scarlsonj 		} else {
18252546Scarlsonj 			ill_refhold(ill);
18264360Smeem 			qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP,
18274360Smeem 			    B_FALSE);
18282546Scarlsonj 		}
18292546Scarlsonj 	}
18302546Scarlsonj }
18312546Scarlsonj 
18322546Scarlsonj /*
18332546Scarlsonj  * Handle a discovered conflict: some other system is advertising that it owns
18342546Scarlsonj  * one of our IP addresses.  We need to defend ourselves, or just shut down the
18352546Scarlsonj  * interface.
18362546Scarlsonj  */
18372546Scarlsonj static void
18382546Scarlsonj ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce)
18392546Scarlsonj {
18402546Scarlsonj 	ipif_t *ipif;
18412546Scarlsonj 	uint32_t now;
18422546Scarlsonj 	uint_t maxdefense;
18432546Scarlsonj 	uint_t defs;
18443448Sdh155122 	ip_stack_t *ipst = ill->ill_ipst;
18452546Scarlsonj 
18462546Scarlsonj 	ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL,
18473448Sdh155122 	    NULL, NULL, ipst);
18482546Scarlsonj 	if (ipif == NULL)
18492546Scarlsonj 		return;
18508485SPeter.Memishian@Sun.COM 
18512546Scarlsonj 	/*
18522546Scarlsonj 	 * First, figure out if this address is disposable.
18532546Scarlsonj 	 */
18542546Scarlsonj 	if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY))
18553448Sdh155122 		maxdefense = ipst->ips_ip_max_temp_defend;
18562546Scarlsonj 	else
18573448Sdh155122 		maxdefense = ipst->ips_ip_max_defend;
18582546Scarlsonj 
18592546Scarlsonj 	/*
18602546Scarlsonj 	 * Now figure out how many times we've defended ourselves.  Ignore
18612546Scarlsonj 	 * defenses that happened long in the past.
18622546Scarlsonj 	 */
18632546Scarlsonj 	now = gethrestime_sec();
18642546Scarlsonj 	mutex_enter(&nce->nce_lock);
18652546Scarlsonj 	if ((defs = nce->nce_defense_count) > 0 &&
18663448Sdh155122 	    now - nce->nce_defense_time > ipst->ips_ip_defend_interval) {
18672546Scarlsonj 		nce->nce_defense_count = defs = 0;
18682546Scarlsonj 	}
18692546Scarlsonj 	nce->nce_defense_count++;
18702546Scarlsonj 	nce->nce_defense_time = now;
18712546Scarlsonj 	mutex_exit(&nce->nce_lock);
18722546Scarlsonj 	ipif_refrele(ipif);
18732546Scarlsonj 
18742546Scarlsonj 	/*
18752546Scarlsonj 	 * If we've defended ourselves too many times already, then give up and
18762546Scarlsonj 	 * tear down the interface(s) using this address.  Otherwise, defend by
18772546Scarlsonj 	 * sending out an unsolicited Neighbor Advertisement.
18782546Scarlsonj 	 */
18792546Scarlsonj 	if (defs >= maxdefense) {
18808485SPeter.Memishian@Sun.COM 		ip_ndp_failure(ill, mp, dl_mp);
18812546Scarlsonj 	} else {
18822546Scarlsonj 		char hbuf[MAC_STR_LEN];
18832546Scarlsonj 		char sbuf[INET6_ADDRSTRLEN];
18842546Scarlsonj 		uchar_t *haddr;
18858485SPeter.Memishian@Sun.COM 		uint_t haddrlen;
18868485SPeter.Memishian@Sun.COM 		in6_addr_t targ;
18878485SPeter.Memishian@Sun.COM 
18888485SPeter.Memishian@Sun.COM 		ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen);
18892546Scarlsonj 		cmn_err(CE_WARN, "node %s is using our IP address %s on %s",
18908485SPeter.Memishian@Sun.COM 		    mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)),
18918485SPeter.Memishian@Sun.COM 		    inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)),
18928485SPeter.Memishian@Sun.COM 		    ill->ill_name);
18938485SPeter.Memishian@Sun.COM 
18948485SPeter.Memishian@Sun.COM 		(void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0);
18952546Scarlsonj 	}
18962546Scarlsonj }
18972546Scarlsonj 
18982546Scarlsonj static void
18992546Scarlsonj ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
19000Sstevel@tonic-gate {
19010Sstevel@tonic-gate 	nd_neighbor_solicit_t *ns;
19020Sstevel@tonic-gate 	uint32_t	hlen = ill->ill_nd_lla_len;
19030Sstevel@tonic-gate 	uchar_t		*haddr = NULL;
19040Sstevel@tonic-gate 	icmp6_t		*icmp_nd;
19050Sstevel@tonic-gate 	ip6_t		*ip6h;
19060Sstevel@tonic-gate 	nce_t		*our_nce = NULL;
19070Sstevel@tonic-gate 	in6_addr_t	target;
19080Sstevel@tonic-gate 	in6_addr_t	src;
19090Sstevel@tonic-gate 	int		len;
19100Sstevel@tonic-gate 	int		flag = 0;
19110Sstevel@tonic-gate 	nd_opt_hdr_t	*opt = NULL;
19120Sstevel@tonic-gate 	boolean_t	bad_solicit = B_FALSE;
19130Sstevel@tonic-gate 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
19140Sstevel@tonic-gate 
19150Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
19160Sstevel@tonic-gate 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
19170Sstevel@tonic-gate 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
19180Sstevel@tonic-gate 	src = ip6h->ip6_src;
19190Sstevel@tonic-gate 	ns = (nd_neighbor_solicit_t *)icmp_nd;
19200Sstevel@tonic-gate 	target = ns->nd_ns_target;
19210Sstevel@tonic-gate 	if (IN6_IS_ADDR_MULTICAST(&target)) {
19220Sstevel@tonic-gate 		if (ip_debug > 2) {
19230Sstevel@tonic-gate 			/* ip1dbg */
19240Sstevel@tonic-gate 			pr_addr_dbg("ndp_input_solicit: Target is"
19250Sstevel@tonic-gate 			    " multicast! %s\n", AF_INET6, &target);
19260Sstevel@tonic-gate 		}
19270Sstevel@tonic-gate 		bad_solicit = B_TRUE;
19280Sstevel@tonic-gate 		goto done;
19290Sstevel@tonic-gate 	}
19300Sstevel@tonic-gate 	if (len > sizeof (nd_neighbor_solicit_t)) {
19310Sstevel@tonic-gate 		/* Options present */
19320Sstevel@tonic-gate 		opt = (nd_opt_hdr_t *)&ns[1];
19330Sstevel@tonic-gate 		len -= sizeof (nd_neighbor_solicit_t);
19340Sstevel@tonic-gate 		if (!ndp_verify_optlen(opt, len)) {
19350Sstevel@tonic-gate 			ip1dbg(("ndp_input_solicit: Bad opt len\n"));
19360Sstevel@tonic-gate 			bad_solicit = B_TRUE;
19370Sstevel@tonic-gate 			goto done;
19380Sstevel@tonic-gate 		}
19398485SPeter.Memishian@Sun.COM 
19400Sstevel@tonic-gate 	}
19410Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
19420Sstevel@tonic-gate 		/* Check to see if this is a valid DAD solicitation */
19430Sstevel@tonic-gate 		if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) {
19440Sstevel@tonic-gate 			if (ip_debug > 2) {
19450Sstevel@tonic-gate 				/* ip1dbg */
19460Sstevel@tonic-gate 				pr_addr_dbg("ndp_input_solicit: IPv6 "
19470Sstevel@tonic-gate 				    "Destination is not solicited node "
19480Sstevel@tonic-gate 				    "multicast %s\n", AF_INET6,
19490Sstevel@tonic-gate 				    &ip6h->ip6_dst);
19500Sstevel@tonic-gate 			}
19510Sstevel@tonic-gate 			bad_solicit = B_TRUE;
19520Sstevel@tonic-gate 			goto done;
19530Sstevel@tonic-gate 		}
19540Sstevel@tonic-gate 	}
19550Sstevel@tonic-gate 
19568485SPeter.Memishian@Sun.COM 	/*
19578485SPeter.Memishian@Sun.COM 	 * NOTE: with IPMP, it's possible the nominated multicast ill (which
19588485SPeter.Memishian@Sun.COM 	 * received this packet if it's multicast) is not the ill tied to
19598485SPeter.Memishian@Sun.COM 	 * e.g. the IPMP ill's data link-local.  So we match across the illgrp
19608485SPeter.Memishian@Sun.COM 	 * to ensure we find the associated NCE.
19618485SPeter.Memishian@Sun.COM 	 */
19628485SPeter.Memishian@Sun.COM 	our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE);
19630Sstevel@tonic-gate 	/*
19640Sstevel@tonic-gate 	 * If this is a valid Solicitation, a permanent
19650Sstevel@tonic-gate 	 * entry should exist in the cache
19660Sstevel@tonic-gate 	 */
19670Sstevel@tonic-gate 	if (our_nce == NULL ||
19680Sstevel@tonic-gate 	    !(our_nce->nce_flags & NCE_F_PERMANENT)) {
19690Sstevel@tonic-gate 		ip1dbg(("ndp_input_solicit: Wrong target in NS?!"
19700Sstevel@tonic-gate 		    "ifname=%s ", ill->ill_name));
19710Sstevel@tonic-gate 		if (ip_debug > 2) {
19720Sstevel@tonic-gate 			/* ip1dbg */
19730Sstevel@tonic-gate 			pr_addr_dbg(" dst %s\n", AF_INET6, &target);
19740Sstevel@tonic-gate 		}
19750Sstevel@tonic-gate 		bad_solicit = B_TRUE;
19760Sstevel@tonic-gate 		goto done;
19770Sstevel@tonic-gate 	}
19780Sstevel@tonic-gate 
19790Sstevel@tonic-gate 	/* At this point we should have a verified NS per spec */
19800Sstevel@tonic-gate 	if (opt != NULL) {
19810Sstevel@tonic-gate 		opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR);
19820Sstevel@tonic-gate 		if (opt != NULL) {
19830Sstevel@tonic-gate 			haddr = (uchar_t *)&opt[1];
19842546Scarlsonj 			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
19850Sstevel@tonic-gate 			    hlen == 0) {
19868485SPeter.Memishian@Sun.COM 				ip1dbg(("ndp_input_solicit: bad SLLA\n"));
19870Sstevel@tonic-gate 				bad_solicit = B_TRUE;
19880Sstevel@tonic-gate 				goto done;
19890Sstevel@tonic-gate 			}
19900Sstevel@tonic-gate 		}
19910Sstevel@tonic-gate 	}
19922546Scarlsonj 
19932699Scarlsonj 	/* If sending directly to peer, set the unicast flag */
19942699Scarlsonj 	if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))
19950Sstevel@tonic-gate 		flag |= NDP_UNICAST;
19960Sstevel@tonic-gate 
19970Sstevel@tonic-gate 	/*
19980Sstevel@tonic-gate 	 * Create/update the entry for the soliciting node.
19990Sstevel@tonic-gate 	 * or respond to outstanding queries, don't if
20000Sstevel@tonic-gate 	 * the source is unspecified address.
20010Sstevel@tonic-gate 	 */
20020Sstevel@tonic-gate 	if (!IN6_IS_ADDR_UNSPECIFIED(&src)) {
20032546Scarlsonj 		int	err;
20040Sstevel@tonic-gate 		nce_t	*nnce;
20050Sstevel@tonic-gate 
20062535Ssangeeta 		ASSERT(ill->ill_isv6);
20072546Scarlsonj 		/*
20082546Scarlsonj 		 * Regular solicitations *must* include the Source Link-Layer
20092546Scarlsonj 		 * Address option.  Ignore messages that do not.
20102546Scarlsonj 		 */
20112546Scarlsonj 		if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) {
20122546Scarlsonj 			ip1dbg(("ndp_input_solicit: source link-layer address "
20132546Scarlsonj 			    "option missing with a specified source.\n"));
20142546Scarlsonj 			bad_solicit = B_TRUE;
20152546Scarlsonj 			goto done;
20162546Scarlsonj 		}
20172546Scarlsonj 
20182546Scarlsonj 		/*
20192546Scarlsonj 		 * This is a regular solicitation.  If we're still in the
20202546Scarlsonj 		 * process of verifying the address, then don't respond at all
20212546Scarlsonj 		 * and don't keep track of the sender.
20222546Scarlsonj 		 */
20232546Scarlsonj 		if (our_nce->nce_state == ND_PROBE)
20242546Scarlsonj 			goto done;
20252546Scarlsonj 
20262546Scarlsonj 		/*
20272546Scarlsonj 		 * If the solicitation doesn't have sender hardware address
20282546Scarlsonj 		 * (legal for unicast solicitation), then process without
20292546Scarlsonj 		 * installing the return NCE.  Either we already know it, or
20302546Scarlsonj 		 * we'll be forced to look it up when (and if) we reply to the
20312546Scarlsonj 		 * packet.
20322546Scarlsonj 		 */
20332546Scarlsonj 		if (haddr == NULL)
20342546Scarlsonj 			goto no_source;
20352546Scarlsonj 
20364714Ssowmini 		err = ndp_lookup_then_add_v6(ill,
20378485SPeter.Memishian@Sun.COM 		    B_FALSE,
20380Sstevel@tonic-gate 		    haddr,
20390Sstevel@tonic-gate 		    &src,	/* Soliciting nodes address */
20400Sstevel@tonic-gate 		    &ipv6_all_ones,
20410Sstevel@tonic-gate 		    &ipv6_all_zeros,
20420Sstevel@tonic-gate 		    0,
20430Sstevel@tonic-gate 		    0,
20440Sstevel@tonic-gate 		    ND_STALE,
20454714Ssowmini 		    &nnce);
20460Sstevel@tonic-gate 		switch (err) {
20470Sstevel@tonic-gate 		case 0:
20480Sstevel@tonic-gate 			/* done with this entry */
20490Sstevel@tonic-gate 			NCE_REFRELE(nnce);
20500Sstevel@tonic-gate 			break;
20510Sstevel@tonic-gate 		case EEXIST:
20520Sstevel@tonic-gate 			/*
20538485SPeter.Memishian@Sun.COM 			 * B_FALSE indicates this is not an an advertisement.
20540Sstevel@tonic-gate 			 */
20550Sstevel@tonic-gate 			ndp_process(nnce, haddr, 0, B_FALSE);
20560Sstevel@tonic-gate 			NCE_REFRELE(nnce);
20570Sstevel@tonic-gate 			break;
20580Sstevel@tonic-gate 		default:
20590Sstevel@tonic-gate 			ip1dbg(("ndp_input_solicit: Can't create NCE %d\n",
20600Sstevel@tonic-gate 			    err));
20610Sstevel@tonic-gate 			goto done;
20620Sstevel@tonic-gate 		}
20632546Scarlsonj no_source:
20640Sstevel@tonic-gate 		flag |= NDP_SOLICITED;
20650Sstevel@tonic-gate 	} else {
20660Sstevel@tonic-gate 		/*
20672546Scarlsonj 		 * No source link layer address option should be present in a
20682546Scarlsonj 		 * valid DAD request.
20692546Scarlsonj 		 */
20702546Scarlsonj 		if (haddr != NULL) {
20712546Scarlsonj 			ip1dbg(("ndp_input_solicit: source link-layer address "
20722546Scarlsonj 			    "option present with an unspecified source.\n"));
20732546Scarlsonj 			bad_solicit = B_TRUE;
20742546Scarlsonj 			goto done;
20752546Scarlsonj 		}
20762546Scarlsonj 		if (our_nce->nce_state == ND_PROBE) {
20772546Scarlsonj 			/*
20782546Scarlsonj 			 * Internally looped-back probes won't have DLPI
20792546Scarlsonj 			 * attached to them.  External ones (which are sent by
20802546Scarlsonj 			 * multicast) always will.  Just ignore our own
20812546Scarlsonj 			 * transmissions.
20822546Scarlsonj 			 */
20832546Scarlsonj 			if (dl_mp != NULL) {
20842546Scarlsonj 				/*
20852546Scarlsonj 				 * If someone else is probing our address, then
20862546Scarlsonj 				 * we've crossed wires.  Declare failure.
20872546Scarlsonj 				 */
20888485SPeter.Memishian@Sun.COM 				ip_ndp_failure(ill, mp, dl_mp);
20892546Scarlsonj 			}
20902546Scarlsonj 			goto done;
20912546Scarlsonj 		}
20922546Scarlsonj 		/*
20932546Scarlsonj 		 * This is a DAD probe.  Multicast the advertisement to the
20942546Scarlsonj 		 * all-nodes address.
20950Sstevel@tonic-gate 		 */
20960Sstevel@tonic-gate 		src = ipv6_all_hosts_mcast;
20970Sstevel@tonic-gate 	}
20980Sstevel@tonic-gate 	/* Response to a solicitation */
20998485SPeter.Memishian@Sun.COM 	(void) nce_xmit_advert(our_nce, B_TRUE, &src, flag);
21000Sstevel@tonic-gate done:
21010Sstevel@tonic-gate 	if (bad_solicit)
21020Sstevel@tonic-gate 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations);
21030Sstevel@tonic-gate 	if (our_nce != NULL)
21040Sstevel@tonic-gate 		NCE_REFRELE(our_nce);
21050Sstevel@tonic-gate }
21060Sstevel@tonic-gate 
21070Sstevel@tonic-gate void
21082546Scarlsonj ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
21090Sstevel@tonic-gate {
21100Sstevel@tonic-gate 	nd_neighbor_advert_t *na;
21110Sstevel@tonic-gate 	uint32_t	hlen = ill->ill_nd_lla_len;
21120Sstevel@tonic-gate 	uchar_t		*haddr = NULL;
21130Sstevel@tonic-gate 	icmp6_t		*icmp_nd;
21140Sstevel@tonic-gate 	ip6_t		*ip6h;
21150Sstevel@tonic-gate 	nce_t		*dst_nce = NULL;
21160Sstevel@tonic-gate 	in6_addr_t	target;
21170Sstevel@tonic-gate 	nd_opt_hdr_t	*opt = NULL;
21180Sstevel@tonic-gate 	int		len;
21198485SPeter.Memishian@Sun.COM 	ip_stack_t	*ipst = ill->ill_ipst;
21200Sstevel@tonic-gate 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
21210Sstevel@tonic-gate 
21220Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
21230Sstevel@tonic-gate 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
21240Sstevel@tonic-gate 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
21250Sstevel@tonic-gate 	na = (nd_neighbor_advert_t *)icmp_nd;
21260Sstevel@tonic-gate 	if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
21270Sstevel@tonic-gate 	    (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) {
21280Sstevel@tonic-gate 		ip1dbg(("ndp_input_advert: Target is multicast but the "
21290Sstevel@tonic-gate 		    "solicited flag is not zero\n"));
21300Sstevel@tonic-gate 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
21310Sstevel@tonic-gate 		return;
21320Sstevel@tonic-gate 	}
21330Sstevel@tonic-gate 	target = na->nd_na_target;
21340Sstevel@tonic-gate 	if (IN6_IS_ADDR_MULTICAST(&target)) {
21350Sstevel@tonic-gate 		ip1dbg(("ndp_input_advert: Target is multicast!\n"));
21360Sstevel@tonic-gate 		BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
21370Sstevel@tonic-gate 		return;
21380Sstevel@tonic-gate 	}
21390Sstevel@tonic-gate 	if (len > sizeof (nd_neighbor_advert_t)) {
21400Sstevel@tonic-gate 		opt = (nd_opt_hdr_t *)&na[1];
21410Sstevel@tonic-gate 		if (!ndp_verify_optlen(opt,
21420Sstevel@tonic-gate 		    len - sizeof (nd_neighbor_advert_t))) {
21432546Scarlsonj 			ip1dbg(("ndp_input_advert: cannot verify SLLA\n"));
21440Sstevel@tonic-gate 			BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements);
21450Sstevel@tonic-gate 			return;
21460Sstevel@tonic-gate 		}
21470Sstevel@tonic-gate 		/* At this point we have a verified NA per spec */
21480Sstevel@tonic-gate 		len -= sizeof (nd_neighbor_advert_t);
21490Sstevel@tonic-gate 		opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR);
21500Sstevel@tonic-gate 		if (opt != NULL) {
21510Sstevel@tonic-gate 			haddr = (uchar_t *)&opt[1];
21522546Scarlsonj 			if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) ||
21530Sstevel@tonic-gate 			    hlen == 0) {
21542546Scarlsonj 				ip1dbg(("ndp_input_advert: bad SLLA\n"));
21550Sstevel@tonic-gate 				BUMP_MIB(mib,
21560Sstevel@tonic-gate 				    ipv6IfIcmpInBadNeighborAdvertisements);
21570Sstevel@tonic-gate 				return;
21580Sstevel@tonic-gate 			}
21590Sstevel@tonic-gate 		}
21600Sstevel@tonic-gate 	}
21610Sstevel@tonic-gate 
21620Sstevel@tonic-gate 	/*
21638485SPeter.Memishian@Sun.COM 	 * NOTE: we match across the illgrp since we need to do DAD for all of
21648485SPeter.Memishian@Sun.COM 	 * our local addresses, and those are spread across all the active
21650Sstevel@tonic-gate 	 * ills in the group.
21660Sstevel@tonic-gate 	 */
21678485SPeter.Memishian@Sun.COM 	if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL)
21688485SPeter.Memishian@Sun.COM 		return;
21698485SPeter.Memishian@Sun.COM 
21708485SPeter.Memishian@Sun.COM 	if (dst_nce->nce_flags & NCE_F_PERMANENT) {
21718485SPeter.Memishian@Sun.COM 		/*
21728485SPeter.Memishian@Sun.COM 		 * Someone just advertised one of our local addresses.	First,
21738485SPeter.Memishian@Sun.COM 		 * check it it was us -- if so, we can safely ignore it.
21748485SPeter.Memishian@Sun.COM 		 */
21758485SPeter.Memishian@Sun.COM 		if (haddr != NULL) {
21768485SPeter.Memishian@Sun.COM 			if (!nce_cmp_ll_addr(dst_nce, haddr, hlen))
21778700SPeter.Memishian@Sun.COM 				goto out;	/* from us -- no conflict */
21788485SPeter.Memishian@Sun.COM 
21798485SPeter.Memishian@Sun.COM 			/*
21808485SPeter.Memishian@Sun.COM 			 * If we're in an IPMP group, check if this is an echo
21818485SPeter.Memishian@Sun.COM 			 * from another ill in the group.  Use the double-
21828485SPeter.Memishian@Sun.COM 			 * checked locking pattern to avoid grabbing
21838485SPeter.Memishian@Sun.COM 			 * ill_g_lock in the non-IPMP case.
21848485SPeter.Memishian@Sun.COM 			 */
21858485SPeter.Memishian@Sun.COM 			if (IS_UNDER_IPMP(ill)) {
21868485SPeter.Memishian@Sun.COM 				rw_enter(&ipst->ips_ill_g_lock, RW_READER);
21878485SPeter.Memishian@Sun.COM 				if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill(
21888485SPeter.Memishian@Sun.COM 				    ill->ill_grp, haddr, hlen) != NULL) {
21898485SPeter.Memishian@Sun.COM 					rw_exit(&ipst->ips_ill_g_lock);
21908485SPeter.Memishian@Sun.COM 					goto out;
21918485SPeter.Memishian@Sun.COM 				}
21928485SPeter.Memishian@Sun.COM 				rw_exit(&ipst->ips_ill_g_lock);
21938485SPeter.Memishian@Sun.COM 			}
21940Sstevel@tonic-gate 		}
21958485SPeter.Memishian@Sun.COM 
21968485SPeter.Memishian@Sun.COM 		/*
21978700SPeter.Memishian@Sun.COM 		 * Our own (looped-back) unsolicited neighbor advertisements
21988700SPeter.Memishian@Sun.COM 		 * will get here with dl_mp == NULL.  (These will usually be
21998700SPeter.Memishian@Sun.COM 		 * filtered by the `haddr' checks above, but point-to-point
22008700SPeter.Memishian@Sun.COM 		 * links have no hardware address and thus make it here.)
22018700SPeter.Memishian@Sun.COM 		 */
22028700SPeter.Memishian@Sun.COM 		if (dl_mp == NULL && dst_nce->nce_state != ND_PROBE)
22038700SPeter.Memishian@Sun.COM 			goto out;
22048700SPeter.Memishian@Sun.COM 
22058700SPeter.Memishian@Sun.COM 		/*
22068485SPeter.Memishian@Sun.COM 		 * This appears to be a real conflict.  If we're trying to
22078485SPeter.Memishian@Sun.COM 		 * configure this NCE (ND_PROBE), then shut it down.
22088485SPeter.Memishian@Sun.COM 		 * Otherwise, handle the discovered conflict.
22098485SPeter.Memishian@Sun.COM 		 *
22108700SPeter.Memishian@Sun.COM 		 * In the ND_PROBE case, dl_mp might be NULL if we're getting
22118700SPeter.Memishian@Sun.COM 		 * a unicast reply.  This isn't typically done (multicast is
22128700SPeter.Memishian@Sun.COM 		 * the norm in response to a probe), but we can handle it.
22138485SPeter.Memishian@Sun.COM 		 */
22148485SPeter.Memishian@Sun.COM 		if (dst_nce->nce_state == ND_PROBE)
22158485SPeter.Memishian@Sun.COM 			ip_ndp_failure(ill, mp, dl_mp);
22168485SPeter.Memishian@Sun.COM 		else
22178485SPeter.Memishian@Sun.COM 			ip_ndp_conflict(ill, mp, dl_mp, dst_nce);
22188485SPeter.Memishian@Sun.COM 	} else {
22198485SPeter.Memishian@Sun.COM 		if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER)
22208485SPeter.Memishian@Sun.COM 			dst_nce->nce_flags |= NCE_F_ISROUTER;
22218485SPeter.Memishian@Sun.COM 
22228485SPeter.Memishian@Sun.COM 		/* B_TRUE indicates this an advertisement */
22238485SPeter.Memishian@Sun.COM 		ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE);
22240Sstevel@tonic-gate 	}
22258485SPeter.Memishian@Sun.COM out:
22268485SPeter.Memishian@Sun.COM 	NCE_REFRELE(dst_nce);
22270Sstevel@tonic-gate }
22280Sstevel@tonic-gate 
22290Sstevel@tonic-gate /*
22300Sstevel@tonic-gate  * Process NDP neighbor solicitation/advertisement messages.
22310Sstevel@tonic-gate  * The checksum has already checked o.k before reaching here.
22320Sstevel@tonic-gate  */
22330Sstevel@tonic-gate void
22342546Scarlsonj ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp)
22350Sstevel@tonic-gate {
22360Sstevel@tonic-gate 	icmp6_t		*icmp_nd;
22370Sstevel@tonic-gate 	ip6_t		*ip6h;
22380Sstevel@tonic-gate 	int		len;
22390Sstevel@tonic-gate 	mib2_ipv6IfIcmpEntry_t	*mib = ill->ill_icmp6_mib;
22400Sstevel@tonic-gate 
22410Sstevel@tonic-gate 
22420Sstevel@tonic-gate 	if (!pullupmsg(mp, -1)) {
22430Sstevel@tonic-gate 		ip1dbg(("ndp_input: pullupmsg failed\n"));
22443284Sapersson 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
22450Sstevel@tonic-gate 		goto done;
22460Sstevel@tonic-gate 	}
22470Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
22480Sstevel@tonic-gate 	if (ip6h->ip6_hops != IPV6_MAX_HOPS) {
22490Sstevel@tonic-gate 		ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n"));
22500Sstevel@tonic-gate 		BUMP_MIB(mib, ipv6IfIcmpBadHoplimit);
22510Sstevel@tonic-gate 		goto done;
22520Sstevel@tonic-gate 	}
22530Sstevel@tonic-gate 	/*
22540Sstevel@tonic-gate 	 * NDP does not accept any extension headers between the
22550Sstevel@tonic-gate 	 * IP header and the ICMP header since e.g. a routing
22560Sstevel@tonic-gate 	 * header could be dangerous.
22570Sstevel@tonic-gate 	 * This assumes that any AH or ESP headers are removed
22580Sstevel@tonic-gate 	 * by ip prior to passing the packet to ndp_input.
22590Sstevel@tonic-gate 	 */
22600Sstevel@tonic-gate 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
22610Sstevel@tonic-gate 		ip1dbg(("ndp_input: Wrong next header 0x%x\n",
22620Sstevel@tonic-gate 		    ip6h->ip6_nxt));
22630Sstevel@tonic-gate 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
22640Sstevel@tonic-gate 		goto done;
22650Sstevel@tonic-gate 	}
22660Sstevel@tonic-gate 	icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN);
22670Sstevel@tonic-gate 	ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT ||
22680Sstevel@tonic-gate 	    icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT);
22690Sstevel@tonic-gate 	if (icmp_nd->icmp6_code != 0) {
22700Sstevel@tonic-gate 		ip1dbg(("ndp_input: icmp6 code != 0 \n"));
22710Sstevel@tonic-gate 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
22720Sstevel@tonic-gate 		goto done;
22730Sstevel@tonic-gate 	}
22740Sstevel@tonic-gate 	len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN;
22750Sstevel@tonic-gate 	/*
22760Sstevel@tonic-gate 	 * Make sure packet length is large enough for either
22770Sstevel@tonic-gate 	 * a NS or a NA icmp packet.
22780Sstevel@tonic-gate 	 */
22790Sstevel@tonic-gate 	if (len <  sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) {
22800Sstevel@tonic-gate 		ip1dbg(("ndp_input: packet too short\n"));
22810Sstevel@tonic-gate 		BUMP_MIB(mib, ipv6IfIcmpInErrors);
22820Sstevel@tonic-gate 		goto done;
22830Sstevel@tonic-gate 	}
22840Sstevel@tonic-gate 	if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) {
22852546Scarlsonj 		ndp_input_solicit(ill, mp, dl_mp);
22860Sstevel@tonic-gate 	} else {
22872546Scarlsonj 		ndp_input_advert(ill, mp, dl_mp);
22880Sstevel@tonic-gate 	}
22890Sstevel@tonic-gate done:
22900Sstevel@tonic-gate 	freemsg(mp);
22910Sstevel@tonic-gate }
22920Sstevel@tonic-gate 
22930Sstevel@tonic-gate /*
22948485SPeter.Memishian@Sun.COM  * Utility routine to send an advertisement.  Assumes that the NCE cannot
22958485SPeter.Memishian@Sun.COM  * go away (e.g., because it's refheld).
22968485SPeter.Memishian@Sun.COM  */
22978485SPeter.Memishian@Sun.COM static boolean_t
22988485SPeter.Memishian@Sun.COM nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target,
22998485SPeter.Memishian@Sun.COM     uint_t flags)
23008485SPeter.Memishian@Sun.COM {
23018485SPeter.Memishian@Sun.COM 	ASSERT((flags & NDP_PROBE) == 0);
23028485SPeter.Memishian@Sun.COM 
23038485SPeter.Memishian@Sun.COM 	if (nce->nce_flags & NCE_F_ISROUTER)
23048485SPeter.Memishian@Sun.COM 		flags |= NDP_ISROUTER;
23058485SPeter.Memishian@Sun.COM 	if (!(nce->nce_flags & NCE_F_ANYCAST))
23068485SPeter.Memishian@Sun.COM 		flags |= NDP_ORIDE;
23078485SPeter.Memishian@Sun.COM 
23088485SPeter.Memishian@Sun.COM 	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla,
23098485SPeter.Memishian@Sun.COM 	    &nce->nce_addr, target, flags));
23108485SPeter.Memishian@Sun.COM }
23118485SPeter.Memishian@Sun.COM 
23128485SPeter.Memishian@Sun.COM /*
23138485SPeter.Memishian@Sun.COM  * Utility routine to send a solicitation.  Assumes that the NCE cannot
23148485SPeter.Memishian@Sun.COM  * go away (e.g., because it's refheld).
23158485SPeter.Memishian@Sun.COM  */
23168485SPeter.Memishian@Sun.COM static boolean_t
23178485SPeter.Memishian@Sun.COM nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender,
23188485SPeter.Memishian@Sun.COM     uint_t flags)
23198485SPeter.Memishian@Sun.COM {
23208485SPeter.Memishian@Sun.COM 	if (flags & NDP_PROBE)
23218485SPeter.Memishian@Sun.COM 		sender = &ipv6_all_zeros;
23228485SPeter.Memishian@Sun.COM 
23238485SPeter.Memishian@Sun.COM 	return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla,
23248485SPeter.Memishian@Sun.COM 	    sender, &nce->nce_addr, flags));
23258485SPeter.Memishian@Sun.COM }
23268485SPeter.Memishian@Sun.COM 
23278485SPeter.Memishian@Sun.COM /*
23280Sstevel@tonic-gate  * nce_xmit is called to form and transmit a ND solicitation or
23290Sstevel@tonic-gate  * advertisement ICMP packet.
23302546Scarlsonj  *
23312546Scarlsonj  * If the source address is unspecified and this isn't a probe (used for
23322546Scarlsonj  * duplicate address detection), an appropriate source address and link layer
23332546Scarlsonj  * address will be chosen here.  The link layer address option is included if
23342546Scarlsonj  * the source is specified (i.e., all non-probe packets), and omitted (per the
23352546Scarlsonj  * specification) otherwise.
23362546Scarlsonj  *
23370Sstevel@tonic-gate  * It returns B_FALSE only if it does a successful put() to the
23380Sstevel@tonic-gate  * corresponding ill's ill_wq otherwise returns B_TRUE.
23390Sstevel@tonic-gate  */
23400Sstevel@tonic-gate static boolean_t
23418485SPeter.Memishian@Sun.COM nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla,
23428485SPeter.Memishian@Sun.COM     const in6_addr_t *sender, const in6_addr_t *target, int flag)
23430Sstevel@tonic-gate {
23448485SPeter.Memishian@Sun.COM 	ill_t		*hwaddr_ill;
23450Sstevel@tonic-gate 	uint32_t	len;
23460Sstevel@tonic-gate 	icmp6_t 	*icmp6;
23470Sstevel@tonic-gate 	mblk_t		*mp;
23480Sstevel@tonic-gate 	ip6_t		*ip6h;
23490Sstevel@tonic-gate 	nd_opt_hdr_t	*opt;
23508485SPeter.Memishian@Sun.COM 	uint_t		plen, maxplen;
23510Sstevel@tonic-gate 	ip6i_t		*ip6i;
23520Sstevel@tonic-gate 	ipif_t		*src_ipif = NULL;
23532598Scarlsonj 	uint8_t		*hw_addr;
23543909Sja97890 	zoneid_t	zoneid = GLOBAL_ZONEID;
23558485SPeter.Memishian@Sun.COM 	char		buf[INET6_ADDRSTRLEN];
23568485SPeter.Memishian@Sun.COM 
23578485SPeter.Memishian@Sun.COM 	ASSERT(!IS_IPMP(ill));
23580Sstevel@tonic-gate 
23590Sstevel@tonic-gate 	/*
23608485SPeter.Memishian@Sun.COM 	 * Check that the sender is actually a usable address on `ill', and if
23618485SPeter.Memishian@Sun.COM 	 * so, track that as the src_ipif.  If not, for solicitations, set the
23628485SPeter.Memishian@Sun.COM 	 * sender to :: so that a new one will be picked below; for adverts,
23638485SPeter.Memishian@Sun.COM 	 * drop the packet since we expect nce_xmit_advert() to always provide
23648485SPeter.Memishian@Sun.COM 	 * a valid sender.
23650Sstevel@tonic-gate 	 */
23668485SPeter.Memishian@Sun.COM 	if (!IN6_IS_ADDR_UNSPECIFIED(sender)) {
23678485SPeter.Memishian@Sun.COM 		if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL ||
23688485SPeter.Memishian@Sun.COM 		    !src_ipif->ipif_addr_ready) {
23698485SPeter.Memishian@Sun.COM 			if (src_ipif != NULL) {
23708485SPeter.Memishian@Sun.COM 				ipif_refrele(src_ipif);
23718485SPeter.Memishian@Sun.COM 				src_ipif = NULL;
23728485SPeter.Memishian@Sun.COM 			}
23738485SPeter.Memishian@Sun.COM 			if (type == ND_NEIGHBOR_ADVERT) {
23748485SPeter.Memishian@Sun.COM 				ip1dbg(("nce_xmit: No source ipif for src %s\n",
23758485SPeter.Memishian@Sun.COM 				    inet_ntop(AF_INET6, sender, buf,
23768485SPeter.Memishian@Sun.COM 				    sizeof (buf))));
23778485SPeter.Memishian@Sun.COM 				return (B_TRUE);
23788485SPeter.Memishian@Sun.COM 			}
23798485SPeter.Memishian@Sun.COM 			sender = &ipv6_all_zeros;
23808485SPeter.Memishian@Sun.COM 		}
23818485SPeter.Memishian@Sun.COM 	}
23828485SPeter.Memishian@Sun.COM 
23838485SPeter.Memishian@Sun.COM 	/*
23848485SPeter.Memishian@Sun.COM 	 * If we still have an unspecified source (sender) address and this
23858485SPeter.Memishian@Sun.COM 	 * isn't a probe, select a source address from `ill'.
23868485SPeter.Memishian@Sun.COM 	 */
23872546Scarlsonj 	if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) {
23888485SPeter.Memishian@Sun.COM 		ASSERT(type != ND_NEIGHBOR_ADVERT);
23890Sstevel@tonic-gate 		/*
23908485SPeter.Memishian@Sun.COM 		 * Pick a source address for this solicitation, but restrict
23918485SPeter.Memishian@Sun.COM 		 * the selection to addresses assigned to the output
23928485SPeter.Memishian@Sun.COM 		 * interface.  We do this because the destination will create
23938485SPeter.Memishian@Sun.COM 		 * a neighbor cache entry for the source address of this
23948485SPeter.Memishian@Sun.COM 		 * packet, so the source address needs to be a valid neighbor.
23950Sstevel@tonic-gate 		 */
23968485SPeter.Memishian@Sun.COM 		src_ipif = ipif_select_source_v6(ill, target, B_TRUE,
23973909Sja97890 		    IPV6_PREFER_SRC_DEFAULT, ALL_ZONES);
23980Sstevel@tonic-gate 		if (src_ipif == NULL) {
23992202Srk129064 			ip1dbg(("nce_xmit: No source ipif for dst %s\n",
24008485SPeter.Memishian@Sun.COM 			    inet_ntop(AF_INET6, target, buf, sizeof (buf))));
24010Sstevel@tonic-gate 			return (B_TRUE);
24020Sstevel@tonic-gate 		}
24030Sstevel@tonic-gate 		sender = &src_ipif->ipif_v6src_addr;
24040Sstevel@tonic-gate 	}
24050Sstevel@tonic-gate 
24060Sstevel@tonic-gate 	/*
24078485SPeter.Memishian@Sun.COM 	 * We're either sending a probe or we have a source address.
24080Sstevel@tonic-gate 	 */
24098485SPeter.Memishian@Sun.COM 	ASSERT((flag & NDP_PROBE) || src_ipif != NULL);
24108485SPeter.Memishian@Sun.COM 
24118485SPeter.Memishian@Sun.COM 	maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8);
24120Sstevel@tonic-gate 	len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) +
24138485SPeter.Memishian@Sun.COM 	    maxplen;
24140Sstevel@tonic-gate 	mp = allocb(len,  BPRI_LO);
24150Sstevel@tonic-gate 	if (mp == NULL) {
24160Sstevel@tonic-gate 		if (src_ipif != NULL)
24170Sstevel@tonic-gate 			ipif_refrele(src_ipif);
24180Sstevel@tonic-gate 		return (B_TRUE);
24190Sstevel@tonic-gate 	}
24200Sstevel@tonic-gate 	bzero((char *)mp->b_rptr, len);
24210Sstevel@tonic-gate 	mp->b_wptr = mp->b_rptr + len;
24220Sstevel@tonic-gate 
24230Sstevel@tonic-gate 	ip6i = (ip6i_t *)mp->b_rptr;
24240Sstevel@tonic-gate 	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
24250Sstevel@tonic-gate 	ip6i->ip6i_nxt = IPPROTO_RAW;
24268485SPeter.Memishian@Sun.COM 	ip6i->ip6i_flags = IP6I_HOPLIMIT;
24272546Scarlsonj 	if (flag & NDP_PROBE)
24282546Scarlsonj 		ip6i->ip6i_flags |= IP6I_UNSPEC_SRC;
24290Sstevel@tonic-gate 
24300Sstevel@tonic-gate 	ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t));
24310Sstevel@tonic-gate 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
24320Sstevel@tonic-gate 	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
24330Sstevel@tonic-gate 	ip6h->ip6_nxt = IPPROTO_ICMPV6;
24340Sstevel@tonic-gate 	ip6h->ip6_hops = IPV6_MAX_HOPS;
24358485SPeter.Memishian@Sun.COM 	ip6h->ip6_src = *sender;
24360Sstevel@tonic-gate 	ip6h->ip6_dst = *target;
24370Sstevel@tonic-gate 	icmp6 = (icmp6_t *)&ip6h[1];
24380Sstevel@tonic-gate 
24390Sstevel@tonic-gate 	opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN +
24400Sstevel@tonic-gate 	    sizeof (nd_neighbor_advert_t));
24410Sstevel@tonic-gate 
24428485SPeter.Memishian@Sun.COM 	if (type == ND_NEIGHBOR_SOLICIT) {
24430Sstevel@tonic-gate 		nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6;
24440Sstevel@tonic-gate 
24452546Scarlsonj 		if (!(flag & NDP_PROBE))
24462546Scarlsonj 			opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
24470Sstevel@tonic-gate 		ns->nd_ns_target = *target;
24480Sstevel@tonic-gate 		if (!(flag & NDP_UNICAST)) {
24490Sstevel@tonic-gate 			/* Form multicast address of the target */
24500Sstevel@tonic-gate 			ip6h->ip6_dst = ipv6_solicited_node_mcast;
24510Sstevel@tonic-gate 			ip6h->ip6_dst.s6_addr32[3] |=
24520Sstevel@tonic-gate 			    ns->nd_ns_target.s6_addr32[3];
24530Sstevel@tonic-gate 		}
24540Sstevel@tonic-gate 	} else {
24550Sstevel@tonic-gate 		nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6;
24560Sstevel@tonic-gate 
24572546Scarlsonj 		ASSERT(!(flag & NDP_PROBE));
24580Sstevel@tonic-gate 		opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
24590Sstevel@tonic-gate 		na->nd_na_target = *sender;
24600Sstevel@tonic-gate 		if (flag & NDP_ISROUTER)
24610Sstevel@tonic-gate 			na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER;
24620Sstevel@tonic-gate 		if (flag & NDP_SOLICITED)
24630Sstevel@tonic-gate 			na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED;
24640Sstevel@tonic-gate 		if (flag & NDP_ORIDE)
24650Sstevel@tonic-gate 			na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE;
24660Sstevel@tonic-gate 	}
24672546Scarlsonj 
24682598Scarlsonj 	hw_addr = NULL;
24692546Scarlsonj 	if (!(flag & NDP_PROBE)) {
24708485SPeter.Memishian@Sun.COM 		/*
24718485SPeter.Memishian@Sun.COM 		 * Use our source address to find the hardware address to put
24728485SPeter.Memishian@Sun.COM 		 * in the packet, so that the hardware address and IP address
24738485SPeter.Memishian@Sun.COM 		 * will match up -- even if that hardware address doesn't
24748485SPeter.Memishian@Sun.COM 		 * match the ill we actually transmit the packet through.
24758485SPeter.Memishian@Sun.COM 		 */
24768485SPeter.Memishian@Sun.COM 		if (IS_IPMP(src_ipif->ipif_ill)) {
24778485SPeter.Memishian@Sun.COM 			hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif);
24788485SPeter.Memishian@Sun.COM 			if (hwaddr_ill == NULL) {
24798485SPeter.Memishian@Sun.COM 				ip1dbg(("nce_xmit: no bound ill!\n"));
24808485SPeter.Memishian@Sun.COM 				ipif_refrele(src_ipif);
24818485SPeter.Memishian@Sun.COM 				freemsg(mp);
24828485SPeter.Memishian@Sun.COM 				return (B_TRUE);
24838485SPeter.Memishian@Sun.COM 			}
24848485SPeter.Memishian@Sun.COM 		} else {
24858485SPeter.Memishian@Sun.COM 			hwaddr_ill = src_ipif->ipif_ill;
24868485SPeter.Memishian@Sun.COM 			ill_refhold(hwaddr_ill);	/* for symmetry */
24878485SPeter.Memishian@Sun.COM 		}
24888485SPeter.Memishian@Sun.COM 
24898485SPeter.Memishian@Sun.COM 		plen = roundup(sizeof (nd_opt_hdr_t) +
24908485SPeter.Memishian@Sun.COM 		    hwaddr_ill->ill_nd_lla_len, 8);
24918485SPeter.Memishian@Sun.COM 
24922598Scarlsonj 		hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla :
24932598Scarlsonj 		    hwaddr_ill->ill_phys_addr;
24942598Scarlsonj 		if (hw_addr != NULL) {
24952598Scarlsonj 			/* Fill in link layer address and option len */
24968485SPeter.Memishian@Sun.COM 			opt->nd_opt_len = (uint8_t)(plen / 8);
24972598Scarlsonj 			bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len);
24982598Scarlsonj 		}
24998485SPeter.Memishian@Sun.COM 
25008485SPeter.Memishian@Sun.COM 		ill_refrele(hwaddr_ill);
25012546Scarlsonj 	}
25028485SPeter.Memishian@Sun.COM 
25038485SPeter.Memishian@Sun.COM 	if (hw_addr == NULL)
25048485SPeter.Memishian@Sun.COM 		plen = 0;
25058485SPeter.Memishian@Sun.COM 
25068485SPeter.Memishian@Sun.COM 	/* Fix up the length of the packet now that plen is known */
25078485SPeter.Memishian@Sun.COM 	len -= (maxplen - plen);
25088485SPeter.Memishian@Sun.COM 	mp->b_wptr = mp->b_rptr + len;
25098485SPeter.Memishian@Sun.COM 	ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t));
25108485SPeter.Memishian@Sun.COM 
25118485SPeter.Memishian@Sun.COM 	icmp6->icmp6_type = type;
25120Sstevel@tonic-gate 	icmp6->icmp6_code = 0;
25130Sstevel@tonic-gate 	/*
25140Sstevel@tonic-gate 	 * Prepare for checksum by putting icmp length in the icmp
25150Sstevel@tonic-gate 	 * checksum field. The checksum is calculated in ip_wput_v6.
25160Sstevel@tonic-gate 	 */
25170Sstevel@tonic-gate 	icmp6->icmp6_cksum = ip6h->ip6_plen;
25180Sstevel@tonic-gate 
25198485SPeter.Memishian@Sun.COM 	/*
25208485SPeter.Memishian@Sun.COM 	 * Before we toss the src_ipif, look up the zoneid to pass to
25218485SPeter.Memishian@Sun.COM 	 * ip_output_v6().  This is to ensure unicast ND_NEIGHBOR_ADVERT
25228485SPeter.Memishian@Sun.COM 	 * packets to be routed correctly by IP (we cannot guarantee that the
25238485SPeter.Memishian@Sun.COM 	 * global zone has an interface route to the destination).
25248485SPeter.Memishian@Sun.COM 	 */
25258485SPeter.Memishian@Sun.COM 	if (src_ipif != NULL) {
25268485SPeter.Memishian@Sun.COM 		if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES)
25278485SPeter.Memishian@Sun.COM 			zoneid = GLOBAL_ZONEID;
25280Sstevel@tonic-gate 		ipif_refrele(src_ipif);
25298485SPeter.Memishian@Sun.COM 	}
25303909Sja97890 
25313909Sja97890 	ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT);
25323909Sja97890 	return (B_FALSE);
25330Sstevel@tonic-gate }
25340Sstevel@tonic-gate 
25350Sstevel@tonic-gate /*
25360Sstevel@tonic-gate  * Make a link layer address (does not include the SAP) from an nce.
25370Sstevel@tonic-gate  * To form the link layer address, use the last four bytes of ipv6
25380Sstevel@tonic-gate  * address passed in and the fixed offset stored in nce.
25390Sstevel@tonic-gate  */
25400Sstevel@tonic-gate static void
25410Sstevel@tonic-gate nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr)
25420Sstevel@tonic-gate {
25430Sstevel@tonic-gate 	uchar_t *mask, *to;
25440Sstevel@tonic-gate 	ill_t	*ill = nce->nce_ill;
25450Sstevel@tonic-gate 	int 	len;
25460Sstevel@tonic-gate 
25470Sstevel@tonic-gate 	if (ill->ill_net_type == IRE_IF_NORESOLVER)
25480Sstevel@tonic-gate 		return;
25490Sstevel@tonic-gate 	ASSERT(nce->nce_res_mp != NULL);
25500Sstevel@tonic-gate 	ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
25510Sstevel@tonic-gate 	ASSERT(nce->nce_flags & NCE_F_MAPPING);
25520Sstevel@tonic-gate 	ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask));
25530Sstevel@tonic-gate 	ASSERT(addr != NULL);
25540Sstevel@tonic-gate 	bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill),
25550Sstevel@tonic-gate 	    addrpos, ill->ill_nd_lla_len);
25560Sstevel@tonic-gate 	len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start,
25570Sstevel@tonic-gate 	    IPV6_ADDR_LEN);
25580Sstevel@tonic-gate 	mask = (uchar_t *)&nce->nce_extract_mask;
25590Sstevel@tonic-gate 	mask += (IPV6_ADDR_LEN - len);
25600Sstevel@tonic-gate 	addr += (IPV6_ADDR_LEN - len);
25610Sstevel@tonic-gate 	to = addrpos + nce->nce_ll_extract_start;
25620Sstevel@tonic-gate 	while (len-- > 0)
25630Sstevel@tonic-gate 		*to++ |= *mask++ & *addr++;
25640Sstevel@tonic-gate }
25650Sstevel@tonic-gate 
25660Sstevel@tonic-gate mblk_t *
25670Sstevel@tonic-gate nce_udreq_alloc(ill_t *ill)
25680Sstevel@tonic-gate {
25690Sstevel@tonic-gate 	mblk_t	*template_mp = NULL;
25700Sstevel@tonic-gate 	dl_unitdata_req_t *dlur;
25710Sstevel@tonic-gate 	int	sap_length;
25720Sstevel@tonic-gate 
25732535Ssangeeta 	ASSERT(ill->ill_isv6);
25742535Ssangeeta 
25750Sstevel@tonic-gate 	sap_length = ill->ill_sap_length;
25760Sstevel@tonic-gate 	template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) +
25770Sstevel@tonic-gate 	    ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ);
25780Sstevel@tonic-gate 	if (template_mp == NULL)
25790Sstevel@tonic-gate 		return (NULL);
25800Sstevel@tonic-gate 
25810Sstevel@tonic-gate 	dlur = (dl_unitdata_req_t *)template_mp->b_rptr;
25820Sstevel@tonic-gate 	dlur->dl_priority.dl_min = 0;
25830Sstevel@tonic-gate 	dlur->dl_priority.dl_max = 0;
25840Sstevel@tonic-gate 	dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len;
25850Sstevel@tonic-gate 	dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t);
25860Sstevel@tonic-gate 
25870Sstevel@tonic-gate 	/* Copy in the SAP value. */
25880Sstevel@tonic-gate 	NCE_LL_SAP_COPY(ill, template_mp);
25890Sstevel@tonic-gate 
25900Sstevel@tonic-gate 	return (template_mp);
25910Sstevel@tonic-gate }
25920Sstevel@tonic-gate 
25930Sstevel@tonic-gate /*
25940Sstevel@tonic-gate  * NDP retransmit timer.
25950Sstevel@tonic-gate  * This timer goes off when:
25960Sstevel@tonic-gate  * a. It is time to retransmit NS for resolver.
25970Sstevel@tonic-gate  * b. It is time to send reachability probes.
25980Sstevel@tonic-gate  */
25990Sstevel@tonic-gate void
26000Sstevel@tonic-gate ndp_timer(void *arg)
26010Sstevel@tonic-gate {
26020Sstevel@tonic-gate 	nce_t		*nce = arg;
26030Sstevel@tonic-gate 	ill_t		*ill = nce->nce_ill;
26040Sstevel@tonic-gate 	char		addrbuf[INET6_ADDRSTRLEN];
26050Sstevel@tonic-gate 	boolean_t	dropped = B_FALSE;
26063448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
26070Sstevel@tonic-gate 
26080Sstevel@tonic-gate 	/*
26090Sstevel@tonic-gate 	 * The timer has to be cancelled by ndp_delete before doing the final
26100Sstevel@tonic-gate 	 * refrele. So the NCE is guaranteed to exist when the timer runs
26110Sstevel@tonic-gate 	 * until it clears the timeout_id. Before clearing the timeout_id
26120Sstevel@tonic-gate 	 * bump up the refcnt so that we can continue to use the nce
26130Sstevel@tonic-gate 	 */
26140Sstevel@tonic-gate 	ASSERT(nce != NULL);
26150Sstevel@tonic-gate 
26160Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
26170Sstevel@tonic-gate 	NCE_REFHOLD_LOCKED(nce);
26180Sstevel@tonic-gate 	nce->nce_timeout_id = 0;
26190Sstevel@tonic-gate 
26200Sstevel@tonic-gate 	/*
26210Sstevel@tonic-gate 	 * Check the reachability state first.
26220Sstevel@tonic-gate 	 */
26230Sstevel@tonic-gate 	switch (nce->nce_state) {
26240Sstevel@tonic-gate 	case ND_DELAY:
26250Sstevel@tonic-gate 		nce->nce_state = ND_PROBE;
26260Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
26278485SPeter.Memishian@Sun.COM 		(void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros,
26288485SPeter.Memishian@Sun.COM 		    NDP_UNICAST);
26290Sstevel@tonic-gate 		if (ip_debug > 3) {
26300Sstevel@tonic-gate 			/* ip2dbg */
26310Sstevel@tonic-gate 			pr_addr_dbg("ndp_timer: state for %s changed "
26320Sstevel@tonic-gate 			    "to PROBE\n", AF_INET6, &nce->nce_addr);
26330Sstevel@tonic-gate 		}
26340Sstevel@tonic-gate 		NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
26350Sstevel@tonic-gate 		NCE_REFRELE(nce);
26360Sstevel@tonic-gate 		return;
26370Sstevel@tonic-gate 	case ND_PROBE:
26380Sstevel@tonic-gate 		/* must be retransmit timer */
26390Sstevel@tonic-gate 		nce->nce_pcnt--;
26400Sstevel@tonic-gate 		ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT &&
26410Sstevel@tonic-gate 		    nce->nce_pcnt >= -1);
26422546Scarlsonj 		if (nce->nce_pcnt > 0) {
26430Sstevel@tonic-gate 			/*
26440Sstevel@tonic-gate 			 * As per RFC2461, the nce gets deleted after
26450Sstevel@tonic-gate 			 * MAX_UNICAST_SOLICIT unsuccessful re-transmissions.
26460Sstevel@tonic-gate 			 * Note that the first unicast solicitation is sent
26470Sstevel@tonic-gate 			 * during the DELAY state.
26480Sstevel@tonic-gate 			 */
26492546Scarlsonj 			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
26502546Scarlsonj 			    nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr,
26512546Scarlsonj 			    addrbuf, sizeof (addrbuf))));
26522546Scarlsonj 			mutex_exit(&nce->nce_lock);
26538485SPeter.Memishian@Sun.COM 			dropped = nce_xmit_solicit(nce, B_FALSE,
26548485SPeter.Memishian@Sun.COM 			    &ipv6_all_zeros,
26552546Scarlsonj 			    (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE :
26562546Scarlsonj 			    NDP_UNICAST);
26572546Scarlsonj 			if (dropped) {
26582546Scarlsonj 				mutex_enter(&nce->nce_lock);
26592546Scarlsonj 				nce->nce_pcnt++;
26600Sstevel@tonic-gate 				mutex_exit(&nce->nce_lock);
26612546Scarlsonj 			}
26622546Scarlsonj 			NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill));
26632546Scarlsonj 		} else if (nce->nce_pcnt < 0) {
26642546Scarlsonj 			/* No hope, delete the nce */
26652546Scarlsonj 			nce->nce_state = ND_UNREACHABLE;
26662546Scarlsonj 			mutex_exit(&nce->nce_lock);
26672546Scarlsonj 			if (ip_debug > 2) {
26682546Scarlsonj 				/* ip1dbg */
26692546Scarlsonj 				pr_addr_dbg("ndp_timer: Delete IRE for"
26702546Scarlsonj 				    " dst %s\n", AF_INET6, &nce->nce_addr);
26712546Scarlsonj 			}
26722546Scarlsonj 			ndp_delete(nce);
26732546Scarlsonj 		} else if (!(nce->nce_flags & NCE_F_PERMANENT)) {
26742546Scarlsonj 			/* Wait RetransTimer, before deleting the entry */
26752546Scarlsonj 			ip2dbg(("ndp_timer: pcount=%x dst %s\n",
26762546Scarlsonj 			    nce->nce_pcnt, inet_ntop(AF_INET6,
26772546Scarlsonj 			    &nce->nce_addr, addrbuf, sizeof (addrbuf))));
26782546Scarlsonj 			mutex_exit(&nce->nce_lock);
26792546Scarlsonj 			/* Wait one interval before killing */
26802546Scarlsonj 			NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time);
26812546Scarlsonj 		} else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) {
26822546Scarlsonj 			ipif_t *ipif;
26832546Scarlsonj 
26842546Scarlsonj 			/*
26852546Scarlsonj 			 * We're done probing, and we can now declare this
26862546Scarlsonj 			 * address to be usable.  Let IP know that it's ok to
26872546Scarlsonj 			 * use.
26882546Scarlsonj 			 */
26892546Scarlsonj 			nce->nce_state = ND_REACHABLE;
26902546Scarlsonj 			mutex_exit(&nce->nce_lock);
26918485SPeter.Memishian@Sun.COM 			ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr,
26928485SPeter.Memishian@Sun.COM 			    nce->nce_ill);
26932546Scarlsonj 			if (ipif != NULL) {
26942546Scarlsonj 				if (ipif->ipif_was_dup) {
26952546Scarlsonj 					char ibuf[LIFNAMSIZ + 10];
26962546Scarlsonj 					char sbuf[INET6_ADDRSTRLEN];
26972546Scarlsonj 
26982546Scarlsonj 					ipif->ipif_was_dup = B_FALSE;
26992546Scarlsonj 					(void) inet_ntop(AF_INET6,
27002546Scarlsonj 					    &ipif->ipif_v6lcl_addr,
27012546Scarlsonj 					    sbuf, sizeof (sbuf));
27024972Smeem 					ipif_get_name(ipif, ibuf,
27034972Smeem 					    sizeof (ibuf));
27042546Scarlsonj 					cmn_err(CE_NOTE, "recovered address "
27052546Scarlsonj 					    "%s on %s", sbuf, ibuf);
27060Sstevel@tonic-gate 				}
27072546Scarlsonj 				if ((ipif->ipif_flags & IPIF_UP) &&
27088023SPhil.Kirk@Sun.COM 				    !ipif->ipif_addr_ready)
27098023SPhil.Kirk@Sun.COM 					ipif_up_notify(ipif);
27102546Scarlsonj 				ipif->ipif_addr_ready = 1;
27112546Scarlsonj 				ipif_refrele(ipif);
27122546Scarlsonj 			}
27132546Scarlsonj 			/* Begin defending our new address */
27142546Scarlsonj 			nce->nce_unsolicit_count = 0;
27158485SPeter.Memishian@Sun.COM 			dropped = nce_xmit_advert(nce, B_FALSE,
27168485SPeter.Memishian@Sun.COM 			    &ipv6_all_hosts_mcast, 0);
27172546Scarlsonj 			if (dropped) {
27182546Scarlsonj 				nce->nce_unsolicit_count = 1;
27190Sstevel@tonic-gate 				NDP_RESTART_TIMER(nce,
27203448Sdh155122 				    ipst->ips_ip_ndp_unsolicit_interval);
27213448Sdh155122 			} else if (ipst->ips_ip_ndp_defense_interval != 0) {
27223448Sdh155122 				NDP_RESTART_TIMER(nce,
27233448Sdh155122 				    ipst->ips_ip_ndp_defense_interval);
27240Sstevel@tonic-gate 			}
27252546Scarlsonj 		} else {
27262546Scarlsonj 			/*
27272546Scarlsonj 			 * This is an address we're probing to be our own, but
27282546Scarlsonj 			 * the ill is down.  Wait until it comes back before
27292546Scarlsonj 			 * doing anything, but switch to reachable state so
27302546Scarlsonj 			 * that the restart will work.
27312546Scarlsonj 			 */
27322546Scarlsonj 			nce->nce_state = ND_REACHABLE;
27332546Scarlsonj 			mutex_exit(&nce->nce_lock);
27340Sstevel@tonic-gate 		}
27350Sstevel@tonic-gate 		NCE_REFRELE(nce);
27360Sstevel@tonic-gate 		return;
27378485SPeter.Memishian@Sun.COM 	case ND_INCOMPLETE: {
27388485SPeter.Memishian@Sun.COM 		ip6_t	*ip6h;
27398485SPeter.Memishian@Sun.COM 		ip6i_t	*ip6i;
27408485SPeter.Memishian@Sun.COM 		mblk_t	*mp, *datamp, *nextmp, **prevmpp;
27418485SPeter.Memishian@Sun.COM 
27420Sstevel@tonic-gate 		/*
27438485SPeter.Memishian@Sun.COM 		 * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp
27448485SPeter.Memishian@Sun.COM 		 * for any IPMP probe packets, and toss 'em.  IPMP probe
27458485SPeter.Memishian@Sun.COM 		 * packets will always be at the head of nce_qd_mp and always
27468485SPeter.Memishian@Sun.COM 		 * have an ip6i_t header, so we can stop at the first queued
27478485SPeter.Memishian@Sun.COM 		 * ND packet without an ip6i_t.
27480Sstevel@tonic-gate 		 */
27498485SPeter.Memishian@Sun.COM 		prevmpp = &nce->nce_qd_mp;
27508485SPeter.Memishian@Sun.COM 		for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) {
27518485SPeter.Memishian@Sun.COM 			nextmp = mp->b_next;
27528485SPeter.Memishian@Sun.COM 			datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp;
27538485SPeter.Memishian@Sun.COM 			ip6h = (ip6_t *)datamp->b_rptr;
27540Sstevel@tonic-gate 			if (ip6h->ip6_nxt != IPPROTO_RAW)
27550Sstevel@tonic-gate 				break;
27560Sstevel@tonic-gate 
27570Sstevel@tonic-gate 			ip6i = (ip6i_t *)ip6h;
27588485SPeter.Memishian@Sun.COM 			if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) {
27598485SPeter.Memishian@Sun.COM 				inet_freemsg(mp);
27608485SPeter.Memishian@Sun.COM 				*prevmpp = nextmp;
27618485SPeter.Memishian@Sun.COM 			} else {
27628485SPeter.Memishian@Sun.COM 				prevmpp = &mp->b_next;
27638485SPeter.Memishian@Sun.COM 			}
27640Sstevel@tonic-gate 		}
2765*9175SSowmini.Varadhan@Sun.COM 		ip_ndp_resolve(nce);
27660Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
27670Sstevel@tonic-gate 		NCE_REFRELE(nce);
27680Sstevel@tonic-gate 		break;
27698485SPeter.Memishian@Sun.COM 	}
27708485SPeter.Memishian@Sun.COM 	case ND_REACHABLE:
27712546Scarlsonj 		if (((nce->nce_flags & NCE_F_UNSOL_ADV) &&
27722546Scarlsonj 		    nce->nce_unsolicit_count != 0) ||
27732546Scarlsonj 		    ((nce->nce_flags & NCE_F_PERMANENT) &&
27743448Sdh155122 		    ipst->ips_ip_ndp_defense_interval != 0)) {
27752546Scarlsonj 			if (nce->nce_unsolicit_count > 0)
27762546Scarlsonj 				nce->nce_unsolicit_count--;
27770Sstevel@tonic-gate 			mutex_exit(&nce->nce_lock);
27788485SPeter.Memishian@Sun.COM 			dropped = nce_xmit_advert(nce, B_FALSE,
27798485SPeter.Memishian@Sun.COM 			    &ipv6_all_hosts_mcast, 0);
27800Sstevel@tonic-gate 			if (dropped) {
27810Sstevel@tonic-gate 				mutex_enter(&nce->nce_lock);
27820Sstevel@tonic-gate 				nce->nce_unsolicit_count++;
27830Sstevel@tonic-gate 				mutex_exit(&nce->nce_lock);
27840Sstevel@tonic-gate 			}
27850Sstevel@tonic-gate 			if (nce->nce_unsolicit_count != 0) {
27860Sstevel@tonic-gate 				NDP_RESTART_TIMER(nce,
27873448Sdh155122 				    ipst->ips_ip_ndp_unsolicit_interval);
27882546Scarlsonj 			} else {
27892546Scarlsonj 				NDP_RESTART_TIMER(nce,
27903448Sdh155122 				    ipst->ips_ip_ndp_defense_interval);
27910Sstevel@tonic-gate 			}
27920Sstevel@tonic-gate 		} else {
27930Sstevel@tonic-gate 			mutex_exit(&nce->nce_lock);
27940Sstevel@tonic-gate 		}
27950Sstevel@tonic-gate 		NCE_REFRELE(nce);
27960Sstevel@tonic-gate 		break;
27970Sstevel@tonic-gate 	default:
27980Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
27990Sstevel@tonic-gate 		NCE_REFRELE(nce);
28000Sstevel@tonic-gate 		break;
28010Sstevel@tonic-gate 	}
28020Sstevel@tonic-gate }
28030Sstevel@tonic-gate 
28040Sstevel@tonic-gate /*
28050Sstevel@tonic-gate  * Set a link layer address from the ll_addr passed in.
28060Sstevel@tonic-gate  * Copy SAP from ill.
28070Sstevel@tonic-gate  */
28080Sstevel@tonic-gate static void
28090Sstevel@tonic-gate nce_set_ll(nce_t *nce, uchar_t *ll_addr)
28100Sstevel@tonic-gate {
28110Sstevel@tonic-gate 	ill_t	*ill = nce->nce_ill;
28120Sstevel@tonic-gate 	uchar_t	*woffset;
28130Sstevel@tonic-gate 
28140Sstevel@tonic-gate 	ASSERT(ll_addr != NULL);
28150Sstevel@tonic-gate 	/* Always called before fast_path_probe */
2816741Smasputra 	ASSERT(nce->nce_fp_mp == NULL);
28170Sstevel@tonic-gate 	if (ill->ill_sap_length != 0) {
28180Sstevel@tonic-gate 		/*
28190Sstevel@tonic-gate 		 * Copy the SAP type specified in the
28200Sstevel@tonic-gate 		 * request into the xmit template.
28210Sstevel@tonic-gate 		 */
28220Sstevel@tonic-gate 		NCE_LL_SAP_COPY(ill, nce->nce_res_mp);
28230Sstevel@tonic-gate 	}
28240Sstevel@tonic-gate 	if (ill->ill_phys_addr_length > 0) {
28250Sstevel@tonic-gate 		/*
28260Sstevel@tonic-gate 		 * The bcopy() below used to be called for the physical address
28270Sstevel@tonic-gate 		 * length rather than the link layer address length. For
28280Sstevel@tonic-gate 		 * ethernet and many other media, the phys_addr and lla are
28290Sstevel@tonic-gate 		 * identical.
28300Sstevel@tonic-gate 		 * However, with xresolv interfaces being introduced, the
28310Sstevel@tonic-gate 		 * phys_addr and lla are no longer the same, and the physical
28320Sstevel@tonic-gate 		 * address may not have any useful meaning, so we use the lla
28330Sstevel@tonic-gate 		 * for IPv6 address resolution and destination addressing.
28340Sstevel@tonic-gate 		 *
28350Sstevel@tonic-gate 		 * For PPP or other interfaces with a zero length
28360Sstevel@tonic-gate 		 * physical address, don't do anything here.
28370Sstevel@tonic-gate 		 * The bcopy() with a zero phys_addr length was previously
28380Sstevel@tonic-gate 		 * a no-op for interfaces with a zero-length physical address.
28390Sstevel@tonic-gate 		 * Using the lla for them would change the way they operate.
28400Sstevel@tonic-gate 		 * Doing nothing in such cases preserves expected behavior.
28410Sstevel@tonic-gate 		 */
28420Sstevel@tonic-gate 		woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
28430Sstevel@tonic-gate 		bcopy(ll_addr, woffset, ill->ill_nd_lla_len);
28440Sstevel@tonic-gate 	}
28450Sstevel@tonic-gate }
28460Sstevel@tonic-gate 
28470Sstevel@tonic-gate static boolean_t
28482546Scarlsonj nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len)
28490Sstevel@tonic-gate {
28500Sstevel@tonic-gate 	ill_t	*ill = nce->nce_ill;
28510Sstevel@tonic-gate 	uchar_t	*ll_offset;
28520Sstevel@tonic-gate 
28530Sstevel@tonic-gate 	ASSERT(nce->nce_res_mp != NULL);
28540Sstevel@tonic-gate 	if (ll_addr == NULL)
28550Sstevel@tonic-gate 		return (B_FALSE);
28560Sstevel@tonic-gate 	ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill);
28572546Scarlsonj 	if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0)
28580Sstevel@tonic-gate 		return (B_TRUE);
28590Sstevel@tonic-gate 	return (B_FALSE);
28600Sstevel@tonic-gate }
28610Sstevel@tonic-gate 
28620Sstevel@tonic-gate /*
28630Sstevel@tonic-gate  * Updates the link layer address or the reachability state of
28640Sstevel@tonic-gate  * a cache entry.  Reset probe counter if needed.
28650Sstevel@tonic-gate  */
28660Sstevel@tonic-gate static void
28670Sstevel@tonic-gate nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr)
28680Sstevel@tonic-gate {
28690Sstevel@tonic-gate 	ill_t	*ill = nce->nce_ill;
28700Sstevel@tonic-gate 	boolean_t need_stop_timer = B_FALSE;
28710Sstevel@tonic-gate 	boolean_t need_fastpath_update = B_FALSE;
28720Sstevel@tonic-gate 
28730Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&nce->nce_lock));
28742535Ssangeeta 	ASSERT(nce->nce_ipversion == IPV6_VERSION);
28750Sstevel@tonic-gate 	/*
28760Sstevel@tonic-gate 	 * If this interface does not do NUD, there is no point
28770Sstevel@tonic-gate 	 * in allowing an update to the cache entry.  Although
28780Sstevel@tonic-gate 	 * we will respond to NS.
28790Sstevel@tonic-gate 	 * The only time we accept an update for a resolver when
28800Sstevel@tonic-gate 	 * NUD is turned off is when it has just been created.
28810Sstevel@tonic-gate 	 * Non-Resolvers will always be created as REACHABLE.
28820Sstevel@tonic-gate 	 */
28830Sstevel@tonic-gate 	if (new_state != ND_UNCHANGED) {
28840Sstevel@tonic-gate 		if ((nce->nce_flags & NCE_F_NONUD) &&
28850Sstevel@tonic-gate 		    (nce->nce_state != ND_INCOMPLETE))
28860Sstevel@tonic-gate 			return;
28870Sstevel@tonic-gate 		ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN);
28880Sstevel@tonic-gate 		ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX);
28890Sstevel@tonic-gate 		need_stop_timer = B_TRUE;
28900Sstevel@tonic-gate 		if (new_state == ND_REACHABLE)
28910Sstevel@tonic-gate 			nce->nce_last = TICK_TO_MSEC(lbolt64);
28920Sstevel@tonic-gate 		else {
28930Sstevel@tonic-gate 			/* We force NUD in this case */
28940Sstevel@tonic-gate 			nce->nce_last = 0;
28950Sstevel@tonic-gate 		}
28960Sstevel@tonic-gate 		nce->nce_state = new_state;
28970Sstevel@tonic-gate 		nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
28980Sstevel@tonic-gate 	}
28990Sstevel@tonic-gate 	/*
29000Sstevel@tonic-gate 	 * In case of fast path we need to free the the fastpath
29010Sstevel@tonic-gate 	 * M_DATA and do another probe.  Otherwise we can just
29020Sstevel@tonic-gate 	 * overwrite the DL_UNITDATA_REQ data, noting we'll lose
29030Sstevel@tonic-gate 	 * whatever packets that happens to be transmitting at the time.
29040Sstevel@tonic-gate 	 */
29050Sstevel@tonic-gate 	if (new_ll_addr != NULL) {
29060Sstevel@tonic-gate 		ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) +
29070Sstevel@tonic-gate 		    ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr);
29080Sstevel@tonic-gate 		bcopy(new_ll_addr, nce->nce_res_mp->b_rptr +
29090Sstevel@tonic-gate 		    NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len);
29100Sstevel@tonic-gate 		if (nce->nce_fp_mp != NULL) {
29110Sstevel@tonic-gate 			freemsg(nce->nce_fp_mp);
29120Sstevel@tonic-gate 			nce->nce_fp_mp = NULL;
29130Sstevel@tonic-gate 		}
2914741Smasputra 		need_fastpath_update = B_TRUE;
29150Sstevel@tonic-gate 	}
29160Sstevel@tonic-gate 	mutex_exit(&nce->nce_lock);
29170Sstevel@tonic-gate 	if (need_stop_timer) {
29180Sstevel@tonic-gate 		(void) untimeout(nce->nce_timeout_id);
29190Sstevel@tonic-gate 		nce->nce_timeout_id = 0;
29200Sstevel@tonic-gate 	}
29210Sstevel@tonic-gate 	if (need_fastpath_update)
29220Sstevel@tonic-gate 		nce_fastpath(nce);
29230Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
29240Sstevel@tonic-gate }
29250Sstevel@tonic-gate 
29262535Ssangeeta void
29272535Ssangeeta nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert)
29282535Ssangeeta {
29292535Ssangeeta 	uint_t	count = 0;
29308485SPeter.Memishian@Sun.COM 	mblk_t  **mpp, *tmp;
29312535Ssangeeta 
29322535Ssangeeta 	ASSERT(MUTEX_HELD(&nce->nce_lock));
29332535Ssangeeta 
29348485SPeter.Memishian@Sun.COM 	for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) {
29358485SPeter.Memishian@Sun.COM 		if (++count > nce->nce_ill->ill_max_buf) {
29368485SPeter.Memishian@Sun.COM 			tmp = nce->nce_qd_mp->b_next;
29372535Ssangeeta 			nce->nce_qd_mp->b_next = NULL;
29382535Ssangeeta 			nce->nce_qd_mp->b_prev = NULL;
29392535Ssangeeta 			freemsg(nce->nce_qd_mp);
29402535Ssangeeta 			nce->nce_qd_mp = tmp;
29412535Ssangeeta 		}
29422535Ssangeeta 	}
29438485SPeter.Memishian@Sun.COM 
29442535Ssangeeta 	if (head_insert) {
29452535Ssangeeta 		mp->b_next = nce->nce_qd_mp;
29462535Ssangeeta 		nce->nce_qd_mp = mp;
29472535Ssangeeta 	} else {
29482535Ssangeeta 		*mpp = mp;
29492535Ssangeeta 	}
29502535Ssangeeta }
29512535Ssangeeta 
29520Sstevel@tonic-gate static void
29530Sstevel@tonic-gate nce_queue_mp(nce_t *nce, mblk_t *mp)
29540Sstevel@tonic-gate {
29550Sstevel@tonic-gate 	boolean_t head_insert = B_FALSE;
29560Sstevel@tonic-gate 	ip6_t	*ip6h;
29578485SPeter.Memishian@Sun.COM 	ip6i_t  *ip6i;
29588485SPeter.Memishian@Sun.COM 	mblk_t	*data_mp;
29590Sstevel@tonic-gate 
29600Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&nce->nce_lock));
29610Sstevel@tonic-gate 
29620Sstevel@tonic-gate 	if (mp->b_datap->db_type == M_CTL)
29630Sstevel@tonic-gate 		data_mp = mp->b_cont;
29640Sstevel@tonic-gate 	else
29650Sstevel@tonic-gate 		data_mp = mp;
29660Sstevel@tonic-gate 	ip6h = (ip6_t *)data_mp->b_rptr;
29670Sstevel@tonic-gate 	if (ip6h->ip6_nxt == IPPROTO_RAW) {
29680Sstevel@tonic-gate 		/*
29690Sstevel@tonic-gate 		 * This message should have been pulled up already in
29700Sstevel@tonic-gate 		 * ip_wput_v6. We can't do pullups here because the message
29710Sstevel@tonic-gate 		 * could be from the nce_qd_mp which could have b_next/b_prev
29720Sstevel@tonic-gate 		 * non-NULL.
29730Sstevel@tonic-gate 		 */
29740Sstevel@tonic-gate 		ip6i = (ip6i_t *)ip6h;
29758485SPeter.Memishian@Sun.COM 		ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN);
29768485SPeter.Memishian@Sun.COM 
29770Sstevel@tonic-gate 		/*
29788485SPeter.Memishian@Sun.COM 		 * If this packet is marked IP6I_IPMP_PROBE, then we need to:
29798485SPeter.Memishian@Sun.COM 		 *
29808485SPeter.Memishian@Sun.COM 		 *   1. Insert it at the head of the nce_qd_mp list.  Consider
29818485SPeter.Memishian@Sun.COM 		 *	the normal (non-probe) load-speading case where the
29828485SPeter.Memishian@Sun.COM 		 *	source address of the ND packet is not tied to nce_ill.
29838485SPeter.Memishian@Sun.COM 		 *	If the ill bound to the source address cannot receive,
29848485SPeter.Memishian@Sun.COM 		 *	the response to the ND packet will not be received.
29858485SPeter.Memishian@Sun.COM 		 *	However, if ND packets for nce_ill's probes are queued
29868485SPeter.Memishian@Sun.COM 		 *	behind that ND packet, those probes will also fail to
29878485SPeter.Memishian@Sun.COM 		 *	be sent, and thus in.mpathd will erroneously conclude
29888485SPeter.Memishian@Sun.COM 		 *	that nce_ill has also failed.
29898485SPeter.Memishian@Sun.COM 		 *
29908485SPeter.Memishian@Sun.COM 		 *   2. Drop the probe packet in ndp_timer() if the ND did
29918485SPeter.Memishian@Sun.COM 		 *	not succeed on the first attempt.  This ensures that
29928485SPeter.Memishian@Sun.COM 		 *	ND problems do not manifest as probe RTT spikes.
29930Sstevel@tonic-gate 		 */
29948485SPeter.Memishian@Sun.COM 		if (ip6i->ip6i_flags & IP6I_IPMP_PROBE)
29950Sstevel@tonic-gate 			head_insert = B_TRUE;
29960Sstevel@tonic-gate 	}
29972535Ssangeeta 	nce_queue_mp_common(nce, mp, head_insert);
29980Sstevel@tonic-gate }
29990Sstevel@tonic-gate 
30000Sstevel@tonic-gate /*
30010Sstevel@tonic-gate  * Called when address resolution failed due to a timeout.
30020Sstevel@tonic-gate  * Send an ICMP unreachable in response to all queued packets.
30030Sstevel@tonic-gate  */
30040Sstevel@tonic-gate void
30050Sstevel@tonic-gate nce_resolv_failed(nce_t *nce)
30060Sstevel@tonic-gate {
30070Sstevel@tonic-gate 	mblk_t	*mp, *nxt_mp, *first_mp;
30080Sstevel@tonic-gate 	char	buf[INET6_ADDRSTRLEN];
30090Sstevel@tonic-gate 	ip6_t *ip6h;
30100Sstevel@tonic-gate 	zoneid_t zoneid = GLOBAL_ZONEID;
30113448Sdh155122 	ip_stack_t	*ipst = nce->nce_ill->ill_ipst;
30120Sstevel@tonic-gate 
30130Sstevel@tonic-gate 	ip1dbg(("nce_resolv_failed: dst %s\n",
30140Sstevel@tonic-gate 	    inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf))));
30150Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
30160Sstevel@tonic-gate 	mp = nce->nce_qd_mp;
30170Sstevel@tonic-gate 	nce->nce_qd_mp = NULL;
30180Sstevel@tonic-gate 	mutex_exit(&nce->nce_lock);
30190Sstevel@tonic-gate 	while (mp != NULL) {
30200Sstevel@tonic-gate 		nxt_mp = mp->b_next;
30210Sstevel@tonic-gate 		mp->b_next = NULL;
30220Sstevel@tonic-gate 		mp->b_prev = NULL;
30230Sstevel@tonic-gate 
30240Sstevel@tonic-gate 		first_mp = mp;
30250Sstevel@tonic-gate 		if (mp->b_datap->db_type == M_CTL) {
30260Sstevel@tonic-gate 			ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr;
30270Sstevel@tonic-gate 			ASSERT(io->ipsec_out_type == IPSEC_OUT);
30280Sstevel@tonic-gate 			zoneid = io->ipsec_out_zoneid;
30290Sstevel@tonic-gate 			ASSERT(zoneid != ALL_ZONES);
30300Sstevel@tonic-gate 			mp = mp->b_cont;
30316851Skp158701 			mp->b_next = NULL;
30326851Skp158701 			mp->b_prev = NULL;
30330Sstevel@tonic-gate 		}
30340Sstevel@tonic-gate 
30350Sstevel@tonic-gate 		ip6h = (ip6_t *)mp->b_rptr;
30360Sstevel@tonic-gate 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
30370Sstevel@tonic-gate 			ip6i_t *ip6i;
30380Sstevel@tonic-gate 			/*
30390Sstevel@tonic-gate 			 * This message should have been pulled up already
30400Sstevel@tonic-gate 			 * in ip_wput_v6. ip_hdr_complete_v6 assumes that
30410Sstevel@tonic-gate 			 * the header is pulled up.
30420Sstevel@tonic-gate 			 */
30430Sstevel@tonic-gate 			ip6i = (ip6i_t *)ip6h;
30440Sstevel@tonic-gate 			ASSERT((mp->b_wptr - (uchar_t *)ip6i) >=
30450Sstevel@tonic-gate 			    sizeof (ip6i_t) + IPV6_HDR_LEN);
30460Sstevel@tonic-gate 			mp->b_rptr += sizeof (ip6i_t);
30470Sstevel@tonic-gate 		}
30480Sstevel@tonic-gate 		/*
30490Sstevel@tonic-gate 		 * Ignore failure since icmp_unreachable_v6 will silently
30500Sstevel@tonic-gate 		 * drop packets with an unspecified source address.
30510Sstevel@tonic-gate 		 */
30523448Sdh155122 		(void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst);
30530Sstevel@tonic-gate 		icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp,
30543448Sdh155122 		    ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst);
30550Sstevel@tonic-gate 		mp = nxt_mp;
30560Sstevel@tonic-gate 	}
3057*9175SSowmini.Varadhan@Sun.COM 	nce_cb_dispatch(nce);
30580Sstevel@tonic-gate }
30590Sstevel@tonic-gate 
30600Sstevel@tonic-gate /*
30610Sstevel@tonic-gate  * Called by SIOCSNDP* ioctl to add/change an nce entry
30620Sstevel@tonic-gate  * and the corresponding attributes.
30630Sstevel@tonic-gate  * Disallow states other than ND_REACHABLE or ND_STALE.
30640Sstevel@tonic-gate  */
30650Sstevel@tonic-gate int
30660Sstevel@tonic-gate ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr)
30670Sstevel@tonic-gate {
30680Sstevel@tonic-gate 	sin6_t		*sin6;
30690Sstevel@tonic-gate 	in6_addr_t	*addr;
30700Sstevel@tonic-gate 	nce_t		*nce;
30710Sstevel@tonic-gate 	int		err;
30720Sstevel@tonic-gate 	uint16_t	new_flags = 0;
30730Sstevel@tonic-gate 	uint16_t	old_flags = 0;
30740Sstevel@tonic-gate 	int		inflags = lnr->lnr_flags;
30753448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
30760Sstevel@tonic-gate 
30772535Ssangeeta 	ASSERT(ill->ill_isv6);
30780Sstevel@tonic-gate 	if ((lnr->lnr_state_create != ND_REACHABLE) &&
30790Sstevel@tonic-gate 	    (lnr->lnr_state_create != ND_STALE))
30800Sstevel@tonic-gate 		return (EINVAL);
30810Sstevel@tonic-gate 
30828485SPeter.Memishian@Sun.COM 	if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN)
30838485SPeter.Memishian@Sun.COM 		return (EINVAL);
30848485SPeter.Memishian@Sun.COM 
30850Sstevel@tonic-gate 	sin6 = (sin6_t *)&lnr->lnr_addr;
30860Sstevel@tonic-gate 	addr = &sin6->sin6_addr;
30870Sstevel@tonic-gate 
30883448Sdh155122 	mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
30890Sstevel@tonic-gate 	/* We know it can not be mapping so just look in the hash table */
30903448Sdh155122 	nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr));
30918485SPeter.Memishian@Sun.COM 	/* See comment in ndp_query() regarding IS_IPMP(ill) usage */
30928485SPeter.Memishian@Sun.COM 	nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce);
30930Sstevel@tonic-gate 	if (nce != NULL)
30940Sstevel@tonic-gate 		new_flags = nce->nce_flags;
30950Sstevel@tonic-gate 
30960Sstevel@tonic-gate 	switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) {
30970Sstevel@tonic-gate 	case NDF_ISROUTER_ON:
30980Sstevel@tonic-gate 		new_flags |= NCE_F_ISROUTER;
30990Sstevel@tonic-gate 		break;
31000Sstevel@tonic-gate 	case NDF_ISROUTER_OFF:
31010Sstevel@tonic-gate 		new_flags &= ~NCE_F_ISROUTER;
31020Sstevel@tonic-gate 		break;
31030Sstevel@tonic-gate 	case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON):
31043448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
31050Sstevel@tonic-gate 		if (nce != NULL)
31060Sstevel@tonic-gate 			NCE_REFRELE(nce);
31070Sstevel@tonic-gate 		return (EINVAL);
31080Sstevel@tonic-gate 	}
31090Sstevel@tonic-gate 
31100Sstevel@tonic-gate 	switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) {
31110Sstevel@tonic-gate 	case NDF_ANYCAST_ON:
31120Sstevel@tonic-gate 		new_flags |= NCE_F_ANYCAST;
31130Sstevel@tonic-gate 		break;
31140Sstevel@tonic-gate 	case NDF_ANYCAST_OFF:
31150Sstevel@tonic-gate 		new_flags &= ~NCE_F_ANYCAST;
31160Sstevel@tonic-gate 		break;
31170Sstevel@tonic-gate 	case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON):
31183448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
31190Sstevel@tonic-gate 		if (nce != NULL)
31200Sstevel@tonic-gate 			NCE_REFRELE(nce);
31210Sstevel@tonic-gate 		return (EINVAL);
31220Sstevel@tonic-gate 	}
31230Sstevel@tonic-gate 
31240Sstevel@tonic-gate 	if (nce == NULL) {
31254714Ssowmini 		err = ndp_add_v6(ill,
31260Sstevel@tonic-gate 		    (uchar_t *)lnr->lnr_hdw_addr,
31270Sstevel@tonic-gate 		    addr,
31280Sstevel@tonic-gate 		    &ipv6_all_ones,
31290Sstevel@tonic-gate 		    &ipv6_all_zeros,
31300Sstevel@tonic-gate 		    0,
31310Sstevel@tonic-gate 		    new_flags,
31320Sstevel@tonic-gate 		    lnr->lnr_state_create,
31334714Ssowmini 		    &nce);
31340Sstevel@tonic-gate 		if (err != 0) {
31353448Sdh155122 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
31360Sstevel@tonic-gate 			ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err));
31370Sstevel@tonic-gate 			return (err);
31380Sstevel@tonic-gate 		}
31390Sstevel@tonic-gate 	}
31400Sstevel@tonic-gate 	old_flags = nce->nce_flags;
31410Sstevel@tonic-gate 	if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) {
31420Sstevel@tonic-gate 		/*
31430Sstevel@tonic-gate 		 * Router turned to host, delete all ires.
31440Sstevel@tonic-gate 		 * XXX Just delete the entry, but we need to add too.
31450Sstevel@tonic-gate 		 */
31460Sstevel@tonic-gate 		nce->nce_flags &= ~NCE_F_ISROUTER;
31473448Sdh155122 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
31480Sstevel@tonic-gate 		ndp_delete(nce);
31490Sstevel@tonic-gate 		NCE_REFRELE(nce);
31500Sstevel@tonic-gate 		return (0);
31510Sstevel@tonic-gate 	}
31523448Sdh155122 	mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
31530Sstevel@tonic-gate 
31540Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
31550Sstevel@tonic-gate 	nce->nce_flags = new_flags;
31560Sstevel@tonic-gate 	mutex_exit(&nce->nce_lock);
31570Sstevel@tonic-gate 	/*
31580Sstevel@tonic-gate 	 * Note that we ignore the state at this point, which
31590Sstevel@tonic-gate 	 * should be either STALE or REACHABLE.  Instead we let
31600Sstevel@tonic-gate 	 * the link layer address passed in to determine the state
31610Sstevel@tonic-gate 	 * much like incoming packets.
31620Sstevel@tonic-gate 	 */
31638485SPeter.Memishian@Sun.COM 	nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE);
31640Sstevel@tonic-gate 	NCE_REFRELE(nce);
31650Sstevel@tonic-gate 	return (0);
31660Sstevel@tonic-gate }
31670Sstevel@tonic-gate 
31680Sstevel@tonic-gate /*
31690Sstevel@tonic-gate  * If the device driver supports it, we make nce_fp_mp to have
31700Sstevel@tonic-gate  * an M_DATA prepend.  Otherwise nce_fp_mp will be null.
31714714Ssowmini  * The caller ensures there is hold on nce for this function.
31720Sstevel@tonic-gate  * Note that since ill_fastpath_probe() copies the mblk there is
31730Sstevel@tonic-gate  * no need for the hold beyond this function.
31740Sstevel@tonic-gate  */
31753425Ssowmini void
31760Sstevel@tonic-gate nce_fastpath(nce_t *nce)
31770Sstevel@tonic-gate {
31780Sstevel@tonic-gate 	ill_t	*ill = nce->nce_ill;
31790Sstevel@tonic-gate 	int res;
31800Sstevel@tonic-gate 
31810Sstevel@tonic-gate 	ASSERT(ill != NULL);
31824714Ssowmini 	ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE);
31834714Ssowmini 
31844714Ssowmini 	if (nce->nce_fp_mp != NULL) {
31854714Ssowmini 		/* Already contains fastpath info */
31860Sstevel@tonic-gate 		return;
31870Sstevel@tonic-gate 	}
31880Sstevel@tonic-gate 	if (nce->nce_res_mp != NULL) {
31890Sstevel@tonic-gate 		nce_fastpath_list_add(nce);
31900Sstevel@tonic-gate 		res = ill_fastpath_probe(ill, nce->nce_res_mp);
31910Sstevel@tonic-gate 		/*
31920Sstevel@tonic-gate 		 * EAGAIN is an indication of a transient error
31930Sstevel@tonic-gate 		 * i.e. allocation failure etc. leave the nce in the list it
31940Sstevel@tonic-gate 		 * will be updated when another probe happens for another ire
31950Sstevel@tonic-gate 		 * if not it will be taken out of the list when the ire is
31960Sstevel@tonic-gate 		 * deleted.
31970Sstevel@tonic-gate 		 */
31980Sstevel@tonic-gate 
31990Sstevel@tonic-gate 		if (res != 0 && res != EAGAIN)
32000Sstevel@tonic-gate 			nce_fastpath_list_delete(nce);
32010Sstevel@tonic-gate 	}
32020Sstevel@tonic-gate }
32030Sstevel@tonic-gate 
32040Sstevel@tonic-gate /*
32050Sstevel@tonic-gate  * Drain the list of nce's waiting for fastpath response.
32060Sstevel@tonic-gate  */
32070Sstevel@tonic-gate void
32080Sstevel@tonic-gate nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void  *),
32090Sstevel@tonic-gate     void *arg)
32100Sstevel@tonic-gate {
32110Sstevel@tonic-gate 
32120Sstevel@tonic-gate 	nce_t *next_nce;
32130Sstevel@tonic-gate 	nce_t *current_nce;
32140Sstevel@tonic-gate 	nce_t *first_nce;
32150Sstevel@tonic-gate 	nce_t *prev_nce = NULL;
32160Sstevel@tonic-gate 
32170Sstevel@tonic-gate 	mutex_enter(&ill->ill_lock);
32180Sstevel@tonic-gate 	first_nce = current_nce = (nce_t *)ill->ill_fastpath_list;
32190Sstevel@tonic-gate 	while (current_nce != (nce_t *)&ill->ill_fastpath_list) {
32200Sstevel@tonic-gate 		next_nce = current_nce->nce_fastpath;
32210Sstevel@tonic-gate 		/*
32220Sstevel@tonic-gate 		 * Take it off the list if we're flushing, or if the callback
32230Sstevel@tonic-gate 		 * routine tells us to do so.  Otherwise, leave the nce in the
32240Sstevel@tonic-gate 		 * fastpath list to handle any pending response from the lower
32250Sstevel@tonic-gate 		 * layer.  We can't drain the list when the callback routine
32260Sstevel@tonic-gate 		 * comparison failed, because the response is asynchronous in
32270Sstevel@tonic-gate 		 * nature, and may not arrive in the same order as the list
32280Sstevel@tonic-gate 		 * insertion.
32290Sstevel@tonic-gate 		 */
32300Sstevel@tonic-gate 		if (func == NULL || func(current_nce, arg)) {
32310Sstevel@tonic-gate 			current_nce->nce_fastpath = NULL;
32320Sstevel@tonic-gate 			if (current_nce == first_nce)
32330Sstevel@tonic-gate 				ill->ill_fastpath_list = first_nce = next_nce;
32340Sstevel@tonic-gate 			else
32350Sstevel@tonic-gate 				prev_nce->nce_fastpath = next_nce;
32360Sstevel@tonic-gate 		} else {
32370Sstevel@tonic-gate 			/* previous element that is still in the list */
32380Sstevel@tonic-gate 			prev_nce = current_nce;
32390Sstevel@tonic-gate 		}
32400Sstevel@tonic-gate 		current_nce = next_nce;
32410Sstevel@tonic-gate 	}
32420Sstevel@tonic-gate 	mutex_exit(&ill->ill_lock);
32430Sstevel@tonic-gate }
32440Sstevel@tonic-gate 
32450Sstevel@tonic-gate /*
32460Sstevel@tonic-gate  * Add nce to the nce fastpath list.
32470Sstevel@tonic-gate  */
32480Sstevel@tonic-gate void
32490Sstevel@tonic-gate nce_fastpath_list_add(nce_t *nce)
32500Sstevel@tonic-gate {
32510Sstevel@tonic-gate 	ill_t *ill;
32520Sstevel@tonic-gate 
32530Sstevel@tonic-gate 	ill = nce->nce_ill;
32540Sstevel@tonic-gate 
32550Sstevel@tonic-gate 	mutex_enter(&ill->ill_lock);
32560Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
32570Sstevel@tonic-gate 
32580Sstevel@tonic-gate 	/*
32590Sstevel@tonic-gate 	 * if nce has not been deleted and
32600Sstevel@tonic-gate 	 * is not already in the list add it.
32610Sstevel@tonic-gate 	 */
32620Sstevel@tonic-gate 	if (!(nce->nce_flags & NCE_F_CONDEMNED) &&
32630Sstevel@tonic-gate 	    (nce->nce_fastpath == NULL)) {
32640Sstevel@tonic-gate 		nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list;
32650Sstevel@tonic-gate 		ill->ill_fastpath_list = nce;
32660Sstevel@tonic-gate 	}
32670Sstevel@tonic-gate 
32680Sstevel@tonic-gate 	mutex_exit(&nce->nce_lock);
32690Sstevel@tonic-gate 	mutex_exit(&ill->ill_lock);
32700Sstevel@tonic-gate }
32710Sstevel@tonic-gate 
32720Sstevel@tonic-gate /*
32730Sstevel@tonic-gate  * remove nce from the nce fastpath list.
32740Sstevel@tonic-gate  */
32750Sstevel@tonic-gate void
32760Sstevel@tonic-gate nce_fastpath_list_delete(nce_t *nce)
32770Sstevel@tonic-gate {
32780Sstevel@tonic-gate 	nce_t *nce_ptr;
32790Sstevel@tonic-gate 
32800Sstevel@tonic-gate 	ill_t *ill;
32810Sstevel@tonic-gate 
32820Sstevel@tonic-gate 	ill = nce->nce_ill;
32830Sstevel@tonic-gate 	ASSERT(ill != NULL);
32840Sstevel@tonic-gate 
32850Sstevel@tonic-gate 	mutex_enter(&ill->ill_lock);
32860Sstevel@tonic-gate 	if (nce->nce_fastpath == NULL)
32870Sstevel@tonic-gate 		goto done;
32880Sstevel@tonic-gate 
32890Sstevel@tonic-gate 	ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list);
32900Sstevel@tonic-gate 
32910Sstevel@tonic-gate 	if (ill->ill_fastpath_list == nce) {
32920Sstevel@tonic-gate 		ill->ill_fastpath_list = nce->nce_fastpath;
32930Sstevel@tonic-gate 	} else {
32940Sstevel@tonic-gate 		nce_ptr = ill->ill_fastpath_list;
32950Sstevel@tonic-gate 		while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) {
32960Sstevel@tonic-gate 			if (nce_ptr->nce_fastpath == nce) {
32970Sstevel@tonic-gate 				nce_ptr->nce_fastpath = nce->nce_fastpath;
32980Sstevel@tonic-gate 				break;
32990Sstevel@tonic-gate 			}
33000Sstevel@tonic-gate 			nce_ptr = nce_ptr->nce_fastpath;
33010Sstevel@tonic-gate 		}
33020Sstevel@tonic-gate 	}
33030Sstevel@tonic-gate 
33040Sstevel@tonic-gate 	nce->nce_fastpath = NULL;
33050Sstevel@tonic-gate done:
33060Sstevel@tonic-gate 	mutex_exit(&ill->ill_lock);
33070Sstevel@tonic-gate }
33080Sstevel@tonic-gate 
33090Sstevel@tonic-gate /*
33100Sstevel@tonic-gate  * Update all NCE's that are not in fastpath mode and
33110Sstevel@tonic-gate  * have an nce_fp_mp that matches mp. mp->b_cont contains
33120Sstevel@tonic-gate  * the fastpath header.
33130Sstevel@tonic-gate  *
33140Sstevel@tonic-gate  * Returns TRUE if entry should be dequeued, or FALSE otherwise.
33150Sstevel@tonic-gate  */
33160Sstevel@tonic-gate boolean_t
33170Sstevel@tonic-gate ndp_fastpath_update(nce_t *nce, void *arg)
33180Sstevel@tonic-gate {
33190Sstevel@tonic-gate 	mblk_t 	*mp, *fp_mp;
33200Sstevel@tonic-gate 	uchar_t	*mp_rptr, *ud_mp_rptr;
33210Sstevel@tonic-gate 	mblk_t	*ud_mp = nce->nce_res_mp;
33220Sstevel@tonic-gate 	ptrdiff_t	cmplen;
33230Sstevel@tonic-gate 
33240Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_MAPPING)
33250Sstevel@tonic-gate 		return (B_TRUE);
33260Sstevel@tonic-gate 	if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL))
33270Sstevel@tonic-gate 		return (B_TRUE);
33280Sstevel@tonic-gate 
33290Sstevel@tonic-gate 	ip2dbg(("ndp_fastpath_update: trying\n"));
33300Sstevel@tonic-gate 	mp = (mblk_t *)arg;
33310Sstevel@tonic-gate 	mp_rptr = mp->b_rptr;
33320Sstevel@tonic-gate 	cmplen = mp->b_wptr - mp_rptr;
33330Sstevel@tonic-gate 	ASSERT(cmplen >= 0);
33340Sstevel@tonic-gate 	ud_mp_rptr = ud_mp->b_rptr;
33350Sstevel@tonic-gate 	/*
33360Sstevel@tonic-gate 	 * The nce is locked here to prevent any other threads
33370Sstevel@tonic-gate 	 * from accessing and changing nce_res_mp when the IPv6 address
33380Sstevel@tonic-gate 	 * becomes resolved to an lla while we're in the middle
33390Sstevel@tonic-gate 	 * of looking at and comparing the hardware address (lla).
33400Sstevel@tonic-gate 	 * It is also locked to prevent multiple threads in nce_fastpath_update
33410Sstevel@tonic-gate 	 * from examining nce_res_mp atthe same time.
33420Sstevel@tonic-gate 	 */
33430Sstevel@tonic-gate 	mutex_enter(&nce->nce_lock);
33440Sstevel@tonic-gate 	if (ud_mp->b_wptr - ud_mp_rptr != cmplen ||
33450Sstevel@tonic-gate 	    bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) {
33460Sstevel@tonic-gate 		mutex_exit(&nce->nce_lock);
33470Sstevel@tonic-gate 		/*
33480Sstevel@tonic-gate 		 * Don't take the ire off the fastpath list yet,
33490Sstevel@tonic-gate 		 * since the response may come later.
33500Sstevel@tonic-gate 		 */
33510Sstevel@tonic-gate 		return (B_FALSE);
33520Sstevel@tonic-gate 	}
33530Sstevel@tonic-gate 	/* Matched - install mp as the fastpath mp */
33540Sstevel@tonic-gate 	ip1dbg(("ndp_fastpath_update: match\n"));
33550Sstevel@tonic-gate 	fp_mp = dupb(mp->b_cont);
33560Sstevel@tonic-gate 	if (fp_mp != NULL) {
33570Sstevel@tonic-gate 		nce->nce_fp_mp = fp_mp;
33580Sstevel@tonic-gate 	}
33590Sstevel@tonic-gate 	mutex_exit(&nce->nce_lock);
33600Sstevel@tonic-gate 	return (B_TRUE);
33610Sstevel@tonic-gate }
33620Sstevel@tonic-gate 
33630Sstevel@tonic-gate /*
33640Sstevel@tonic-gate  * This function handles the DL_NOTE_FASTPATH_FLUSH notification from
33650Sstevel@tonic-gate  * driver.  Note that it assumes IP is exclusive...
33660Sstevel@tonic-gate  */
33670Sstevel@tonic-gate /* ARGSUSED */
33680Sstevel@tonic-gate void
33690Sstevel@tonic-gate ndp_fastpath_flush(nce_t *nce, char *arg)
33700Sstevel@tonic-gate {
33710Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_MAPPING)
33720Sstevel@tonic-gate 		return;
33730Sstevel@tonic-gate 	/* No fastpath info? */
33740Sstevel@tonic-gate 	if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL)
33750Sstevel@tonic-gate 		return;
33760Sstevel@tonic-gate 
33773425Ssowmini 	if (nce->nce_ipversion == IPV4_VERSION &&
33783425Ssowmini 	    nce->nce_flags & NCE_F_BCAST) {
33793425Ssowmini 		/*
33803425Ssowmini 		 * IPv4 BROADCAST entries:
33813425Ssowmini 		 * We can't delete the nce since it is difficult to
33823425Ssowmini 		 * recreate these without going through the
33833425Ssowmini 		 * ipif down/up dance.
33843425Ssowmini 		 *
33853425Ssowmini 		 * All access to nce->nce_fp_mp in the case of these
33863425Ssowmini 		 * is protected by nce_lock.
33873425Ssowmini 		 */
33883425Ssowmini 		mutex_enter(&nce->nce_lock);
33893425Ssowmini 		if (nce->nce_fp_mp != NULL) {
33903425Ssowmini 			freeb(nce->nce_fp_mp);
33913425Ssowmini 			nce->nce_fp_mp = NULL;
33923425Ssowmini 			mutex_exit(&nce->nce_lock);
33933425Ssowmini 			nce_fastpath(nce);
33943425Ssowmini 		} else {
33953425Ssowmini 			mutex_exit(&nce->nce_lock);
33963425Ssowmini 		}
33973425Ssowmini 	} else {
33983425Ssowmini 		/* Just delete the NCE... */
33993425Ssowmini 		ndp_delete(nce);
34003425Ssowmini 	}
34010Sstevel@tonic-gate }
34020Sstevel@tonic-gate 
34030Sstevel@tonic-gate /*
34040Sstevel@tonic-gate  * Return a pointer to a given option in the packet.
34050Sstevel@tonic-gate  * Assumes that option part of the packet have already been validated.
34060Sstevel@tonic-gate  */
34070Sstevel@tonic-gate nd_opt_hdr_t *
34080Sstevel@tonic-gate ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type)
34090Sstevel@tonic-gate {
34100Sstevel@tonic-gate 	while (optlen > 0) {
34110Sstevel@tonic-gate 		if (opt->nd_opt_type == opt_type)
34120Sstevel@tonic-gate 			return (opt);
34130Sstevel@tonic-gate 		optlen -= 8 * opt->nd_opt_len;
34140Sstevel@tonic-gate 		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
34150Sstevel@tonic-gate 	}
34160Sstevel@tonic-gate 	return (NULL);
34170Sstevel@tonic-gate }
34180Sstevel@tonic-gate 
34190Sstevel@tonic-gate /*
34200Sstevel@tonic-gate  * Verify all option lengths present are > 0, also check to see
34210Sstevel@tonic-gate  * if the option lengths and packet length are consistent.
34220Sstevel@tonic-gate  */
34230Sstevel@tonic-gate boolean_t
34240Sstevel@tonic-gate ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen)
34250Sstevel@tonic-gate {
34260Sstevel@tonic-gate 	ASSERT(opt != NULL);
34270Sstevel@tonic-gate 	while (optlen > 0) {
34280Sstevel@tonic-gate 		if (opt->nd_opt_len == 0)
34290Sstevel@tonic-gate 			return (B_FALSE);
34300Sstevel@tonic-gate 		optlen -= 8 * opt->nd_opt_len;
34310Sstevel@tonic-gate 		if (optlen < 0)
34320Sstevel@tonic-gate 			return (B_FALSE);
34330Sstevel@tonic-gate 		opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len);
34340Sstevel@tonic-gate 	}
34350Sstevel@tonic-gate 	return (B_TRUE);
34360Sstevel@tonic-gate }
34370Sstevel@tonic-gate 
34380Sstevel@tonic-gate /*
34390Sstevel@tonic-gate  * ndp_walk function.
34400Sstevel@tonic-gate  * Free a fraction of the NCE cache entries.
34410Sstevel@tonic-gate  * A fraction of zero means to not free any in that category.
34420Sstevel@tonic-gate  */
34430Sstevel@tonic-gate void
34440Sstevel@tonic-gate ndp_cache_reclaim(nce_t *nce, char *arg)
34450Sstevel@tonic-gate {
34460Sstevel@tonic-gate 	nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg;
34470Sstevel@tonic-gate 	uint_t	rand;
34480Sstevel@tonic-gate 
34490Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_PERMANENT)
34500Sstevel@tonic-gate 		return;
34510Sstevel@tonic-gate 
34520Sstevel@tonic-gate 	rand = (uint_t)lbolt +
34530Sstevel@tonic-gate 	    NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE);
34540Sstevel@tonic-gate 	if (ncr->ncr_host != 0 &&
34550Sstevel@tonic-gate 	    (rand/ncr->ncr_host)*ncr->ncr_host == rand) {
34560Sstevel@tonic-gate 		ndp_delete(nce);
34570Sstevel@tonic-gate 		return;
34580Sstevel@tonic-gate 	}
34590Sstevel@tonic-gate }
34600Sstevel@tonic-gate 
34610Sstevel@tonic-gate /*
34620Sstevel@tonic-gate  * ndp_walk function.
34630Sstevel@tonic-gate  * Count the number of NCEs that can be deleted.
34640Sstevel@tonic-gate  * These would be hosts but not routers.
34650Sstevel@tonic-gate  */
34660Sstevel@tonic-gate void
34670Sstevel@tonic-gate ndp_cache_count(nce_t *nce, char *arg)
34680Sstevel@tonic-gate {
34690Sstevel@tonic-gate 	ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg;
34700Sstevel@tonic-gate 
34710Sstevel@tonic-gate 	if (nce->nce_flags & NCE_F_PERMANENT)
34720Sstevel@tonic-gate 		return;
34730Sstevel@tonic-gate 
34740Sstevel@tonic-gate 	ncc->ncc_total++;
34750Sstevel@tonic-gate 	if (!(nce->nce_flags & NCE_F_ISROUTER))
34760Sstevel@tonic-gate 		ncc->ncc_host++;
34770Sstevel@tonic-gate }
34780Sstevel@tonic-gate 
34795023Scarlsonj #ifdef DEBUG
34800Sstevel@tonic-gate void
34810Sstevel@tonic-gate nce_trace_ref(nce_t *nce)
34820Sstevel@tonic-gate {
34830Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&nce->nce_lock));
34840Sstevel@tonic-gate 
34855023Scarlsonj 	if (nce->nce_trace_disable)
34860Sstevel@tonic-gate 		return;
34870Sstevel@tonic-gate 
34885023Scarlsonj 	if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) {
34895023Scarlsonj 		nce->nce_trace_disable = B_TRUE;
34905023Scarlsonj 		nce_trace_cleanup(nce);
34910Sstevel@tonic-gate 	}
34920Sstevel@tonic-gate }
34930Sstevel@tonic-gate 
34940Sstevel@tonic-gate void
34950Sstevel@tonic-gate nce_untrace_ref(nce_t *nce)
34960Sstevel@tonic-gate {
34970Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&nce->nce_lock));
34980Sstevel@tonic-gate 
34995023Scarlsonj 	if (!nce->nce_trace_disable)
35005023Scarlsonj 		th_trace_unref(nce);
35010Sstevel@tonic-gate }
35020Sstevel@tonic-gate 
35035023Scarlsonj static void
35045023Scarlsonj nce_trace_cleanup(const nce_t *nce)
35050Sstevel@tonic-gate {
35065023Scarlsonj 	th_trace_cleanup(nce, nce->nce_trace_disable);
35070Sstevel@tonic-gate }
35080Sstevel@tonic-gate #endif
35092535Ssangeeta 
35102535Ssangeeta /*
35112535Ssangeeta  * Called when address resolution fails due to a timeout.
35122535Ssangeeta  * Send an ICMP unreachable in response to all queued packets.
35132535Ssangeeta  */
35142535Ssangeeta void
35152535Ssangeeta arp_resolv_failed(nce_t *nce)
35162535Ssangeeta {
35172535Ssangeeta 	mblk_t	*mp, *nxt_mp, *first_mp;
35182535Ssangeeta 	char	buf[INET6_ADDRSTRLEN];
35192535Ssangeeta 	zoneid_t zoneid = GLOBAL_ZONEID;
35202535Ssangeeta 	struct in_addr ipv4addr;
35213448Sdh155122 	ip_stack_t *ipst = nce->nce_ill->ill_ipst;
35222535Ssangeeta 
35232535Ssangeeta 	IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr);
35242535Ssangeeta 	ip3dbg(("arp_resolv_failed: dst %s\n",
35252535Ssangeeta 	    inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf))));
35262535Ssangeeta 	mutex_enter(&nce->nce_lock);
35272535Ssangeeta 	mp = nce->nce_qd_mp;
35282535Ssangeeta 	nce->nce_qd_mp = NULL;
35292535Ssangeeta 	mutex_exit(&nce->nce_lock);
35302535Ssangeeta 
35312535Ssangeeta 	while (mp != NULL) {
35322535Ssangeeta 		nxt_mp = mp->b_next;
35332535Ssangeeta 		mp->b_next = NULL;
35342535Ssangeeta 		mp->b_prev = NULL;
35352535Ssangeeta 
35362535Ssangeeta 		first_mp = mp;
35372535Ssangeeta 		/*
35382535Ssangeeta 		 * Send icmp unreachable messages
35392535Ssangeeta 		 * to the hosts.
35402535Ssangeeta 		 */
35413448Sdh155122 		(void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst);
35422535Ssangeeta 		ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n"));
35432535Ssangeeta 		icmp_unreachable(nce->nce_ill->ill_wq, first_mp,
35443448Sdh155122 		    ICMP_HOST_UNREACHABLE, zoneid, ipst);
35452535Ssangeeta 		mp = nxt_mp;
35462535Ssangeeta 	}
35472535Ssangeeta }
35482535Ssangeeta 
35494714Ssowmini int
35504714Ssowmini ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
35514714Ssowmini     nce_t **newnce, nce_t *src_nce)
35522535Ssangeeta {
35534714Ssowmini 	int	err;
35542535Ssangeeta 	nce_t	*nce;
35552535Ssangeeta 	in6_addr_t addr6;
35563448Sdh155122 	ip_stack_t *ipst = ill->ill_ipst;
35573448Sdh155122 
35583448Sdh155122 	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
35593448Sdh155122 	nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
35602535Ssangeeta 	IN6_IPADDR_TO_V4MAPPED(*addr, &addr6);
35618485SPeter.Memishian@Sun.COM 	/*
35628485SPeter.Memishian@Sun.COM 	 * NOTE: IPv4 never matches across the illgrp since the NCE's we're
35638485SPeter.Memishian@Sun.COM 	 * looking up have fastpath headers that are inherently per-ill.
35648485SPeter.Memishian@Sun.COM 	 */
35658485SPeter.Memishian@Sun.COM 	nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce);
35662535Ssangeeta 	if (nce == NULL) {
35674714Ssowmini 		err = ndp_add_v4(ill, addr, flags, newnce, src_nce);
35682535Ssangeeta 	} else {
35692535Ssangeeta 		*newnce = nce;
35702535Ssangeeta 		err = EEXIST;
35712535Ssangeeta 	}
35723448Sdh155122 	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
35732535Ssangeeta 	return (err);
35742535Ssangeeta }
35752535Ssangeeta 
35762535Ssangeeta /*
35772535Ssangeeta  * NDP Cache Entry creation routine for IPv4.
35782535Ssangeeta  * Mapped entries are handled in arp.
35793448Sdh155122  * This routine must always be called with ndp4->ndp_g_lock held.
35802535Ssangeeta  * Prior to return, nce_refcnt is incremented.
35812535Ssangeeta  */
35822535Ssangeeta static int
35834714Ssowmini ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags,
35844714Ssowmini     nce_t **newnce, nce_t *src_nce)
35852535Ssangeeta {
35862535Ssangeeta 	static	nce_t		nce_nil;
35872535Ssangeeta 	nce_t		*nce;
35882535Ssangeeta 	mblk_t		*mp;
35894714Ssowmini 	mblk_t		*template = NULL;
35902535Ssangeeta 	nce_t		**ncep;
35913448Sdh155122 	ip_stack_t	*ipst = ill->ill_ipst;
35924714Ssowmini 	uint16_t	state = ND_INITIAL;
35934714Ssowmini 	int		err;
35943448Sdh155122 
35953448Sdh155122 	ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock));
35964714Ssowmini 	ASSERT(!ill->ill_isv6);
35974714Ssowmini 	ASSERT((flags & NCE_F_MAPPING) == 0);
35984714Ssowmini 
35994714Ssowmini 	if (ill->ill_resolver_mp == NULL)
36002535Ssangeeta 		return (EINVAL);
36012535Ssangeeta 	/*
36022535Ssangeeta 	 * Allocate the mblk to hold the nce.
36032535Ssangeeta 	 */
36042535Ssangeeta 	mp = allocb(sizeof (nce_t), BPRI_MED);
36052535Ssangeeta 	if (mp == NULL)
36062535Ssangeeta 		return (ENOMEM);
36072535Ssangeeta 
36082535Ssangeeta 	nce = (nce_t *)mp->b_rptr;
36092535Ssangeeta 	mp->b_wptr = (uchar_t *)&nce[1];
36102535Ssangeeta 	*nce = nce_nil;
36112535Ssangeeta 	nce->nce_ill = ill;
36122535Ssangeeta 	nce->nce_ipversion = IPV4_VERSION;
36132535Ssangeeta 	nce->nce_flags = flags;
36142535Ssangeeta 	nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT;
36152535Ssangeeta 	nce->nce_rcnt = ill->ill_xmit_count;
36162535Ssangeeta 	IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr);
36174714Ssowmini 	nce->nce_mask = ipv6_all_ones;
36182535Ssangeeta 	nce->nce_extract_mask = ipv6_all_zeros;
36194714Ssowmini 	nce->nce_ll_extract_start = 0;
36202535Ssangeeta 	nce->nce_qd_mp = NULL;
36212535Ssangeeta 	nce->nce_mp = mp;
36222535Ssangeeta 	/* This one is for nce getting created */
36232535Ssangeeta 	nce->nce_refcnt = 1;
36242535Ssangeeta 	mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL);
36253448Sdh155122 	ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr));
36262535Ssangeeta 
36275023Scarlsonj 	nce->nce_trace_disable = B_FALSE;
36285023Scarlsonj 
36294714Ssowmini 	if (src_nce != NULL) {
36304714Ssowmini 		/*
36314714Ssowmini 		 * src_nce has been provided by the caller. The only
36324714Ssowmini 		 * caller who provides a non-null, non-broadcast
36334714Ssowmini 		 * src_nce is from ip_newroute() which must pass in
36344714Ssowmini 		 * a ND_REACHABLE src_nce (this condition is verified
36354714Ssowmini 		 * via an ASSERT for the save_ire->ire_nce in ip_newroute())
36364714Ssowmini 		 */
36374714Ssowmini 		mutex_enter(&src_nce->nce_lock);
36384714Ssowmini 		state = src_nce->nce_state;
36394714Ssowmini 		if ((src_nce->nce_flags & NCE_F_CONDEMNED) ||
36404714Ssowmini 		    (ipst->ips_ndp4->ndp_g_hw_change > 0)) {
36414714Ssowmini 			/*
36424714Ssowmini 			 * src_nce has been deleted, or
36434714Ssowmini 			 * ip_arp_news is in the middle of
36444714Ssowmini 			 * flushing entries in the the nce.
36454714Ssowmini 			 * Fail the add, since we don't know
36464714Ssowmini 			 * if it is safe to copy the contents of
36474714Ssowmini 			 * src_nce
36484714Ssowmini 			 */
36494714Ssowmini 			DTRACE_PROBE2(nce__bad__src__nce,
36504714Ssowmini 			    nce_t *, src_nce, ill_t *, ill);
36514714Ssowmini 			mutex_exit(&src_nce->nce_lock);
36524714Ssowmini 			err = EINVAL;
36534714Ssowmini 			goto err_ret;
36544714Ssowmini 		}
36554714Ssowmini 		template = copyb(src_nce->nce_res_mp);
36564714Ssowmini 		mutex_exit(&src_nce->nce_lock);
36574714Ssowmini 		if (template == NULL) {
36584714Ssowmini 			err = ENOMEM;
36594714Ssowmini 			goto err_ret;
36604714Ssowmini 		}
36614714Ssowmini 	} else if (flags & NCE_F_BCAST) {
36624714Ssowmini 		/*
36634714Ssowmini 		 * broadcast nce.
36644714Ssowmini 		 */
36654714Ssowmini 		template = copyb(ill->ill_bcast_mp);
36664714Ssowmini 		if (template == NULL) {
36674714Ssowmini 			err = ENOMEM;
36684714Ssowmini 			goto err_ret;
36694714Ssowmini 		}
36704714Ssowmini 		state = ND_REACHABLE;
36714714Ssowmini 	} else if (ill->ill_net_type == IRE_IF_NORESOLVER) {
36724714Ssowmini 		/*
36734714Ssowmini 		 * NORESOLVER entries are always created in the REACHABLE
36744714Ssowmini 		 * state. We create a nce_res_mp with the IP nexthop address
36754714Ssowmini 		 * in the destination address in the DLPI hdr if the
36764714Ssowmini 		 * physical length is exactly 4 bytes.
36774714Ssowmini 		 *
36784714Ssowmini 		 * XXX not clear which drivers set ill_phys_addr_length to
36794714Ssowmini 		 * IP_ADDR_LEN.
36804714Ssowmini 		 */
36814714Ssowmini 		if (ill->ill_phys_addr_length == IP_ADDR_LEN) {
36824714Ssowmini 			template = ill_dlur_gen((uchar_t *)addr,
36834714Ssowmini 			    ill->ill_phys_addr_length,
36844714Ssowmini 			    ill->ill_sap, ill->ill_sap_length);
36854714Ssowmini 		} else {
36864714Ssowmini 			template = copyb(ill->ill_resolver_mp);
36874714Ssowmini 		}
36884714Ssowmini 		if (template == NULL) {
36894714Ssowmini 			err = ENOMEM;
36904714Ssowmini 			goto err_ret;
36914714Ssowmini 		}
36924714Ssowmini 		state = ND_REACHABLE;
36934714Ssowmini 	}
36944714Ssowmini 	nce->nce_fp_mp = NULL;
36954714Ssowmini 	nce->nce_res_mp = template;
36964714Ssowmini 	nce->nce_state = state;
36974714Ssowmini 	if (state == ND_REACHABLE) {
36984714Ssowmini 		nce->nce_last = TICK_TO_MSEC(lbolt64);
36994714Ssowmini 		nce->nce_init_time = TICK_TO_MSEC(lbolt64);
37004714Ssowmini 	} else {
37014714Ssowmini 		nce->nce_last = 0;
37024714Ssowmini 		if (state == ND_INITIAL)
37034714Ssowmini 			nce->nce_init_time = TICK_TO_MSEC(lbolt64);
37044714Ssowmini 	}
37054714Ssowmini 
37064714Ssowmini 	ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) ||
37074714Ssowmini 	    (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE));
37082535Ssangeeta 	/*
37092535Ssangeeta 	 * Atomically ensure that the ill is not CONDEMNED, before
37102535Ssangeeta 	 * adding the NCE.
37112535Ssangeeta 	 */
37122535Ssangeeta 	mutex_enter(&ill->ill_lock);
37132535Ssangeeta 	if (ill->ill_state_flags & ILL_CONDEMNED) {
37142535Ssangeeta 		mutex_exit(&ill->ill_lock);
37154714Ssowmini 		err = EINVAL;
37164714Ssowmini 		goto err_ret;
37172535Ssangeeta 	}
37182535Ssangeeta 	if ((nce->nce_next = *ncep) != NULL)
37192535Ssangeeta 		nce->nce_next->nce_ptpn = &nce->nce_next;
37202535Ssangeeta 	*ncep = nce;
37212535Ssangeeta 	nce->nce_ptpn = ncep;
37222535Ssangeeta 	*newnce = nce;
37232535Ssangeeta 	/* This one is for nce being used by an active thread */
37242535Ssangeeta 	NCE_REFHOLD(*newnce);
37252535Ssangeeta 
37262535Ssangeeta 	/* Bump up the number of nce's referencing this ill */
37276255Ssowmini 	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
37286255Ssowmini 	    (char *), "nce", (void *), nce);
37296379Ssowmini 	ill->ill_nce_cnt++;
37302535Ssangeeta 	mutex_exit(&ill->ill_lock);
37314714Ssowmini 	DTRACE_PROBE1(ndp__add__v4, nce_t *, nce);
37322535Ssangeeta 	return (0);
37334714Ssowmini err_ret:
37344714Ssowmini 	freeb(mp);
37354714Ssowmini 	freemsg(template);
37364714Ssowmini 	return (err);
37372535Ssangeeta }
37382535Ssangeeta 
37392546Scarlsonj /*
37402546Scarlsonj  * ndp_walk routine to delete all entries that have a given destination or
37412546Scarlsonj  * gateway address and cached link layer (MAC) address.  This is used when ARP
37422546Scarlsonj  * informs us that a network-to-link-layer mapping may have changed.
37432546Scarlsonj  */
37442546Scarlsonj void
37452546Scarlsonj nce_delete_hw_changed(nce_t *nce, void *arg)
37462546Scarlsonj {
37472546Scarlsonj 	nce_hw_map_t *hwm = arg;
37482546Scarlsonj 	mblk_t *mp;
37492546Scarlsonj 	dl_unitdata_req_t *dlu;
37502546Scarlsonj 	uchar_t *macaddr;
37512546Scarlsonj 	ill_t *ill;
37522546Scarlsonj 	int saplen;
37532546Scarlsonj 	ipaddr_t nce_addr;
37542546Scarlsonj 
37552546Scarlsonj 	if (nce->nce_state != ND_REACHABLE)
37562546Scarlsonj 		return;
37572546Scarlsonj 
37582546Scarlsonj 	IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
37592546Scarlsonj 	if (nce_addr != hwm->hwm_addr)
37602546Scarlsonj 		return;
37612546Scarlsonj 
37622546Scarlsonj 	mutex_enter(&nce->nce_lock);
37632546Scarlsonj 	if ((mp = nce->nce_res_mp) == NULL) {
37642546Scarlsonj 		mutex_exit(&nce->nce_lock);
37652546Scarlsonj 		return;
37662546Scarlsonj 	}
37672546Scarlsonj 	dlu = (dl_unitdata_req_t *)mp->b_rptr;
37682546Scarlsonj 	macaddr = (uchar_t *)(dlu + 1);
37692546Scarlsonj 	ill = nce->nce_ill;
37702546Scarlsonj 	if ((saplen = ill->ill_sap_length) > 0)
37712546Scarlsonj 		macaddr += saplen;
37722546Scarlsonj 	else
37732546Scarlsonj 		saplen = -saplen;
37742546Scarlsonj 
37752546Scarlsonj 	/*
37762546Scarlsonj 	 * If the hardware address is unchanged, then leave this one alone.
37772546Scarlsonj 	 * Note that saplen == abs(saplen) now.
37782546Scarlsonj 	 */
37792546Scarlsonj 	if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen &&
37802546Scarlsonj 	    bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) {
37812546Scarlsonj 		mutex_exit(&nce->nce_lock);
37822546Scarlsonj 		return;
37832546Scarlsonj 	}
37842546Scarlsonj 	mutex_exit(&nce->nce_lock);
37852546Scarlsonj 
37862546Scarlsonj 	DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce);
37872546Scarlsonj 	ndp_delete(nce);
37882546Scarlsonj }
37892546Scarlsonj 
37902546Scarlsonj /*
37912546Scarlsonj  * This function verifies whether a given IPv4 address is potentially known to
37922546Scarlsonj  * the NCE subsystem.  If so, then ARP must not delete the corresponding ace_t,
37932546Scarlsonj  * so that it can continue to look for hardware changes on that address.
37942546Scarlsonj  */
37952546Scarlsonj boolean_t
37963448Sdh155122 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns)
37972546Scarlsonj {
37982546Scarlsonj 	nce_t		*nce;
37992546Scarlsonj 	struct in_addr	nceaddr;
38003448Sdh155122 	ip_stack_t	*ipst = ns->netstack_ip;
38012546Scarlsonj 
38022546Scarlsonj 	if (addr == INADDR_ANY)
38032546Scarlsonj 		return (B_FALSE);
38042546Scarlsonj 
38053448Sdh155122 	mutex_enter(&ipst->ips_ndp4->ndp_g_lock);
38063448Sdh155122 	nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr);
38072546Scarlsonj 	for (; nce != NULL; nce = nce->nce_next) {
38082546Scarlsonj 		/* Note that only v4 mapped entries are in the table. */
38092546Scarlsonj 		IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr);
38102546Scarlsonj 		if (addr == nceaddr.s_addr &&
38112546Scarlsonj 		    IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) {
38122546Scarlsonj 			/* Single flag check; no lock needed */
38132546Scarlsonj 			if (!(nce->nce_flags & NCE_F_CONDEMNED))
38142546Scarlsonj 				break;
38152546Scarlsonj 		}
38162546Scarlsonj 	}
38173448Sdh155122 	mutex_exit(&ipst->ips_ndp4->ndp_g_lock);
38182546Scarlsonj 	return (nce != NULL);
38192546Scarlsonj }
38208485SPeter.Memishian@Sun.COM 
38218485SPeter.Memishian@Sun.COM /*
38228485SPeter.Memishian@Sun.COM  * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly
38238485SPeter.Memishian@Sun.COM  * with IPMP.  Specifically, since neighbor discovery is always done on
38248485SPeter.Memishian@Sun.COM  * underlying interfaces (even for addresses owned by an IPMP interface), we
38258485SPeter.Memishian@Sun.COM  * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface
38268485SPeter.Memishian@Sun.COM  * associated with `ill' (if it exists).
38278485SPeter.Memishian@Sun.COM  */
38288485SPeter.Memishian@Sun.COM static ipif_t *
38298485SPeter.Memishian@Sun.COM ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill)
38308485SPeter.Memishian@Sun.COM {
38318485SPeter.Memishian@Sun.COM 	ipif_t *ipif;
38328485SPeter.Memishian@Sun.COM 	ip_stack_t *ipst = ill->ill_ipst;
38338485SPeter.Memishian@Sun.COM 
38348485SPeter.Memishian@Sun.COM 	ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst);
38358485SPeter.Memishian@Sun.COM 	if (ipif == NULL && IS_UNDER_IPMP(ill)) {
38368485SPeter.Memishian@Sun.COM 		if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) {
38378485SPeter.Memishian@Sun.COM 			ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst);
38388485SPeter.Memishian@Sun.COM 			ill_refrele(ill);
38398485SPeter.Memishian@Sun.COM 		}
38408485SPeter.Memishian@Sun.COM 	}
38418485SPeter.Memishian@Sun.COM 	return (ipif);
38428485SPeter.Memishian@Sun.COM }
3843