10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51676Sjpk * Common Development and Distribution License (the "License"). 61676Sjpk * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 22*8485SPeter.Memishian@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #include <sys/types.h> 270Sstevel@tonic-gate #include <sys/stream.h> 280Sstevel@tonic-gate #include <sys/stropts.h> 292546Scarlsonj #include <sys/strsun.h> 300Sstevel@tonic-gate #include <sys/sysmacros.h> 310Sstevel@tonic-gate #include <sys/errno.h> 320Sstevel@tonic-gate #include <sys/dlpi.h> 330Sstevel@tonic-gate #include <sys/socket.h> 340Sstevel@tonic-gate #include <sys/ddi.h> 352546Scarlsonj #include <sys/sunddi.h> 360Sstevel@tonic-gate #include <sys/cmn_err.h> 370Sstevel@tonic-gate #include <sys/debug.h> 380Sstevel@tonic-gate #include <sys/vtrace.h> 390Sstevel@tonic-gate #include <sys/kmem.h> 400Sstevel@tonic-gate #include <sys/zone.h> 412546Scarlsonj #include <sys/ethernet.h> 422546Scarlsonj #include <sys/sdt.h> 430Sstevel@tonic-gate 440Sstevel@tonic-gate #include <net/if.h> 452546Scarlsonj #include <net/if_types.h> 460Sstevel@tonic-gate #include <net/if_dl.h> 470Sstevel@tonic-gate #include <net/route.h> 480Sstevel@tonic-gate #include <netinet/in.h> 490Sstevel@tonic-gate #include <netinet/ip6.h> 500Sstevel@tonic-gate #include <netinet/icmp6.h> 510Sstevel@tonic-gate 520Sstevel@tonic-gate #include <inet/common.h> 530Sstevel@tonic-gate #include <inet/mi.h> 540Sstevel@tonic-gate #include <inet/mib2.h> 550Sstevel@tonic-gate #include <inet/nd.h> 560Sstevel@tonic-gate #include <inet/ip.h> 572733Snordmark #include <inet/ip_impl.h> 583448Sdh155122 #include <inet/ipclassifier.h> 590Sstevel@tonic-gate #include <inet/ip_if.h> 600Sstevel@tonic-gate #include <inet/ip_ire.h> 610Sstevel@tonic-gate #include <inet/ip_rts.h> 620Sstevel@tonic-gate #include <inet/ip6.h> 630Sstevel@tonic-gate #include <inet/ip_ndp.h> 640Sstevel@tonic-gate #include <inet/ipsec_impl.h> 650Sstevel@tonic-gate #include <inet/ipsec_info.h> 662546Scarlsonj #include <inet/sctp_ip.h> 670Sstevel@tonic-gate 680Sstevel@tonic-gate /* 690Sstevel@tonic-gate * Function names with nce_ prefix are static while function 700Sstevel@tonic-gate * names with ndp_ prefix are used by rest of the IP. 712546Scarlsonj * 722546Scarlsonj * Lock ordering: 732546Scarlsonj * 742546Scarlsonj * ndp_g_lock -> ill_lock -> nce_lock 752546Scarlsonj * 762546Scarlsonj * The ndp_g_lock protects the NCE hash (nce_hash_tbl, NCE_HASH_PTR) and 772546Scarlsonj * nce_next. Nce_lock protects the contents of the NCE (particularly 782546Scarlsonj * nce_refcnt). 790Sstevel@tonic-gate */ 800Sstevel@tonic-gate 812546Scarlsonj static boolean_t nce_cmp_ll_addr(const nce_t *nce, const uchar_t *new_ll_addr, 820Sstevel@tonic-gate uint32_t ll_addr_len); 830Sstevel@tonic-gate static void nce_ire_delete(nce_t *nce); 840Sstevel@tonic-gate static void nce_ire_delete1(ire_t *ire, char *nce_arg); 850Sstevel@tonic-gate static void nce_set_ll(nce_t *nce, uchar_t *ll_addr); 86*8485SPeter.Memishian@Sun.COM static nce_t *nce_lookup_addr(ill_t *, boolean_t, const in6_addr_t *, 87*8485SPeter.Memishian@Sun.COM nce_t *); 88*8485SPeter.Memishian@Sun.COM static nce_t *nce_lookup_mapping(ill_t *, const in6_addr_t *); 890Sstevel@tonic-gate static void nce_make_mapping(nce_t *nce, uchar_t *addrpos, 900Sstevel@tonic-gate uchar_t *addr); 910Sstevel@tonic-gate static int nce_set_multicast(ill_t *ill, const in6_addr_t *addr); 920Sstevel@tonic-gate static void nce_queue_mp(nce_t *nce, mblk_t *mp); 930Sstevel@tonic-gate static mblk_t *nce_udreq_alloc(ill_t *ill); 940Sstevel@tonic-gate static void nce_update(nce_t *nce, uint16_t new_state, 950Sstevel@tonic-gate uchar_t *new_ll_addr); 960Sstevel@tonic-gate static uint32_t nce_solicit(nce_t *nce, mblk_t *mp); 97*8485SPeter.Memishian@Sun.COM static boolean_t nce_xmit(ill_t *ill, uint8_t type, 98*8485SPeter.Memishian@Sun.COM boolean_t use_lla_addr, const in6_addr_t *sender, 990Sstevel@tonic-gate const in6_addr_t *target, int flag); 100*8485SPeter.Memishian@Sun.COM static boolean_t nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, 101*8485SPeter.Memishian@Sun.COM const in6_addr_t *target, uint_t flags); 102*8485SPeter.Memishian@Sun.COM static boolean_t nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, 103*8485SPeter.Memishian@Sun.COM const in6_addr_t *src, uint_t flags); 1044714Ssowmini static int ndp_add_v4(ill_t *, const in_addr_t *, uint16_t, 1054714Ssowmini nce_t **, nce_t *); 106*8485SPeter.Memishian@Sun.COM static ipif_t *ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill); 1074714Ssowmini 1085023Scarlsonj #ifdef DEBUG 1095023Scarlsonj static void nce_trace_cleanup(const nce_t *); 1100Sstevel@tonic-gate #endif 1110Sstevel@tonic-gate 1123448Sdh155122 #define NCE_HASH_PTR_V4(ipst, addr) \ 1133448Sdh155122 (&((ipst)->ips_ndp4->nce_hash_tbl[IRE_ADDR_HASH(addr, NCE_TABLE_SIZE)])) 1143448Sdh155122 1153448Sdh155122 #define NCE_HASH_PTR_V6(ipst, addr) \ 1163448Sdh155122 (&((ipst)->ips_ndp6->nce_hash_tbl[NCE_ADDR_HASH_V6(addr, \ 1173448Sdh155122 NCE_TABLE_SIZE)])) 1180Sstevel@tonic-gate 1192546Scarlsonj /* Non-tunable probe interval, based on link capabilities */ 1202546Scarlsonj #define ILL_PROBE_INTERVAL(ill) ((ill)->ill_note_link ? 150 : 1500) 1212546Scarlsonj 1220Sstevel@tonic-gate /* 1230Sstevel@tonic-gate * NDP Cache Entry creation routine. 1240Sstevel@tonic-gate * Mapped entries will never do NUD . 1253448Sdh155122 * This routine must always be called with ndp6->ndp_g_lock held. 1260Sstevel@tonic-gate * Prior to return, nce_refcnt is incremented. 1270Sstevel@tonic-gate */ 1284714Ssowmini int 1292535Ssangeeta ndp_add_v6(ill_t *ill, uchar_t *hw_addr, const in6_addr_t *addr, 1300Sstevel@tonic-gate const in6_addr_t *mask, const in6_addr_t *extract_mask, 1310Sstevel@tonic-gate uint32_t hw_extract_start, uint16_t flags, uint16_t state, 1320Sstevel@tonic-gate nce_t **newnce) 1330Sstevel@tonic-gate { 1342535Ssangeeta static nce_t nce_nil; 1350Sstevel@tonic-gate nce_t *nce; 1360Sstevel@tonic-gate mblk_t *mp; 1370Sstevel@tonic-gate mblk_t *template; 1380Sstevel@tonic-gate nce_t **ncep; 1392546Scarlsonj int err; 1400Sstevel@tonic-gate boolean_t dropped = B_FALSE; 1413448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 1423448Sdh155122 1433448Sdh155122 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 1442535Ssangeeta ASSERT(ill != NULL && ill->ill_isv6); 1450Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(addr)) { 1464714Ssowmini ip0dbg(("ndp_add_v6: no addr\n")); 1470Sstevel@tonic-gate return (EINVAL); 1480Sstevel@tonic-gate } 1490Sstevel@tonic-gate if ((flags & ~NCE_EXTERNAL_FLAGS_MASK)) { 1504714Ssowmini ip0dbg(("ndp_add_v6: flags = %x\n", (int)flags)); 1510Sstevel@tonic-gate return (EINVAL); 1520Sstevel@tonic-gate } 1530Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(extract_mask) && 1540Sstevel@tonic-gate (flags & NCE_F_MAPPING)) { 1554714Ssowmini ip0dbg(("ndp_add_v6: extract mask zero for mapping")); 1560Sstevel@tonic-gate return (EINVAL); 1570Sstevel@tonic-gate } 1580Sstevel@tonic-gate /* 1590Sstevel@tonic-gate * Allocate the mblk to hold the nce. 1600Sstevel@tonic-gate * 1610Sstevel@tonic-gate * XXX This can come out of a separate cache - nce_cache. 1620Sstevel@tonic-gate * We don't need the mp anymore as there are no more 1630Sstevel@tonic-gate * "qwriter"s 1640Sstevel@tonic-gate */ 1650Sstevel@tonic-gate mp = allocb(sizeof (nce_t), BPRI_MED); 1660Sstevel@tonic-gate if (mp == NULL) 1670Sstevel@tonic-gate return (ENOMEM); 1680Sstevel@tonic-gate 1690Sstevel@tonic-gate nce = (nce_t *)mp->b_rptr; 1700Sstevel@tonic-gate mp->b_wptr = (uchar_t *)&nce[1]; 1710Sstevel@tonic-gate *nce = nce_nil; 1720Sstevel@tonic-gate 1730Sstevel@tonic-gate /* 1740Sstevel@tonic-gate * This one holds link layer address 1750Sstevel@tonic-gate */ 1760Sstevel@tonic-gate if (ill->ill_net_type == IRE_IF_RESOLVER) { 1770Sstevel@tonic-gate template = nce_udreq_alloc(ill); 1780Sstevel@tonic-gate } else { 1793150Ssowmini if (ill->ill_resolver_mp == NULL) { 1803150Ssowmini freeb(mp); 1813150Ssowmini return (EINVAL); 1823150Ssowmini } 1830Sstevel@tonic-gate ASSERT((ill->ill_net_type == IRE_IF_NORESOLVER)); 1840Sstevel@tonic-gate template = copyb(ill->ill_resolver_mp); 1850Sstevel@tonic-gate } 1860Sstevel@tonic-gate if (template == NULL) { 1870Sstevel@tonic-gate freeb(mp); 1880Sstevel@tonic-gate return (ENOMEM); 1890Sstevel@tonic-gate } 1900Sstevel@tonic-gate nce->nce_ill = ill; 1912535Ssangeeta nce->nce_ipversion = IPV6_VERSION; 1920Sstevel@tonic-gate nce->nce_flags = flags; 1930Sstevel@tonic-gate nce->nce_state = state; 1940Sstevel@tonic-gate nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 1950Sstevel@tonic-gate nce->nce_rcnt = ill->ill_xmit_count; 1960Sstevel@tonic-gate nce->nce_addr = *addr; 1970Sstevel@tonic-gate nce->nce_mask = *mask; 1980Sstevel@tonic-gate nce->nce_extract_mask = *extract_mask; 1990Sstevel@tonic-gate nce->nce_ll_extract_start = hw_extract_start; 2000Sstevel@tonic-gate nce->nce_fp_mp = NULL; 2010Sstevel@tonic-gate nce->nce_res_mp = template; 2020Sstevel@tonic-gate if (state == ND_REACHABLE) 2030Sstevel@tonic-gate nce->nce_last = TICK_TO_MSEC(lbolt64); 2040Sstevel@tonic-gate else 2050Sstevel@tonic-gate nce->nce_last = 0; 2060Sstevel@tonic-gate nce->nce_qd_mp = NULL; 2070Sstevel@tonic-gate nce->nce_mp = mp; 2080Sstevel@tonic-gate if (hw_addr != NULL) 2090Sstevel@tonic-gate nce_set_ll(nce, hw_addr); 2100Sstevel@tonic-gate /* This one is for nce getting created */ 2110Sstevel@tonic-gate nce->nce_refcnt = 1; 2120Sstevel@tonic-gate mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 2130Sstevel@tonic-gate if (nce->nce_flags & NCE_F_MAPPING) { 2140Sstevel@tonic-gate ASSERT(IN6_IS_ADDR_MULTICAST(addr)); 2150Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_mask)); 2160Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 2173448Sdh155122 ncep = &ipst->ips_ndp6->nce_mask_entries; 2180Sstevel@tonic-gate } else { 2193448Sdh155122 ncep = ((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 2200Sstevel@tonic-gate } 2210Sstevel@tonic-gate 2225023Scarlsonj nce->nce_trace_disable = B_FALSE; 2235023Scarlsonj 2240Sstevel@tonic-gate /* 2250Sstevel@tonic-gate * Atomically ensure that the ill is not CONDEMNED, before 2260Sstevel@tonic-gate * adding the NCE. 2270Sstevel@tonic-gate */ 2280Sstevel@tonic-gate mutex_enter(&ill->ill_lock); 2290Sstevel@tonic-gate if (ill->ill_state_flags & ILL_CONDEMNED) { 2300Sstevel@tonic-gate mutex_exit(&ill->ill_lock); 2310Sstevel@tonic-gate freeb(mp); 2322546Scarlsonj freeb(template); 2330Sstevel@tonic-gate return (EINVAL); 2340Sstevel@tonic-gate } 2350Sstevel@tonic-gate if ((nce->nce_next = *ncep) != NULL) 2360Sstevel@tonic-gate nce->nce_next->nce_ptpn = &nce->nce_next; 2370Sstevel@tonic-gate *ncep = nce; 2380Sstevel@tonic-gate nce->nce_ptpn = ncep; 2390Sstevel@tonic-gate *newnce = nce; 2400Sstevel@tonic-gate /* This one is for nce being used by an active thread */ 2410Sstevel@tonic-gate NCE_REFHOLD(*newnce); 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate /* Bump up the number of nce's referencing this ill */ 2446255Ssowmini DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 2456255Ssowmini (char *), "nce", (void *), nce); 2466379Ssowmini ill->ill_nce_cnt++; 2470Sstevel@tonic-gate mutex_exit(&ill->ill_lock); 2480Sstevel@tonic-gate 2492546Scarlsonj err = 0; 2502546Scarlsonj if ((flags & NCE_F_PERMANENT) && state == ND_PROBE) { 2512546Scarlsonj mutex_enter(&nce->nce_lock); 2523448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 2532546Scarlsonj nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 2542546Scarlsonj mutex_exit(&nce->nce_lock); 255*8485SPeter.Memishian@Sun.COM dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 2562546Scarlsonj if (dropped) { 2572546Scarlsonj mutex_enter(&nce->nce_lock); 2582546Scarlsonj nce->nce_pcnt++; 2592546Scarlsonj mutex_exit(&nce->nce_lock); 2602546Scarlsonj } 2612546Scarlsonj NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 2623448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 2632546Scarlsonj err = EINPROGRESS; 2642546Scarlsonj } else if (flags & NCE_F_UNSOL_ADV) { 2650Sstevel@tonic-gate /* 2660Sstevel@tonic-gate * We account for the transmit below by assigning one 2670Sstevel@tonic-gate * less than the ndd variable. Subsequent decrements 2680Sstevel@tonic-gate * are done in ndp_timer. 2690Sstevel@tonic-gate */ 2700Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 2713448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 2723448Sdh155122 nce->nce_unsolicit_count = ipst->ips_ip_ndp_unsolicit_count - 1; 2730Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 274*8485SPeter.Memishian@Sun.COM dropped = nce_xmit_advert(nce, B_TRUE, &ipv6_all_hosts_mcast, 275*8485SPeter.Memishian@Sun.COM 0); 2760Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 2770Sstevel@tonic-gate if (dropped) 2780Sstevel@tonic-gate nce->nce_unsolicit_count++; 2790Sstevel@tonic-gate if (nce->nce_unsolicit_count != 0) { 280*8485SPeter.Memishian@Sun.COM ASSERT(nce->nce_timeout_id == 0); 2810Sstevel@tonic-gate nce->nce_timeout_id = timeout(ndp_timer, nce, 2823448Sdh155122 MSEC_TO_TICK(ipst->ips_ip_ndp_unsolicit_interval)); 2830Sstevel@tonic-gate } 2840Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 2853448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 2860Sstevel@tonic-gate } 287*8485SPeter.Memishian@Sun.COM 288741Smasputra /* 289741Smasputra * If the hw_addr is NULL, typically for ND_INCOMPLETE nces, then 290741Smasputra * we call nce_fastpath as soon as the nce is resolved in ndp_process. 291741Smasputra * We call nce_fastpath from nce_update if the link layer address of 292741Smasputra * the peer changes from nce_update 293741Smasputra */ 294741Smasputra if (hw_addr != NULL || ill->ill_net_type == IRE_IF_NORESOLVER) 295741Smasputra nce_fastpath(nce); 2962546Scarlsonj return (err); 2970Sstevel@tonic-gate } 2980Sstevel@tonic-gate 2990Sstevel@tonic-gate int 300*8485SPeter.Memishian@Sun.COM ndp_lookup_then_add_v6(ill_t *ill, boolean_t match_illgrp, uchar_t *hw_addr, 301*8485SPeter.Memishian@Sun.COM const in6_addr_t *addr, const in6_addr_t *mask, 302*8485SPeter.Memishian@Sun.COM const in6_addr_t *extract_mask, uint32_t hw_extract_start, uint16_t flags, 303*8485SPeter.Memishian@Sun.COM uint16_t state, nce_t **newnce) 3040Sstevel@tonic-gate { 3050Sstevel@tonic-gate int err = 0; 3060Sstevel@tonic-gate nce_t *nce; 3073448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 3080Sstevel@tonic-gate 3094714Ssowmini ASSERT(ill->ill_isv6); 3103448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 3113448Sdh155122 3123448Sdh155122 /* Get head of v6 hash table */ 3133448Sdh155122 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 314*8485SPeter.Memishian@Sun.COM nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 3150Sstevel@tonic-gate if (nce == NULL) { 3164714Ssowmini err = ndp_add_v6(ill, 3170Sstevel@tonic-gate hw_addr, 3180Sstevel@tonic-gate addr, 3190Sstevel@tonic-gate mask, 3200Sstevel@tonic-gate extract_mask, 3210Sstevel@tonic-gate hw_extract_start, 3220Sstevel@tonic-gate flags, 3230Sstevel@tonic-gate state, 3244714Ssowmini newnce); 3250Sstevel@tonic-gate } else { 3260Sstevel@tonic-gate *newnce = nce; 3270Sstevel@tonic-gate err = EEXIST; 3280Sstevel@tonic-gate } 3293448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 3300Sstevel@tonic-gate return (err); 3310Sstevel@tonic-gate } 3320Sstevel@tonic-gate 3330Sstevel@tonic-gate /* 3340Sstevel@tonic-gate * Remove all the CONDEMNED nces from the appropriate hash table. 3350Sstevel@tonic-gate * We create a private list of NCEs, these may have ires pointing 3360Sstevel@tonic-gate * to them, so the list will be passed through to clean up dependent 3370Sstevel@tonic-gate * ires and only then we can do NCE_REFRELE which can make NCE inactive. 3380Sstevel@tonic-gate */ 3390Sstevel@tonic-gate static void 3402535Ssangeeta nce_remove(ndp_g_t *ndp, nce_t *nce, nce_t **free_nce_list) 3410Sstevel@tonic-gate { 3420Sstevel@tonic-gate nce_t *nce1; 3430Sstevel@tonic-gate nce_t **ptpn; 3440Sstevel@tonic-gate 3452535Ssangeeta ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 3462535Ssangeeta ASSERT(ndp->ndp_g_walker == 0); 3470Sstevel@tonic-gate for (; nce; nce = nce1) { 3480Sstevel@tonic-gate nce1 = nce->nce_next; 3490Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 3500Sstevel@tonic-gate if (nce->nce_flags & NCE_F_CONDEMNED) { 3510Sstevel@tonic-gate ptpn = nce->nce_ptpn; 3520Sstevel@tonic-gate nce1 = nce->nce_next; 3530Sstevel@tonic-gate if (nce1 != NULL) 3540Sstevel@tonic-gate nce1->nce_ptpn = ptpn; 3550Sstevel@tonic-gate *ptpn = nce1; 3560Sstevel@tonic-gate nce->nce_ptpn = NULL; 3570Sstevel@tonic-gate nce->nce_next = NULL; 3580Sstevel@tonic-gate nce->nce_next = *free_nce_list; 3590Sstevel@tonic-gate *free_nce_list = nce; 3600Sstevel@tonic-gate } 3610Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 3620Sstevel@tonic-gate } 3630Sstevel@tonic-gate } 3640Sstevel@tonic-gate 3650Sstevel@tonic-gate /* 3660Sstevel@tonic-gate * 1. Mark the nce CONDEMNED. This ensures that no new nce_lookup() 3670Sstevel@tonic-gate * will return this NCE. Also no new IREs will be created that 3680Sstevel@tonic-gate * point to this NCE (See ire_add_v6). Also no new timeouts will 3690Sstevel@tonic-gate * be started (See NDP_RESTART_TIMER). 3700Sstevel@tonic-gate * 2. Cancel any currently running timeouts. 3710Sstevel@tonic-gate * 3. If there is an ndp walker, return. The walker will do the cleanup. 3720Sstevel@tonic-gate * This ensures that walkers see a consistent list of NCEs while walking. 3730Sstevel@tonic-gate * 4. Otherwise remove the NCE from the list of NCEs 3740Sstevel@tonic-gate * 5. Delete all IREs pointing to this NCE. 3750Sstevel@tonic-gate */ 3760Sstevel@tonic-gate void 3770Sstevel@tonic-gate ndp_delete(nce_t *nce) 3780Sstevel@tonic-gate { 3790Sstevel@tonic-gate nce_t **ptpn; 3800Sstevel@tonic-gate nce_t *nce1; 3812535Ssangeeta int ipversion = nce->nce_ipversion; 3823448Sdh155122 ndp_g_t *ndp; 3833448Sdh155122 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 3843448Sdh155122 3853448Sdh155122 if (ipversion == IPV4_VERSION) 3863448Sdh155122 ndp = ipst->ips_ndp4; 3873448Sdh155122 else 3883448Sdh155122 ndp = ipst->ips_ndp6; 3890Sstevel@tonic-gate 3900Sstevel@tonic-gate /* Serialize deletes */ 3910Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 3920Sstevel@tonic-gate if (nce->nce_flags & NCE_F_CONDEMNED) { 3930Sstevel@tonic-gate /* Some other thread is doing the delete */ 3940Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 3950Sstevel@tonic-gate return; 3960Sstevel@tonic-gate } 3970Sstevel@tonic-gate /* 3980Sstevel@tonic-gate * Caller has a refhold. Also 1 ref for being in the list. Thus 3990Sstevel@tonic-gate * refcnt has to be >= 2 4000Sstevel@tonic-gate */ 4010Sstevel@tonic-gate ASSERT(nce->nce_refcnt >= 2); 4020Sstevel@tonic-gate nce->nce_flags |= NCE_F_CONDEMNED; 4030Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 4040Sstevel@tonic-gate 4050Sstevel@tonic-gate nce_fastpath_list_delete(nce); 4060Sstevel@tonic-gate 4070Sstevel@tonic-gate /* 4080Sstevel@tonic-gate * Cancel any running timer. Timeout can't be restarted 4090Sstevel@tonic-gate * since CONDEMNED is set. Can't hold nce_lock across untimeout. 4100Sstevel@tonic-gate * Passing invalid timeout id is fine. 4110Sstevel@tonic-gate */ 4120Sstevel@tonic-gate if (nce->nce_timeout_id != 0) { 4130Sstevel@tonic-gate (void) untimeout(nce->nce_timeout_id); 4140Sstevel@tonic-gate nce->nce_timeout_id = 0; 4150Sstevel@tonic-gate } 4160Sstevel@tonic-gate 4172535Ssangeeta mutex_enter(&ndp->ndp_g_lock); 4180Sstevel@tonic-gate if (nce->nce_ptpn == NULL) { 4190Sstevel@tonic-gate /* 4200Sstevel@tonic-gate * The last ndp walker has already removed this nce from 4210Sstevel@tonic-gate * the list after we marked the nce CONDEMNED and before 4222535Ssangeeta * we grabbed the global lock. 4230Sstevel@tonic-gate */ 4242535Ssangeeta mutex_exit(&ndp->ndp_g_lock); 4250Sstevel@tonic-gate return; 4260Sstevel@tonic-gate } 4272535Ssangeeta if (ndp->ndp_g_walker > 0) { 4280Sstevel@tonic-gate /* 4290Sstevel@tonic-gate * Can't unlink. The walker will clean up 4300Sstevel@tonic-gate */ 4312535Ssangeeta ndp->ndp_g_walker_cleanup = B_TRUE; 4322535Ssangeeta mutex_exit(&ndp->ndp_g_lock); 4330Sstevel@tonic-gate return; 4340Sstevel@tonic-gate } 4350Sstevel@tonic-gate 4360Sstevel@tonic-gate /* 4370Sstevel@tonic-gate * Now remove the nce from the list. NDP_RESTART_TIMER won't restart 4380Sstevel@tonic-gate * the timer since it is marked CONDEMNED. 4390Sstevel@tonic-gate */ 4400Sstevel@tonic-gate ptpn = nce->nce_ptpn; 4410Sstevel@tonic-gate nce1 = nce->nce_next; 4420Sstevel@tonic-gate if (nce1 != NULL) 4430Sstevel@tonic-gate nce1->nce_ptpn = ptpn; 4440Sstevel@tonic-gate *ptpn = nce1; 4450Sstevel@tonic-gate nce->nce_ptpn = NULL; 4460Sstevel@tonic-gate nce->nce_next = NULL; 4472535Ssangeeta mutex_exit(&ndp->ndp_g_lock); 4480Sstevel@tonic-gate 4490Sstevel@tonic-gate nce_ire_delete(nce); 4500Sstevel@tonic-gate } 4510Sstevel@tonic-gate 4520Sstevel@tonic-gate void 4530Sstevel@tonic-gate ndp_inactive(nce_t *nce) 4540Sstevel@tonic-gate { 4550Sstevel@tonic-gate mblk_t **mpp; 4560Sstevel@tonic-gate ill_t *ill; 4570Sstevel@tonic-gate 4580Sstevel@tonic-gate ASSERT(nce->nce_refcnt == 0); 4590Sstevel@tonic-gate ASSERT(MUTEX_HELD(&nce->nce_lock)); 4600Sstevel@tonic-gate ASSERT(nce->nce_fastpath == NULL); 4610Sstevel@tonic-gate 4620Sstevel@tonic-gate /* Free all nce allocated messages */ 4630Sstevel@tonic-gate mpp = &nce->nce_first_mp_to_free; 4640Sstevel@tonic-gate do { 4650Sstevel@tonic-gate while (*mpp != NULL) { 4660Sstevel@tonic-gate mblk_t *mp; 4670Sstevel@tonic-gate 4680Sstevel@tonic-gate mp = *mpp; 4690Sstevel@tonic-gate *mpp = mp->b_next; 4702958Sdr146992 4712958Sdr146992 inet_freemsg(mp); 4720Sstevel@tonic-gate } 4730Sstevel@tonic-gate } while (mpp++ != &nce->nce_last_mp_to_free); 4740Sstevel@tonic-gate 4755023Scarlsonj #ifdef DEBUG 4765023Scarlsonj nce_trace_cleanup(nce); 4770Sstevel@tonic-gate #endif 4780Sstevel@tonic-gate 4790Sstevel@tonic-gate ill = nce->nce_ill; 4800Sstevel@tonic-gate mutex_enter(&ill->ill_lock); 4816255Ssowmini DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 4826255Ssowmini (char *), "nce", (void *), nce); 4836379Ssowmini ill->ill_nce_cnt--; 4840Sstevel@tonic-gate /* 4850Sstevel@tonic-gate * If the number of nce's associated with this ill have dropped 4860Sstevel@tonic-gate * to zero, check whether we need to restart any operation that 4870Sstevel@tonic-gate * is waiting for this to happen. 4880Sstevel@tonic-gate */ 4896255Ssowmini if (ILL_DOWN_OK(ill)) { 4900Sstevel@tonic-gate /* ipif_ill_refrele_tail drops the ill_lock */ 4910Sstevel@tonic-gate ipif_ill_refrele_tail(ill); 4920Sstevel@tonic-gate } else { 4930Sstevel@tonic-gate mutex_exit(&ill->ill_lock); 4940Sstevel@tonic-gate } 4950Sstevel@tonic-gate mutex_destroy(&nce->nce_lock); 4962958Sdr146992 if (nce->nce_mp != NULL) 4972958Sdr146992 inet_freemsg(nce->nce_mp); 4980Sstevel@tonic-gate } 4990Sstevel@tonic-gate 5000Sstevel@tonic-gate /* 5010Sstevel@tonic-gate * ndp_walk routine. Delete the nce if it is associated with the ill 5020Sstevel@tonic-gate * that is going away. Always called as a writer. 5030Sstevel@tonic-gate */ 5040Sstevel@tonic-gate void 5050Sstevel@tonic-gate ndp_delete_per_ill(nce_t *nce, uchar_t *arg) 5060Sstevel@tonic-gate { 5070Sstevel@tonic-gate if ((nce != NULL) && nce->nce_ill == (ill_t *)arg) { 5080Sstevel@tonic-gate ndp_delete(nce); 5090Sstevel@tonic-gate } 5100Sstevel@tonic-gate } 5110Sstevel@tonic-gate 5120Sstevel@tonic-gate /* 5130Sstevel@tonic-gate * Walk a list of to be inactive NCEs and blow away all the ires. 5140Sstevel@tonic-gate */ 5150Sstevel@tonic-gate static void 5160Sstevel@tonic-gate nce_ire_delete_list(nce_t *nce) 5170Sstevel@tonic-gate { 5180Sstevel@tonic-gate nce_t *nce_next; 5190Sstevel@tonic-gate 5200Sstevel@tonic-gate ASSERT(nce != NULL); 5210Sstevel@tonic-gate while (nce != NULL) { 5220Sstevel@tonic-gate nce_next = nce->nce_next; 5230Sstevel@tonic-gate nce->nce_next = NULL; 5240Sstevel@tonic-gate 5250Sstevel@tonic-gate /* 5260Sstevel@tonic-gate * It is possible for the last ndp walker (this thread) 5270Sstevel@tonic-gate * to come here after ndp_delete has marked the nce CONDEMNED 5280Sstevel@tonic-gate * and before it has removed the nce from the fastpath list 5290Sstevel@tonic-gate * or called untimeout. So we need to do it here. It is safe 5300Sstevel@tonic-gate * for both ndp_delete and this thread to do it twice or 5310Sstevel@tonic-gate * even simultaneously since each of the threads has a 5320Sstevel@tonic-gate * reference on the nce. 5330Sstevel@tonic-gate */ 5340Sstevel@tonic-gate nce_fastpath_list_delete(nce); 5350Sstevel@tonic-gate /* 5360Sstevel@tonic-gate * Cancel any running timer. Timeout can't be restarted 5370Sstevel@tonic-gate * since CONDEMNED is set. Can't hold nce_lock across untimeout. 5380Sstevel@tonic-gate * Passing invalid timeout id is fine. 5390Sstevel@tonic-gate */ 5400Sstevel@tonic-gate if (nce->nce_timeout_id != 0) { 5410Sstevel@tonic-gate (void) untimeout(nce->nce_timeout_id); 5420Sstevel@tonic-gate nce->nce_timeout_id = 0; 5430Sstevel@tonic-gate } 5442535Ssangeeta /* 5452535Ssangeeta * We might hit this func thus in the v4 case: 5462535Ssangeeta * ipif_down->ipif_ndp_down->ndp_walk 5472535Ssangeeta */ 5480Sstevel@tonic-gate 5492535Ssangeeta if (nce->nce_ipversion == IPV4_VERSION) { 5502535Ssangeeta ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, 551*8485SPeter.Memishian@Sun.COM IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 5522535Ssangeeta } else { 5532535Ssangeeta ASSERT(nce->nce_ipversion == IPV6_VERSION); 5542535Ssangeeta ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, 555*8485SPeter.Memishian@Sun.COM IRE_CACHE, nce_ire_delete1, nce, nce->nce_ill); 5562535Ssangeeta } 5570Sstevel@tonic-gate NCE_REFRELE_NOTR(nce); 5580Sstevel@tonic-gate nce = nce_next; 5590Sstevel@tonic-gate } 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate /* 5630Sstevel@tonic-gate * Delete an ire when the nce goes away. 5640Sstevel@tonic-gate */ 5650Sstevel@tonic-gate /* ARGSUSED */ 5660Sstevel@tonic-gate static void 5670Sstevel@tonic-gate nce_ire_delete(nce_t *nce) 5680Sstevel@tonic-gate { 5692535Ssangeeta if (nce->nce_ipversion == IPV6_VERSION) { 5702535Ssangeeta ire_walk_ill_v6(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 5712535Ssangeeta nce_ire_delete1, (char *)nce, nce->nce_ill); 5722535Ssangeeta NCE_REFRELE_NOTR(nce); 5732535Ssangeeta } else { 5742535Ssangeeta ire_walk_ill_v4(MATCH_IRE_ILL | MATCH_IRE_TYPE, IRE_CACHE, 5752535Ssangeeta nce_ire_delete1, (char *)nce, nce->nce_ill); 5762535Ssangeeta NCE_REFRELE_NOTR(nce); 5772535Ssangeeta } 5780Sstevel@tonic-gate } 5790Sstevel@tonic-gate 5800Sstevel@tonic-gate /* 5810Sstevel@tonic-gate * ire_walk routine used to delete every IRE that shares this nce 5820Sstevel@tonic-gate */ 5830Sstevel@tonic-gate static void 5840Sstevel@tonic-gate nce_ire_delete1(ire_t *ire, char *nce_arg) 5850Sstevel@tonic-gate { 5860Sstevel@tonic-gate nce_t *nce = (nce_t *)nce_arg; 5870Sstevel@tonic-gate 5880Sstevel@tonic-gate ASSERT(ire->ire_type == IRE_CACHE); 5890Sstevel@tonic-gate 5902535Ssangeeta if (ire->ire_nce == nce) { 5912535Ssangeeta ASSERT(ire->ire_ipversion == nce->nce_ipversion); 5920Sstevel@tonic-gate ire_delete(ire); 5932535Ssangeeta } 5940Sstevel@tonic-gate } 5950Sstevel@tonic-gate 5960Sstevel@tonic-gate /* 5972546Scarlsonj * Restart DAD on given NCE. Returns B_TRUE if DAD has been restarted. 5982546Scarlsonj */ 5992546Scarlsonj boolean_t 6002546Scarlsonj ndp_restart_dad(nce_t *nce) 6012546Scarlsonj { 6022546Scarlsonj boolean_t started; 6032546Scarlsonj boolean_t dropped; 6042546Scarlsonj 6052546Scarlsonj if (nce == NULL) 6062546Scarlsonj return (B_FALSE); 6072546Scarlsonj mutex_enter(&nce->nce_lock); 6082546Scarlsonj if (nce->nce_state == ND_PROBE) { 6092546Scarlsonj mutex_exit(&nce->nce_lock); 6102546Scarlsonj started = B_TRUE; 6112546Scarlsonj } else if (nce->nce_state == ND_REACHABLE) { 6122546Scarlsonj nce->nce_state = ND_PROBE; 6132546Scarlsonj nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT - 1; 6142546Scarlsonj mutex_exit(&nce->nce_lock); 615*8485SPeter.Memishian@Sun.COM dropped = nce_xmit_solicit(nce, B_FALSE, NULL, NDP_PROBE); 6162546Scarlsonj if (dropped) { 6172546Scarlsonj mutex_enter(&nce->nce_lock); 6182546Scarlsonj nce->nce_pcnt++; 6192546Scarlsonj mutex_exit(&nce->nce_lock); 6202546Scarlsonj } 6212546Scarlsonj NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(nce->nce_ill)); 6222546Scarlsonj started = B_TRUE; 6232546Scarlsonj } else { 6242546Scarlsonj mutex_exit(&nce->nce_lock); 6252546Scarlsonj started = B_FALSE; 6262546Scarlsonj } 6272546Scarlsonj return (started); 6282546Scarlsonj } 6292546Scarlsonj 6302546Scarlsonj /* 6312535Ssangeeta * IPv6 Cache entry lookup. Try to find an nce matching the parameters passed. 6320Sstevel@tonic-gate * If one is found, the refcnt on the nce will be incremented. 6330Sstevel@tonic-gate */ 6340Sstevel@tonic-gate nce_t * 635*8485SPeter.Memishian@Sun.COM ndp_lookup_v6(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 636*8485SPeter.Memishian@Sun.COM boolean_t caller_holds_lock) 6370Sstevel@tonic-gate { 6380Sstevel@tonic-gate nce_t *nce; 639*8485SPeter.Memishian@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 640*8485SPeter.Memishian@Sun.COM 641*8485SPeter.Memishian@Sun.COM ASSERT(ill->ill_isv6); 642*8485SPeter.Memishian@Sun.COM if (!caller_holds_lock) 6433448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 6443448Sdh155122 6453448Sdh155122 /* Get head of v6 hash table */ 6463448Sdh155122 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 647*8485SPeter.Memishian@Sun.COM nce = nce_lookup_addr(ill, match_illgrp, addr, nce); 6480Sstevel@tonic-gate if (nce == NULL) 6490Sstevel@tonic-gate nce = nce_lookup_mapping(ill, addr); 6500Sstevel@tonic-gate if (!caller_holds_lock) 6513448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 6522535Ssangeeta return (nce); 6532535Ssangeeta } 6542535Ssangeeta /* 6552535Ssangeeta * IPv4 Cache entry lookup. Try to find an nce matching the parameters passed. 6562535Ssangeeta * If one is found, the refcnt on the nce will be incremented. 6572535Ssangeeta * Since multicast mappings are handled in arp, there are no nce_mcast_entries 6582535Ssangeeta * so we skip the nce_lookup_mapping call. 6592535Ssangeeta * XXX TODO: if the nce is found to be ND_STALE, ndp_delete it and return NULL 6602535Ssangeeta */ 6612535Ssangeeta nce_t * 6622535Ssangeeta ndp_lookup_v4(ill_t *ill, const in_addr_t *addr, boolean_t caller_holds_lock) 6632535Ssangeeta { 6642535Ssangeeta nce_t *nce; 6652535Ssangeeta in6_addr_t addr6; 6663448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 6672535Ssangeeta 668*8485SPeter.Memishian@Sun.COM if (!caller_holds_lock) 6693448Sdh155122 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 6703448Sdh155122 6713448Sdh155122 /* Get head of v4 hash table */ 6723448Sdh155122 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 6732535Ssangeeta IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 674*8485SPeter.Memishian@Sun.COM /* 675*8485SPeter.Memishian@Sun.COM * NOTE: IPv4 never matches across the illgrp since the NCE's we're 676*8485SPeter.Memishian@Sun.COM * looking up have fastpath headers that are inherently per-ill. 677*8485SPeter.Memishian@Sun.COM */ 678*8485SPeter.Memishian@Sun.COM nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 6792535Ssangeeta if (!caller_holds_lock) 6803448Sdh155122 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 6810Sstevel@tonic-gate return (nce); 6820Sstevel@tonic-gate } 6830Sstevel@tonic-gate 6840Sstevel@tonic-gate /* 6850Sstevel@tonic-gate * Cache entry lookup. Try to find an nce matching the parameters passed. 6860Sstevel@tonic-gate * Look only for exact entries (no mappings). If an nce is found, increment 6872535Ssangeeta * the hold count on that nce. The caller passes in the start of the 6882535Ssangeeta * appropriate hash table, and must be holding the appropriate global 6892535Ssangeeta * lock (ndp_g_lock). 6900Sstevel@tonic-gate */ 6910Sstevel@tonic-gate static nce_t * 692*8485SPeter.Memishian@Sun.COM nce_lookup_addr(ill_t *ill, boolean_t match_illgrp, const in6_addr_t *addr, 693*8485SPeter.Memishian@Sun.COM nce_t *nce) 6940Sstevel@tonic-gate { 6953448Sdh155122 ndp_g_t *ndp; 6963448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 6973448Sdh155122 6983448Sdh155122 if (ill->ill_isv6) 6993448Sdh155122 ndp = ipst->ips_ndp6; 7003448Sdh155122 else 7013448Sdh155122 ndp = ipst->ips_ndp4; 7020Sstevel@tonic-gate 7032535Ssangeeta ASSERT(MUTEX_HELD(&ndp->ndp_g_lock)); 7040Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(addr)) 7050Sstevel@tonic-gate return (NULL); 7060Sstevel@tonic-gate for (; nce != NULL; nce = nce->nce_next) { 707*8485SPeter.Memishian@Sun.COM if (nce->nce_ill == ill || 708*8485SPeter.Memishian@Sun.COM match_illgrp && IS_IN_SAME_ILLGRP(ill, nce->nce_ill)) { 7090Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, addr) && 7100Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&nce->nce_mask, 7110Sstevel@tonic-gate &ipv6_all_ones)) { 7120Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 7130Sstevel@tonic-gate if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 7140Sstevel@tonic-gate NCE_REFHOLD_LOCKED(nce); 7150Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7160Sstevel@tonic-gate break; 7170Sstevel@tonic-gate } 7180Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7190Sstevel@tonic-gate } 7200Sstevel@tonic-gate } 7210Sstevel@tonic-gate } 7220Sstevel@tonic-gate return (nce); 7230Sstevel@tonic-gate } 7240Sstevel@tonic-gate 7250Sstevel@tonic-gate /* 7260Sstevel@tonic-gate * Cache entry lookup. Try to find an nce matching the parameters passed. 7270Sstevel@tonic-gate * Look only for mappings. 7280Sstevel@tonic-gate */ 7290Sstevel@tonic-gate static nce_t * 7300Sstevel@tonic-gate nce_lookup_mapping(ill_t *ill, const in6_addr_t *addr) 7310Sstevel@tonic-gate { 7320Sstevel@tonic-gate nce_t *nce; 7333448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 7340Sstevel@tonic-gate 7352535Ssangeeta ASSERT(ill != NULL && ill->ill_isv6); 7363448Sdh155122 ASSERT(MUTEX_HELD(&ipst->ips_ndp6->ndp_g_lock)); 7370Sstevel@tonic-gate if (!IN6_IS_ADDR_MULTICAST(addr)) 7380Sstevel@tonic-gate return (NULL); 7393448Sdh155122 nce = ipst->ips_ndp6->nce_mask_entries; 7400Sstevel@tonic-gate for (; nce != NULL; nce = nce->nce_next) 7410Sstevel@tonic-gate if (nce->nce_ill == ill && 7420Sstevel@tonic-gate (V6_MASK_EQ(*addr, nce->nce_mask, nce->nce_addr))) { 7430Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 7440Sstevel@tonic-gate if (!(nce->nce_flags & NCE_F_CONDEMNED)) { 7450Sstevel@tonic-gate NCE_REFHOLD_LOCKED(nce); 7460Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7470Sstevel@tonic-gate break; 7480Sstevel@tonic-gate } 7490Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7500Sstevel@tonic-gate } 7510Sstevel@tonic-gate return (nce); 7520Sstevel@tonic-gate } 7530Sstevel@tonic-gate 7540Sstevel@tonic-gate /* 7550Sstevel@tonic-gate * Process passed in parameters either from an incoming packet or via 7560Sstevel@tonic-gate * user ioctl. 7570Sstevel@tonic-gate */ 758*8485SPeter.Memishian@Sun.COM static void 759*8485SPeter.Memishian@Sun.COM nce_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 7600Sstevel@tonic-gate { 7610Sstevel@tonic-gate ill_t *ill = nce->nce_ill; 7620Sstevel@tonic-gate uint32_t hw_addr_len = ill->ill_nd_lla_len; 7630Sstevel@tonic-gate mblk_t *mp; 7640Sstevel@tonic-gate boolean_t ll_updated = B_FALSE; 7650Sstevel@tonic-gate boolean_t ll_changed; 7663448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 7670Sstevel@tonic-gate 7682535Ssangeeta ASSERT(nce->nce_ipversion == IPV6_VERSION); 7690Sstevel@tonic-gate /* 7700Sstevel@tonic-gate * No updates of link layer address or the neighbor state is 7710Sstevel@tonic-gate * allowed, when the cache is in NONUD state. This still 7720Sstevel@tonic-gate * allows for responding to reachability solicitation. 7730Sstevel@tonic-gate */ 7740Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 7750Sstevel@tonic-gate if (nce->nce_state == ND_INCOMPLETE) { 7760Sstevel@tonic-gate if (hw_addr == NULL) { 7770Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7780Sstevel@tonic-gate return; 7790Sstevel@tonic-gate } 7800Sstevel@tonic-gate nce_set_ll(nce, hw_addr); 7810Sstevel@tonic-gate /* 7820Sstevel@tonic-gate * Update nce state and send the queued packets 7830Sstevel@tonic-gate * back to ip this time ire will be added. 7840Sstevel@tonic-gate */ 7850Sstevel@tonic-gate if (flag & ND_NA_FLAG_SOLICITED) { 7860Sstevel@tonic-gate nce_update(nce, ND_REACHABLE, NULL); 7870Sstevel@tonic-gate } else { 7880Sstevel@tonic-gate nce_update(nce, ND_STALE, NULL); 7890Sstevel@tonic-gate } 7900Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7910Sstevel@tonic-gate nce_fastpath(nce); 7920Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 7930Sstevel@tonic-gate mp = nce->nce_qd_mp; 7940Sstevel@tonic-gate nce->nce_qd_mp = NULL; 7950Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 7960Sstevel@tonic-gate while (mp != NULL) { 7972958Sdr146992 mblk_t *nxt_mp, *data_mp; 7980Sstevel@tonic-gate 7990Sstevel@tonic-gate nxt_mp = mp->b_next; 8000Sstevel@tonic-gate mp->b_next = NULL; 8012958Sdr146992 8022958Sdr146992 if (mp->b_datap->db_type == M_CTL) 8032958Sdr146992 data_mp = mp->b_cont; 8042958Sdr146992 else 8052958Sdr146992 data_mp = mp; 8062958Sdr146992 if (data_mp->b_prev != NULL) { 8070Sstevel@tonic-gate ill_t *inbound_ill; 8080Sstevel@tonic-gate queue_t *fwdq = NULL; 8090Sstevel@tonic-gate uint_t ifindex; 8100Sstevel@tonic-gate 8112958Sdr146992 ifindex = (uint_t)(uintptr_t)data_mp->b_prev; 8120Sstevel@tonic-gate inbound_ill = ill_lookup_on_ifindex(ifindex, 8133448Sdh155122 B_TRUE, NULL, NULL, NULL, NULL, ipst); 8140Sstevel@tonic-gate if (inbound_ill == NULL) { 8152958Sdr146992 data_mp->b_prev = NULL; 8160Sstevel@tonic-gate freemsg(mp); 8170Sstevel@tonic-gate return; 8180Sstevel@tonic-gate } else { 8190Sstevel@tonic-gate fwdq = inbound_ill->ill_rq; 8200Sstevel@tonic-gate } 8212958Sdr146992 data_mp->b_prev = NULL; 8220Sstevel@tonic-gate /* 8230Sstevel@tonic-gate * Send a forwarded packet back into ip_rput_v6 8240Sstevel@tonic-gate * just as in ire_send_v6(). 8250Sstevel@tonic-gate * Extract the queue from b_prev (set in 8260Sstevel@tonic-gate * ip_rput_data_v6). 8270Sstevel@tonic-gate */ 8280Sstevel@tonic-gate if (fwdq != NULL) { 8290Sstevel@tonic-gate /* 8300Sstevel@tonic-gate * Forwarded packets hop count will 8310Sstevel@tonic-gate * get decremented in ip_rput_data_v6 8320Sstevel@tonic-gate */ 8332958Sdr146992 if (data_mp != mp) 8342958Sdr146992 freeb(mp); 8352958Sdr146992 put(fwdq, data_mp); 8360Sstevel@tonic-gate } else { 8370Sstevel@tonic-gate /* 8380Sstevel@tonic-gate * Send locally originated packets back 839*8485SPeter.Memishian@Sun.COM * into ip_wput_v6. 8400Sstevel@tonic-gate */ 8410Sstevel@tonic-gate put(ill->ill_wq, mp); 8420Sstevel@tonic-gate } 8430Sstevel@tonic-gate ill_refrele(inbound_ill); 8440Sstevel@tonic-gate } else { 8450Sstevel@tonic-gate put(ill->ill_wq, mp); 8460Sstevel@tonic-gate } 8470Sstevel@tonic-gate mp = nxt_mp; 8480Sstevel@tonic-gate } 8490Sstevel@tonic-gate return; 8500Sstevel@tonic-gate } 8512546Scarlsonj ll_changed = nce_cmp_ll_addr(nce, hw_addr, hw_addr_len); 8520Sstevel@tonic-gate if (!is_adv) { 8530Sstevel@tonic-gate /* If this is a SOLICITATION request only */ 8540Sstevel@tonic-gate if (ll_changed) 8550Sstevel@tonic-gate nce_update(nce, ND_STALE, hw_addr); 8560Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 8570Sstevel@tonic-gate return; 8580Sstevel@tonic-gate } 8590Sstevel@tonic-gate if (!(flag & ND_NA_FLAG_OVERRIDE) && ll_changed) { 8600Sstevel@tonic-gate /* If in any other state than REACHABLE, ignore */ 8610Sstevel@tonic-gate if (nce->nce_state == ND_REACHABLE) { 8620Sstevel@tonic-gate nce_update(nce, ND_STALE, NULL); 8630Sstevel@tonic-gate } 8640Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 8650Sstevel@tonic-gate return; 8660Sstevel@tonic-gate } else { 8670Sstevel@tonic-gate if (ll_changed) { 8680Sstevel@tonic-gate nce_update(nce, ND_UNCHANGED, hw_addr); 8690Sstevel@tonic-gate ll_updated = B_TRUE; 8700Sstevel@tonic-gate } 8710Sstevel@tonic-gate if (flag & ND_NA_FLAG_SOLICITED) { 8720Sstevel@tonic-gate nce_update(nce, ND_REACHABLE, NULL); 8730Sstevel@tonic-gate } else { 8740Sstevel@tonic-gate if (ll_updated) { 8750Sstevel@tonic-gate nce_update(nce, ND_STALE, NULL); 8760Sstevel@tonic-gate } 8770Sstevel@tonic-gate } 8780Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 8790Sstevel@tonic-gate if (!(flag & ND_NA_FLAG_ROUTER) && (nce->nce_flags & 8800Sstevel@tonic-gate NCE_F_ISROUTER)) { 8810Sstevel@tonic-gate ire_t *ire; 8820Sstevel@tonic-gate 8830Sstevel@tonic-gate /* 8840Sstevel@tonic-gate * Router turned to host. We need to remove the 8850Sstevel@tonic-gate * entry as well as any default route that may be 8860Sstevel@tonic-gate * using this as a next hop. This is required by 8870Sstevel@tonic-gate * section 7.2.5 of RFC 2461. 8880Sstevel@tonic-gate */ 8890Sstevel@tonic-gate ire = ire_ftable_lookup_v6(&ipv6_all_zeros, 8900Sstevel@tonic-gate &ipv6_all_zeros, &nce->nce_addr, IRE_DEFAULT, 8911676Sjpk nce->nce_ill->ill_ipif, NULL, ALL_ZONES, 0, NULL, 8920Sstevel@tonic-gate MATCH_IRE_ILL | MATCH_IRE_TYPE | MATCH_IRE_GW | 8933448Sdh155122 MATCH_IRE_DEFAULT, ipst); 8940Sstevel@tonic-gate if (ire != NULL) { 8953448Sdh155122 ip_rts_rtmsg(RTM_DELETE, ire, 0, ipst); 8960Sstevel@tonic-gate ire_delete(ire); 8970Sstevel@tonic-gate ire_refrele(ire); 8980Sstevel@tonic-gate } 8990Sstevel@tonic-gate ndp_delete(nce); 9000Sstevel@tonic-gate } 9010Sstevel@tonic-gate } 9020Sstevel@tonic-gate } 9030Sstevel@tonic-gate 9040Sstevel@tonic-gate /* 905*8485SPeter.Memishian@Sun.COM * Walker state structure used by ndp_process() / ndp_process_entry(). 906*8485SPeter.Memishian@Sun.COM */ 907*8485SPeter.Memishian@Sun.COM typedef struct ndp_process_data { 908*8485SPeter.Memishian@Sun.COM ill_t *np_ill; /* ill/illgrp to match against */ 909*8485SPeter.Memishian@Sun.COM const in6_addr_t *np_addr; /* IPv6 address to match */ 910*8485SPeter.Memishian@Sun.COM uchar_t *np_hw_addr; /* passed to nce_process() */ 911*8485SPeter.Memishian@Sun.COM uint32_t np_flag; /* passed to nce_process() */ 912*8485SPeter.Memishian@Sun.COM boolean_t np_is_adv; /* passed to nce_process() */ 913*8485SPeter.Memishian@Sun.COM } ndp_process_data_t; 914*8485SPeter.Memishian@Sun.COM 915*8485SPeter.Memishian@Sun.COM /* 916*8485SPeter.Memishian@Sun.COM * Walker callback used by ndp_process() for IPMP groups: calls nce_process() 917*8485SPeter.Memishian@Sun.COM * for each NCE with a matching address that's in the same IPMP group. 918*8485SPeter.Memishian@Sun.COM */ 919*8485SPeter.Memishian@Sun.COM static void 920*8485SPeter.Memishian@Sun.COM ndp_process_entry(nce_t *nce, void *arg) 921*8485SPeter.Memishian@Sun.COM { 922*8485SPeter.Memishian@Sun.COM ndp_process_data_t *npp = arg; 923*8485SPeter.Memishian@Sun.COM 924*8485SPeter.Memishian@Sun.COM if (IS_IN_SAME_ILLGRP(nce->nce_ill, npp->np_ill) && 925*8485SPeter.Memishian@Sun.COM IN6_ARE_ADDR_EQUAL(&nce->nce_addr, npp->np_addr) && 926*8485SPeter.Memishian@Sun.COM IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 927*8485SPeter.Memishian@Sun.COM nce_process(nce, npp->np_hw_addr, npp->np_flag, npp->np_is_adv); 928*8485SPeter.Memishian@Sun.COM } 929*8485SPeter.Memishian@Sun.COM } 930*8485SPeter.Memishian@Sun.COM 931*8485SPeter.Memishian@Sun.COM /* 932*8485SPeter.Memishian@Sun.COM * Wrapper around nce_process() that handles IPMP. In particular, for IPMP, 933*8485SPeter.Memishian@Sun.COM * NCEs are per-underlying-ill (because of nce_fp_mp) and thus we may have 934*8485SPeter.Memishian@Sun.COM * more than one NCE for a given IPv6 address to tend to. In that case, we 935*8485SPeter.Memishian@Sun.COM * need to walk all NCEs and callback nce_process() for each one. Since this 936*8485SPeter.Memishian@Sun.COM * is expensive, in the non-IPMP case we just directly call nce_process(). 937*8485SPeter.Memishian@Sun.COM * Ultimately, nce_fp_mp needs to be moved out of the nce_t so that all IP 938*8485SPeter.Memishian@Sun.COM * interfaces in an IPMP group share the same NCEs -- at which point this 939*8485SPeter.Memishian@Sun.COM * function can be removed entirely. 940*8485SPeter.Memishian@Sun.COM */ 941*8485SPeter.Memishian@Sun.COM void 942*8485SPeter.Memishian@Sun.COM ndp_process(nce_t *nce, uchar_t *hw_addr, uint32_t flag, boolean_t is_adv) 943*8485SPeter.Memishian@Sun.COM { 944*8485SPeter.Memishian@Sun.COM ill_t *ill = nce->nce_ill; 945*8485SPeter.Memishian@Sun.COM struct ndp_g_s *ndp = ill->ill_ipst->ips_ndp6; 946*8485SPeter.Memishian@Sun.COM ndp_process_data_t np; 947*8485SPeter.Memishian@Sun.COM 948*8485SPeter.Memishian@Sun.COM if (ill->ill_grp == NULL) { 949*8485SPeter.Memishian@Sun.COM nce_process(nce, hw_addr, flag, is_adv); 950*8485SPeter.Memishian@Sun.COM return; 951*8485SPeter.Memishian@Sun.COM } 952*8485SPeter.Memishian@Sun.COM 953*8485SPeter.Memishian@Sun.COM /* IPMP case: walk all NCEs */ 954*8485SPeter.Memishian@Sun.COM np.np_ill = ill; 955*8485SPeter.Memishian@Sun.COM np.np_addr = &nce->nce_addr; 956*8485SPeter.Memishian@Sun.COM np.np_flag = flag; 957*8485SPeter.Memishian@Sun.COM np.np_is_adv = is_adv; 958*8485SPeter.Memishian@Sun.COM np.np_hw_addr = hw_addr; 959*8485SPeter.Memishian@Sun.COM 960*8485SPeter.Memishian@Sun.COM ndp_walk_common(ndp, NULL, (pfi_t)ndp_process_entry, &np, ALL_ZONES); 961*8485SPeter.Memishian@Sun.COM } 962*8485SPeter.Memishian@Sun.COM 963*8485SPeter.Memishian@Sun.COM /* 9640Sstevel@tonic-gate * Pass arg1 to the pfi supplied, along with each nce in existence. 9650Sstevel@tonic-gate * ndp_walk() places a REFHOLD on the nce and drops the lock when 9660Sstevel@tonic-gate * walking the hash list. 9670Sstevel@tonic-gate */ 9680Sstevel@tonic-gate void 9692535Ssangeeta ndp_walk_common(ndp_g_t *ndp, ill_t *ill, pfi_t pfi, void *arg1, 9702535Ssangeeta boolean_t trace) 9710Sstevel@tonic-gate { 9720Sstevel@tonic-gate nce_t *nce; 9730Sstevel@tonic-gate nce_t *nce1; 9740Sstevel@tonic-gate nce_t **ncep; 9750Sstevel@tonic-gate nce_t *free_nce_list = NULL; 9760Sstevel@tonic-gate 9772535Ssangeeta mutex_enter(&ndp->ndp_g_lock); 9782535Ssangeeta /* Prevent ndp_delete from unlink and free of NCE */ 9792535Ssangeeta ndp->ndp_g_walker++; 9802535Ssangeeta mutex_exit(&ndp->ndp_g_lock); 9812535Ssangeeta for (ncep = ndp->nce_hash_tbl; 9822535Ssangeeta ncep < A_END(ndp->nce_hash_tbl); ncep++) { 9832535Ssangeeta for (nce = *ncep; nce != NULL; nce = nce1) { 9840Sstevel@tonic-gate nce1 = nce->nce_next; 9850Sstevel@tonic-gate if (ill == NULL || nce->nce_ill == ill) { 9860Sstevel@tonic-gate if (trace) { 9870Sstevel@tonic-gate NCE_REFHOLD(nce); 9880Sstevel@tonic-gate (*pfi)(nce, arg1); 9890Sstevel@tonic-gate NCE_REFRELE(nce); 9900Sstevel@tonic-gate } else { 9910Sstevel@tonic-gate NCE_REFHOLD_NOTR(nce); 9920Sstevel@tonic-gate (*pfi)(nce, arg1); 9930Sstevel@tonic-gate NCE_REFRELE_NOTR(nce); 9940Sstevel@tonic-gate } 9950Sstevel@tonic-gate } 9960Sstevel@tonic-gate } 9970Sstevel@tonic-gate } 9982535Ssangeeta for (nce = ndp->nce_mask_entries; nce != NULL; nce = nce1) { 9990Sstevel@tonic-gate nce1 = nce->nce_next; 10000Sstevel@tonic-gate if (ill == NULL || nce->nce_ill == ill) { 10010Sstevel@tonic-gate if (trace) { 10020Sstevel@tonic-gate NCE_REFHOLD(nce); 10030Sstevel@tonic-gate (*pfi)(nce, arg1); 10040Sstevel@tonic-gate NCE_REFRELE(nce); 10050Sstevel@tonic-gate } else { 10060Sstevel@tonic-gate NCE_REFHOLD_NOTR(nce); 10070Sstevel@tonic-gate (*pfi)(nce, arg1); 10080Sstevel@tonic-gate NCE_REFRELE_NOTR(nce); 10090Sstevel@tonic-gate } 10100Sstevel@tonic-gate } 10110Sstevel@tonic-gate } 10122535Ssangeeta mutex_enter(&ndp->ndp_g_lock); 10132535Ssangeeta ndp->ndp_g_walker--; 10140Sstevel@tonic-gate /* 10150Sstevel@tonic-gate * While NCE's are removed from global list they are placed 10160Sstevel@tonic-gate * in a private list, to be passed to nce_ire_delete_list(). 10170Sstevel@tonic-gate * The reason is, there may be ires pointing to this nce 10180Sstevel@tonic-gate * which needs to cleaned up. 10190Sstevel@tonic-gate */ 10202535Ssangeeta if (ndp->ndp_g_walker_cleanup && ndp->ndp_g_walker == 0) { 10210Sstevel@tonic-gate /* Time to delete condemned entries */ 10222535Ssangeeta for (ncep = ndp->nce_hash_tbl; 10232535Ssangeeta ncep < A_END(ndp->nce_hash_tbl); ncep++) { 10240Sstevel@tonic-gate nce = *ncep; 10250Sstevel@tonic-gate if (nce != NULL) { 10262535Ssangeeta nce_remove(ndp, nce, &free_nce_list); 10270Sstevel@tonic-gate } 10280Sstevel@tonic-gate } 10292535Ssangeeta nce = ndp->nce_mask_entries; 10300Sstevel@tonic-gate if (nce != NULL) { 10312535Ssangeeta nce_remove(ndp, nce, &free_nce_list); 10320Sstevel@tonic-gate } 10332535Ssangeeta ndp->ndp_g_walker_cleanup = B_FALSE; 10340Sstevel@tonic-gate } 10354714Ssowmini 10362535Ssangeeta mutex_exit(&ndp->ndp_g_lock); 10370Sstevel@tonic-gate 10380Sstevel@tonic-gate if (free_nce_list != NULL) { 10390Sstevel@tonic-gate nce_ire_delete_list(free_nce_list); 10400Sstevel@tonic-gate } 10410Sstevel@tonic-gate } 10420Sstevel@tonic-gate 10433448Sdh155122 /* 10443448Sdh155122 * Walk everything. 10453448Sdh155122 * Note that ill can be NULL hence can't derive the ipst from it. 10463448Sdh155122 */ 10470Sstevel@tonic-gate void 10483448Sdh155122 ndp_walk(ill_t *ill, pfi_t pfi, void *arg1, ip_stack_t *ipst) 10490Sstevel@tonic-gate { 10503448Sdh155122 ndp_walk_common(ipst->ips_ndp4, ill, pfi, arg1, B_TRUE); 10513448Sdh155122 ndp_walk_common(ipst->ips_ndp6, ill, pfi, arg1, B_TRUE); 10520Sstevel@tonic-gate } 10530Sstevel@tonic-gate 10540Sstevel@tonic-gate /* 10550Sstevel@tonic-gate * Process resolve requests. Handles both mapped entries 10560Sstevel@tonic-gate * as well as cases that needs to be send out on the wire. 10570Sstevel@tonic-gate * Lookup a NCE for a given IRE. Regardless of whether one exists 10580Sstevel@tonic-gate * or one is created, we defer making ire point to nce until the 10590Sstevel@tonic-gate * ire is actually added at which point the nce_refcnt on the nce is 10600Sstevel@tonic-gate * incremented. This is done primarily to have symmetry between ire_add() 10610Sstevel@tonic-gate * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 10620Sstevel@tonic-gate */ 10630Sstevel@tonic-gate int 10640Sstevel@tonic-gate ndp_resolver(ill_t *ill, const in6_addr_t *dst, mblk_t *mp, zoneid_t zoneid) 10650Sstevel@tonic-gate { 1066*8485SPeter.Memishian@Sun.COM nce_t *nce, *hw_nce = NULL; 1067*8485SPeter.Memishian@Sun.COM int err; 1068*8485SPeter.Memishian@Sun.COM ill_t *ipmp_ill; 1069*8485SPeter.Memishian@Sun.COM uint16_t nce_flags; 10700Sstevel@tonic-gate uint32_t ms; 10710Sstevel@tonic-gate mblk_t *mp_nce = NULL; 10723448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 1073*8485SPeter.Memishian@Sun.COM uchar_t *hwaddr = NULL; 10740Sstevel@tonic-gate 10752535Ssangeeta ASSERT(ill->ill_isv6); 1076*8485SPeter.Memishian@Sun.COM 1077*8485SPeter.Memishian@Sun.COM if (IN6_IS_ADDR_MULTICAST(dst)) 1078*8485SPeter.Memishian@Sun.COM return (nce_set_multicast(ill, dst)); 1079*8485SPeter.Memishian@Sun.COM 1080*8485SPeter.Memishian@Sun.COM nce_flags = (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0; 1081*8485SPeter.Memishian@Sun.COM 1082*8485SPeter.Memishian@Sun.COM /* 1083*8485SPeter.Memishian@Sun.COM * If `ill' is under IPMP, then first check to see if there's an NCE 1084*8485SPeter.Memishian@Sun.COM * for `dst' on the IPMP meta-interface (e.g., because an application 1085*8485SPeter.Memishian@Sun.COM * explicitly did an SIOCLIFSETND to tie a hardware address to `dst'). 1086*8485SPeter.Memishian@Sun.COM * If so, we use that hardware address when creating the NCE below. 1087*8485SPeter.Memishian@Sun.COM * Note that we don't yet have a mechanism to remove these NCEs if the 1088*8485SPeter.Memishian@Sun.COM * NCE for `dst' on the IPMP meta-interface is subsequently removed -- 1089*8485SPeter.Memishian@Sun.COM * but rather than build such a beast, we should fix NCEs so that they 1090*8485SPeter.Memishian@Sun.COM * can be properly shared across an IPMP group. 1091*8485SPeter.Memishian@Sun.COM */ 1092*8485SPeter.Memishian@Sun.COM if (IS_UNDER_IPMP(ill)) { 1093*8485SPeter.Memishian@Sun.COM if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 1094*8485SPeter.Memishian@Sun.COM hw_nce = ndp_lookup_v6(ipmp_ill, B_FALSE, dst, B_FALSE); 1095*8485SPeter.Memishian@Sun.COM if (hw_nce != NULL && hw_nce->nce_res_mp != NULL) { 1096*8485SPeter.Memishian@Sun.COM hwaddr = hw_nce->nce_res_mp->b_rptr + 1097*8485SPeter.Memishian@Sun.COM NCE_LL_ADDR_OFFSET(ipmp_ill); 1098*8485SPeter.Memishian@Sun.COM nce_flags |= hw_nce->nce_flags; 1099*8485SPeter.Memishian@Sun.COM } 1100*8485SPeter.Memishian@Sun.COM ill_refrele(ipmp_ill); 1101*8485SPeter.Memishian@Sun.COM } 11020Sstevel@tonic-gate } 1103*8485SPeter.Memishian@Sun.COM 11044714Ssowmini err = ndp_lookup_then_add_v6(ill, 1105*8485SPeter.Memishian@Sun.COM B_FALSE, /* NCE fastpath is per ill; don't match across group */ 1106*8485SPeter.Memishian@Sun.COM hwaddr, 11070Sstevel@tonic-gate dst, 11080Sstevel@tonic-gate &ipv6_all_ones, 11090Sstevel@tonic-gate &ipv6_all_zeros, 11100Sstevel@tonic-gate 0, 1111*8485SPeter.Memishian@Sun.COM nce_flags, 1112*8485SPeter.Memishian@Sun.COM hwaddr != NULL ? ND_REACHABLE : ND_INCOMPLETE, 11134714Ssowmini &nce); 11140Sstevel@tonic-gate 1115*8485SPeter.Memishian@Sun.COM if (hw_nce != NULL) 1116*8485SPeter.Memishian@Sun.COM NCE_REFRELE(hw_nce); 1117*8485SPeter.Memishian@Sun.COM 11180Sstevel@tonic-gate switch (err) { 11190Sstevel@tonic-gate case 0: 11200Sstevel@tonic-gate /* 11210Sstevel@tonic-gate * New cache entry was created. Make sure that the state 11220Sstevel@tonic-gate * is not ND_INCOMPLETE. It can be in some other state 11230Sstevel@tonic-gate * even before we send out the solicitation as we could 11240Sstevel@tonic-gate * get un-solicited advertisements. 11250Sstevel@tonic-gate * 11260Sstevel@tonic-gate * If this is an XRESOLV interface, simply return 0, 11270Sstevel@tonic-gate * since we don't want to solicit just yet. 11280Sstevel@tonic-gate */ 11290Sstevel@tonic-gate if (ill->ill_flags & ILLF_XRESOLV) { 11300Sstevel@tonic-gate NCE_REFRELE(nce); 11310Sstevel@tonic-gate return (0); 11320Sstevel@tonic-gate } 1133*8485SPeter.Memishian@Sun.COM 11340Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 11350Sstevel@tonic-gate if (nce->nce_state != ND_INCOMPLETE) { 11360Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 11370Sstevel@tonic-gate NCE_REFRELE(nce); 11380Sstevel@tonic-gate return (0); 11390Sstevel@tonic-gate } 11403448Sdh155122 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 11410Sstevel@tonic-gate if (mp_nce == NULL) { 11420Sstevel@tonic-gate /* The caller will free mp */ 11430Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 11440Sstevel@tonic-gate ndp_delete(nce); 11450Sstevel@tonic-gate NCE_REFRELE(nce); 11460Sstevel@tonic-gate return (ENOMEM); 11470Sstevel@tonic-gate } 1148*8485SPeter.Memishian@Sun.COM if ((ms = nce_solicit(nce, mp_nce)) == 0) { 11490Sstevel@tonic-gate /* The caller will free mp */ 11500Sstevel@tonic-gate if (mp_nce != mp) 11510Sstevel@tonic-gate freeb(mp_nce); 11520Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 11530Sstevel@tonic-gate ndp_delete(nce); 11540Sstevel@tonic-gate NCE_REFRELE(nce); 11550Sstevel@tonic-gate return (EBUSY); 11560Sstevel@tonic-gate } 11570Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 11580Sstevel@tonic-gate NDP_RESTART_TIMER(nce, (clock_t)ms); 11590Sstevel@tonic-gate NCE_REFRELE(nce); 11600Sstevel@tonic-gate return (EINPROGRESS); 11610Sstevel@tonic-gate case EEXIST: 11620Sstevel@tonic-gate /* Resolution in progress just queue the packet */ 11630Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 11640Sstevel@tonic-gate if (nce->nce_state == ND_INCOMPLETE) { 11653448Sdh155122 mp_nce = ip_prepend_zoneid(mp, zoneid, ipst); 11660Sstevel@tonic-gate if (mp_nce == NULL) { 11670Sstevel@tonic-gate err = ENOMEM; 11680Sstevel@tonic-gate } else { 11690Sstevel@tonic-gate nce_queue_mp(nce, mp_nce); 11700Sstevel@tonic-gate err = EINPROGRESS; 11710Sstevel@tonic-gate } 11720Sstevel@tonic-gate } else { 11730Sstevel@tonic-gate /* 11740Sstevel@tonic-gate * Any other state implies we have 11750Sstevel@tonic-gate * a nce but IRE needs to be added ... 11760Sstevel@tonic-gate * ire_add_v6() will take care of the 11770Sstevel@tonic-gate * the case when the nce becomes CONDEMNED 11780Sstevel@tonic-gate * before the ire is added to the table. 11790Sstevel@tonic-gate */ 11800Sstevel@tonic-gate err = 0; 11810Sstevel@tonic-gate } 11820Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 11830Sstevel@tonic-gate NCE_REFRELE(nce); 11840Sstevel@tonic-gate break; 11850Sstevel@tonic-gate default: 11860Sstevel@tonic-gate ip1dbg(("ndp_resolver: Can't create NCE %d\n", err)); 11870Sstevel@tonic-gate break; 11880Sstevel@tonic-gate } 11890Sstevel@tonic-gate return (err); 11900Sstevel@tonic-gate } 11910Sstevel@tonic-gate 11920Sstevel@tonic-gate /* 11930Sstevel@tonic-gate * When there is no resolver, the link layer template is passed in 11940Sstevel@tonic-gate * the IRE. 11950Sstevel@tonic-gate * Lookup a NCE for a given IRE. Regardless of whether one exists 11960Sstevel@tonic-gate * or one is created, we defer making ire point to nce until the 11970Sstevel@tonic-gate * ire is actually added at which point the nce_refcnt on the nce is 11980Sstevel@tonic-gate * incremented. This is done primarily to have symmetry between ire_add() 11990Sstevel@tonic-gate * and ire_delete() which decrements the nce_refcnt, when an ire is deleted. 12000Sstevel@tonic-gate */ 12010Sstevel@tonic-gate int 12020Sstevel@tonic-gate ndp_noresolver(ill_t *ill, const in6_addr_t *dst) 12030Sstevel@tonic-gate { 12040Sstevel@tonic-gate nce_t *nce; 12050Sstevel@tonic-gate int err = 0; 12060Sstevel@tonic-gate 12070Sstevel@tonic-gate ASSERT(ill != NULL); 12082535Ssangeeta ASSERT(ill->ill_isv6); 12090Sstevel@tonic-gate if (IN6_IS_ADDR_MULTICAST(dst)) { 12100Sstevel@tonic-gate err = nce_set_multicast(ill, dst); 12110Sstevel@tonic-gate return (err); 12120Sstevel@tonic-gate } 12130Sstevel@tonic-gate 12144714Ssowmini err = ndp_lookup_then_add_v6(ill, 1215*8485SPeter.Memishian@Sun.COM B_FALSE, /* NCE fastpath is per ill; don't match across group */ 12160Sstevel@tonic-gate NULL, /* hardware address */ 12170Sstevel@tonic-gate dst, 12180Sstevel@tonic-gate &ipv6_all_ones, 12190Sstevel@tonic-gate &ipv6_all_zeros, 12200Sstevel@tonic-gate 0, 12210Sstevel@tonic-gate (ill->ill_flags & ILLF_NONUD) ? NCE_F_NONUD : 0, 12220Sstevel@tonic-gate ND_REACHABLE, 12234714Ssowmini &nce); 12240Sstevel@tonic-gate 12250Sstevel@tonic-gate switch (err) { 12260Sstevel@tonic-gate case 0: 12270Sstevel@tonic-gate /* 12280Sstevel@tonic-gate * Cache entry with a proper resolver cookie was 12290Sstevel@tonic-gate * created. 12300Sstevel@tonic-gate */ 12310Sstevel@tonic-gate NCE_REFRELE(nce); 12320Sstevel@tonic-gate break; 12330Sstevel@tonic-gate case EEXIST: 12340Sstevel@tonic-gate err = 0; 12350Sstevel@tonic-gate NCE_REFRELE(nce); 12360Sstevel@tonic-gate break; 12370Sstevel@tonic-gate default: 12380Sstevel@tonic-gate ip1dbg(("ndp_noresolver: Can't create NCE %d\n", err)); 12390Sstevel@tonic-gate break; 12400Sstevel@tonic-gate } 12410Sstevel@tonic-gate return (err); 12420Sstevel@tonic-gate } 12430Sstevel@tonic-gate 12440Sstevel@tonic-gate /* 12450Sstevel@tonic-gate * For each interface an entry is added for the unspecified multicast group. 12460Sstevel@tonic-gate * Here that mapping is used to form the multicast cache entry for a particular 12470Sstevel@tonic-gate * multicast destination. 12480Sstevel@tonic-gate */ 12490Sstevel@tonic-gate static int 12500Sstevel@tonic-gate nce_set_multicast(ill_t *ill, const in6_addr_t *dst) 12510Sstevel@tonic-gate { 12520Sstevel@tonic-gate nce_t *mnce; /* Multicast mapping entry */ 12530Sstevel@tonic-gate nce_t *nce; 12540Sstevel@tonic-gate uchar_t *hw_addr = NULL; 12550Sstevel@tonic-gate int err = 0; 12563448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 12570Sstevel@tonic-gate 12580Sstevel@tonic-gate ASSERT(ill != NULL); 12592535Ssangeeta ASSERT(ill->ill_isv6); 12600Sstevel@tonic-gate ASSERT(!(IN6_IS_ADDR_UNSPECIFIED(dst))); 12610Sstevel@tonic-gate 12623448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 12633448Sdh155122 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *dst)); 1264*8485SPeter.Memishian@Sun.COM nce = nce_lookup_addr(ill, B_FALSE, dst, nce); 12650Sstevel@tonic-gate if (nce != NULL) { 12663448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 12670Sstevel@tonic-gate NCE_REFRELE(nce); 12680Sstevel@tonic-gate return (0); 12690Sstevel@tonic-gate } 12700Sstevel@tonic-gate /* No entry, now lookup for a mapping this should never fail */ 12710Sstevel@tonic-gate mnce = nce_lookup_mapping(ill, dst); 12720Sstevel@tonic-gate if (mnce == NULL) { 12730Sstevel@tonic-gate /* Something broken for the interface. */ 12743448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 12750Sstevel@tonic-gate return (ESRCH); 12760Sstevel@tonic-gate } 12770Sstevel@tonic-gate ASSERT(mnce->nce_flags & NCE_F_MAPPING); 12780Sstevel@tonic-gate if (ill->ill_net_type == IRE_IF_RESOLVER) { 12790Sstevel@tonic-gate /* 12800Sstevel@tonic-gate * For IRE_IF_RESOLVER a hardware mapping can be 12810Sstevel@tonic-gate * generated, for IRE_IF_NORESOLVER, resolution cookie 12824714Ssowmini * in the ill is copied in ndp_add_v6(). 12830Sstevel@tonic-gate */ 12840Sstevel@tonic-gate hw_addr = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP); 12850Sstevel@tonic-gate if (hw_addr == NULL) { 12863448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 12870Sstevel@tonic-gate NCE_REFRELE(mnce); 12880Sstevel@tonic-gate return (ENOMEM); 12890Sstevel@tonic-gate } 12900Sstevel@tonic-gate nce_make_mapping(mnce, hw_addr, (uchar_t *)dst); 12910Sstevel@tonic-gate } 12920Sstevel@tonic-gate NCE_REFRELE(mnce); 12930Sstevel@tonic-gate /* 12940Sstevel@tonic-gate * IRE_IF_NORESOLVER type simply copies the resolution 12950Sstevel@tonic-gate * cookie passed in. So no hw_addr is needed. 12960Sstevel@tonic-gate */ 12974714Ssowmini err = ndp_add_v6(ill, 12980Sstevel@tonic-gate hw_addr, 12990Sstevel@tonic-gate dst, 13000Sstevel@tonic-gate &ipv6_all_ones, 13010Sstevel@tonic-gate &ipv6_all_zeros, 13020Sstevel@tonic-gate 0, 13030Sstevel@tonic-gate NCE_F_NONUD, 13040Sstevel@tonic-gate ND_REACHABLE, 13054714Ssowmini &nce); 13063448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 13070Sstevel@tonic-gate if (hw_addr != NULL) 13080Sstevel@tonic-gate kmem_free(hw_addr, ill->ill_nd_lla_len); 13090Sstevel@tonic-gate if (err != 0) { 13100Sstevel@tonic-gate ip1dbg(("nce_set_multicast: create failed" "%d\n", err)); 13110Sstevel@tonic-gate return (err); 13120Sstevel@tonic-gate } 13130Sstevel@tonic-gate NCE_REFRELE(nce); 13140Sstevel@tonic-gate return (0); 13150Sstevel@tonic-gate } 13160Sstevel@tonic-gate 13170Sstevel@tonic-gate /* 13180Sstevel@tonic-gate * Return the link layer address, and any flags of a nce. 13190Sstevel@tonic-gate */ 13200Sstevel@tonic-gate int 13210Sstevel@tonic-gate ndp_query(ill_t *ill, struct lif_nd_req *lnr) 13220Sstevel@tonic-gate { 13230Sstevel@tonic-gate nce_t *nce; 13240Sstevel@tonic-gate in6_addr_t *addr; 13250Sstevel@tonic-gate sin6_t *sin6; 13260Sstevel@tonic-gate dl_unitdata_req_t *dl; 13270Sstevel@tonic-gate 13282535Ssangeeta ASSERT(ill != NULL && ill->ill_isv6); 13290Sstevel@tonic-gate sin6 = (sin6_t *)&lnr->lnr_addr; 13300Sstevel@tonic-gate addr = &sin6->sin6_addr; 13310Sstevel@tonic-gate 1332*8485SPeter.Memishian@Sun.COM /* 1333*8485SPeter.Memishian@Sun.COM * NOTE: if the ill is an IPMP interface, then match against the whole 1334*8485SPeter.Memishian@Sun.COM * illgrp. This e.g. allows in.ndpd to retrieve the link layer 1335*8485SPeter.Memishian@Sun.COM * addresses for the data addresses on an IPMP interface even though 1336*8485SPeter.Memishian@Sun.COM * ipif_ndp_up() created them with an nce_ill of ipif_bound_ill. 1337*8485SPeter.Memishian@Sun.COM */ 1338*8485SPeter.Memishian@Sun.COM nce = ndp_lookup_v6(ill, IS_IPMP(ill), addr, B_FALSE); 13390Sstevel@tonic-gate if (nce == NULL) 13400Sstevel@tonic-gate return (ESRCH); 13410Sstevel@tonic-gate /* If in INCOMPLETE state, no link layer address is available yet */ 13420Sstevel@tonic-gate if (nce->nce_state == ND_INCOMPLETE) 13430Sstevel@tonic-gate goto done; 13440Sstevel@tonic-gate dl = (dl_unitdata_req_t *)nce->nce_res_mp->b_rptr; 13450Sstevel@tonic-gate if (ill->ill_flags & ILLF_XRESOLV) 13460Sstevel@tonic-gate lnr->lnr_hdw_len = dl->dl_dest_addr_length; 13470Sstevel@tonic-gate else 13480Sstevel@tonic-gate lnr->lnr_hdw_len = ill->ill_nd_lla_len; 13490Sstevel@tonic-gate ASSERT(NCE_LL_ADDR_OFFSET(ill) + lnr->lnr_hdw_len <= 13500Sstevel@tonic-gate sizeof (lnr->lnr_hdw_addr)); 13510Sstevel@tonic-gate bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 13520Sstevel@tonic-gate (uchar_t *)&lnr->lnr_hdw_addr, lnr->lnr_hdw_len); 13530Sstevel@tonic-gate if (nce->nce_flags & NCE_F_ISROUTER) 13540Sstevel@tonic-gate lnr->lnr_flags = NDF_ISROUTER_ON; 13550Sstevel@tonic-gate if (nce->nce_flags & NCE_F_ANYCAST) 13560Sstevel@tonic-gate lnr->lnr_flags |= NDF_ANYCAST_ON; 13570Sstevel@tonic-gate done: 13580Sstevel@tonic-gate NCE_REFRELE(nce); 13590Sstevel@tonic-gate return (0); 13600Sstevel@tonic-gate } 13610Sstevel@tonic-gate 13620Sstevel@tonic-gate /* 13630Sstevel@tonic-gate * Send Enable/Disable multicast reqs to driver. 13640Sstevel@tonic-gate */ 13650Sstevel@tonic-gate int 13660Sstevel@tonic-gate ndp_mcastreq(ill_t *ill, const in6_addr_t *addr, uint32_t hw_addr_len, 13670Sstevel@tonic-gate uint32_t hw_addr_offset, mblk_t *mp) 13680Sstevel@tonic-gate { 13690Sstevel@tonic-gate nce_t *nce; 13700Sstevel@tonic-gate uchar_t *hw_addr; 13713448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 13720Sstevel@tonic-gate 13732535Ssangeeta ASSERT(ill != NULL && ill->ill_isv6); 13740Sstevel@tonic-gate ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 13750Sstevel@tonic-gate hw_addr = mi_offset_paramc(mp, hw_addr_offset, hw_addr_len); 13760Sstevel@tonic-gate if (hw_addr == NULL || !IN6_IS_ADDR_MULTICAST(addr)) { 13770Sstevel@tonic-gate freemsg(mp); 13780Sstevel@tonic-gate return (EINVAL); 13790Sstevel@tonic-gate } 13803448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 13810Sstevel@tonic-gate nce = nce_lookup_mapping(ill, addr); 13820Sstevel@tonic-gate if (nce == NULL) { 13833448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 13840Sstevel@tonic-gate freemsg(mp); 13850Sstevel@tonic-gate return (ESRCH); 13860Sstevel@tonic-gate } 13873448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 13880Sstevel@tonic-gate /* 13890Sstevel@tonic-gate * Update dl_addr_length and dl_addr_offset for primitives that 13900Sstevel@tonic-gate * have physical addresses as opposed to full saps 13910Sstevel@tonic-gate */ 13920Sstevel@tonic-gate switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 13930Sstevel@tonic-gate case DL_ENABMULTI_REQ: 13940Sstevel@tonic-gate /* Track the state if this is the first enabmulti */ 13952893Sja97890 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 13962893Sja97890 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 13970Sstevel@tonic-gate ip1dbg(("ndp_mcastreq: ENABMULTI\n")); 13980Sstevel@tonic-gate break; 13990Sstevel@tonic-gate case DL_DISABMULTI_REQ: 14000Sstevel@tonic-gate ip1dbg(("ndp_mcastreq: DISABMULTI\n")); 14010Sstevel@tonic-gate break; 14020Sstevel@tonic-gate default: 14030Sstevel@tonic-gate NCE_REFRELE(nce); 14040Sstevel@tonic-gate ip1dbg(("ndp_mcastreq: default\n")); 14050Sstevel@tonic-gate return (EINVAL); 14060Sstevel@tonic-gate } 14070Sstevel@tonic-gate nce_make_mapping(nce, hw_addr, (uchar_t *)addr); 14080Sstevel@tonic-gate NCE_REFRELE(nce); 14094360Smeem ill_dlpi_send(ill, mp); 14100Sstevel@tonic-gate return (0); 14110Sstevel@tonic-gate } 14120Sstevel@tonic-gate 14130Sstevel@tonic-gate /* 14140Sstevel@tonic-gate * Send a neighbor solicitation. 14150Sstevel@tonic-gate * Returns number of milliseconds after which we should either rexmit or abort. 14160Sstevel@tonic-gate * Return of zero means we should abort. 14170Sstevel@tonic-gate * The caller holds the nce_lock to protect nce_qd_mp and nce_rcnt. 14180Sstevel@tonic-gate * 14190Sstevel@tonic-gate * NOTE: This routine drops nce_lock (and later reacquires it) when sending 14200Sstevel@tonic-gate * the packet. 14210Sstevel@tonic-gate * NOTE: This routine does not consume mp. 14220Sstevel@tonic-gate */ 14230Sstevel@tonic-gate uint32_t 14240Sstevel@tonic-gate nce_solicit(nce_t *nce, mblk_t *mp) 14250Sstevel@tonic-gate { 14260Sstevel@tonic-gate ip6_t *ip6h; 1427*8485SPeter.Memishian@Sun.COM in6_addr_t sender; 1428*8485SPeter.Memishian@Sun.COM boolean_t dropped; 1429*8485SPeter.Memishian@Sun.COM 14300Sstevel@tonic-gate ASSERT(MUTEX_HELD(&nce->nce_lock)); 1431*8485SPeter.Memishian@Sun.COM 1432*8485SPeter.Memishian@Sun.COM if (nce->nce_rcnt == 0) 14330Sstevel@tonic-gate return (0); 14340Sstevel@tonic-gate 14350Sstevel@tonic-gate if (mp == NULL) { 14360Sstevel@tonic-gate ASSERT(nce->nce_qd_mp != NULL); 14370Sstevel@tonic-gate mp = nce->nce_qd_mp; 14380Sstevel@tonic-gate } else { 14390Sstevel@tonic-gate nce_queue_mp(nce, mp); 14400Sstevel@tonic-gate } 14410Sstevel@tonic-gate 14420Sstevel@tonic-gate /* Handle ip_newroute_v6 giving us IPSEC packets */ 14430Sstevel@tonic-gate if (mp->b_datap->db_type == M_CTL) 14440Sstevel@tonic-gate mp = mp->b_cont; 14450Sstevel@tonic-gate 14460Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 14470Sstevel@tonic-gate if (ip6h->ip6_nxt == IPPROTO_RAW) { 14480Sstevel@tonic-gate /* 14490Sstevel@tonic-gate * This message should have been pulled up already in 14500Sstevel@tonic-gate * ip_wput_v6. We can't do pullups here because the message 14510Sstevel@tonic-gate * could be from the nce_qd_mp which could have b_next/b_prev 14520Sstevel@tonic-gate * non-NULL. 14530Sstevel@tonic-gate */ 1454*8485SPeter.Memishian@Sun.COM ASSERT(MBLKL(mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 14550Sstevel@tonic-gate ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 14560Sstevel@tonic-gate } 1457*8485SPeter.Memishian@Sun.COM 14580Sstevel@tonic-gate /* 1459*8485SPeter.Memishian@Sun.COM * Need to copy the sender address into a local since `mp' can 1460*8485SPeter.Memishian@Sun.COM * go away once we drop nce_lock. 14610Sstevel@tonic-gate */ 1462*8485SPeter.Memishian@Sun.COM sender = ip6h->ip6_src; 14630Sstevel@tonic-gate nce->nce_rcnt--; 14640Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 1465*8485SPeter.Memishian@Sun.COM dropped = nce_xmit_solicit(nce, B_TRUE, &sender, 0); 14660Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 14670Sstevel@tonic-gate if (dropped) 14680Sstevel@tonic-gate nce->nce_rcnt++; 1469*8485SPeter.Memishian@Sun.COM return (nce->nce_ill->ill_reachable_retrans_time); 14700Sstevel@tonic-gate } 14710Sstevel@tonic-gate 14722546Scarlsonj /* 14732546Scarlsonj * Attempt to recover an address on an interface that's been marked as a 14742546Scarlsonj * duplicate. Because NCEs are destroyed when the interface goes down, there's 14752546Scarlsonj * no easy way to just probe the address and have the right thing happen if 14762546Scarlsonj * it's no longer in use. Instead, we just bring it up normally and allow the 14772546Scarlsonj * regular interface start-up logic to probe for a remaining duplicate and take 14782546Scarlsonj * us back down if necessary. 14792546Scarlsonj * Neither DHCP nor temporary addresses arrive here; they're excluded by 14802546Scarlsonj * ip_ndp_excl. 14812546Scarlsonj */ 14822546Scarlsonj /* ARGSUSED */ 14832546Scarlsonj static void 14842546Scarlsonj ip_ndp_recover(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 14852546Scarlsonj { 14862546Scarlsonj ill_t *ill = rq->q_ptr; 14872546Scarlsonj ipif_t *ipif; 14882546Scarlsonj in6_addr_t *addr = (in6_addr_t *)mp->b_rptr; 14892546Scarlsonj 14902546Scarlsonj for (ipif = ill->ill_ipif; ipif != NULL; ipif = ipif->ipif_next) { 14912546Scarlsonj /* 14922546Scarlsonj * We do not support recovery of proxy ARP'd interfaces, 14932546Scarlsonj * because the system lacks a complete proxy ARP mechanism. 14942546Scarlsonj */ 14952546Scarlsonj if ((ipif->ipif_flags & IPIF_POINTOPOINT) || 14962546Scarlsonj !IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, addr)) { 14972546Scarlsonj continue; 14982546Scarlsonj } 14992546Scarlsonj 15002546Scarlsonj /* 15013322Scarlsonj * If we have already recovered or if the interface is going 15023322Scarlsonj * away, then ignore. 15032546Scarlsonj */ 15042546Scarlsonj mutex_enter(&ill->ill_lock); 15053322Scarlsonj if (!(ipif->ipif_flags & IPIF_DUPLICATE) || 1506*8485SPeter.Memishian@Sun.COM (ipif->ipif_state_flags & IPIF_CONDEMNED)) { 15072546Scarlsonj mutex_exit(&ill->ill_lock); 15082546Scarlsonj continue; 15092546Scarlsonj } 15102546Scarlsonj 15112546Scarlsonj ipif->ipif_flags &= ~IPIF_DUPLICATE; 15122546Scarlsonj ill->ill_ipif_dup_count--; 15132546Scarlsonj mutex_exit(&ill->ill_lock); 15142546Scarlsonj ipif->ipif_was_dup = B_TRUE; 15152546Scarlsonj 1516*8485SPeter.Memishian@Sun.COM VERIFY(ipif_ndp_up(ipif, B_TRUE) != EINPROGRESS); 1517*8485SPeter.Memishian@Sun.COM (void) ipif_up_done_v6(ipif); 15182546Scarlsonj } 15192546Scarlsonj freeb(mp); 15202546Scarlsonj } 15212546Scarlsonj 15222546Scarlsonj /* 15232546Scarlsonj * Attempt to recover an IPv6 interface that's been shut down as a duplicate. 15242546Scarlsonj * As long as someone else holds the address, the interface will stay down. 15252546Scarlsonj * When that conflict goes away, the interface is brought back up. This is 15262546Scarlsonj * done so that accidental shutdowns of addresses aren't made permanent. Your 15272546Scarlsonj * server will recover from a failure. 15282546Scarlsonj * 15292546Scarlsonj * For DHCP and temporary addresses, recovery is not done in the kernel. 15302546Scarlsonj * Instead, it's handled by user space processes (dhcpagent and in.ndpd). 15312546Scarlsonj * 15322546Scarlsonj * This function is entered on a timer expiry; the ID is in ipif_recovery_id. 15332546Scarlsonj */ 15342546Scarlsonj static void 15352546Scarlsonj ipif6_dup_recovery(void *arg) 15362546Scarlsonj { 15372546Scarlsonj ipif_t *ipif = arg; 15382546Scarlsonj 15392546Scarlsonj ipif->ipif_recovery_id = 0; 15402546Scarlsonj if (!(ipif->ipif_flags & IPIF_DUPLICATE)) 15412546Scarlsonj return; 15422546Scarlsonj 15433322Scarlsonj /* 15443322Scarlsonj * No lock, because this is just an optimization. 15453322Scarlsonj */ 1546*8485SPeter.Memishian@Sun.COM if (ipif->ipif_state_flags & IPIF_CONDEMNED) 15473322Scarlsonj return; 15483322Scarlsonj 15492546Scarlsonj /* If the link is down, we'll retry this later */ 15502546Scarlsonj if (!(ipif->ipif_ill->ill_phyint->phyint_flags & PHYI_RUNNING)) 15512546Scarlsonj return; 15522546Scarlsonj 15532546Scarlsonj ndp_do_recovery(ipif); 15542546Scarlsonj } 15552546Scarlsonj 15562546Scarlsonj /* 15572546Scarlsonj * Perform interface recovery by forcing the duplicate interfaces up and 15582546Scarlsonj * allowing the system to determine which ones should stay up. 15592546Scarlsonj * 15602546Scarlsonj * Called both by recovery timer expiry and link-up notification. 15612546Scarlsonj */ 15620Sstevel@tonic-gate void 15632546Scarlsonj ndp_do_recovery(ipif_t *ipif) 15642546Scarlsonj { 15652546Scarlsonj ill_t *ill = ipif->ipif_ill; 15662546Scarlsonj mblk_t *mp; 15673448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 15682546Scarlsonj 15692546Scarlsonj mp = allocb(sizeof (ipif->ipif_v6lcl_addr), BPRI_MED); 15702546Scarlsonj if (mp == NULL) { 15713322Scarlsonj mutex_enter(&ill->ill_lock); 15723322Scarlsonj if (ipif->ipif_recovery_id == 0 && 1573*8485SPeter.Memishian@Sun.COM !(ipif->ipif_state_flags & IPIF_CONDEMNED)) { 15743322Scarlsonj ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 15753448Sdh155122 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 15763322Scarlsonj } 15773322Scarlsonj mutex_exit(&ill->ill_lock); 15782546Scarlsonj } else { 1579*8485SPeter.Memishian@Sun.COM /* 1580*8485SPeter.Memishian@Sun.COM * A recovery timer may still be running if we got here from 1581*8485SPeter.Memishian@Sun.COM * ill_restart_dad(); cancel that timer. 1582*8485SPeter.Memishian@Sun.COM */ 1583*8485SPeter.Memishian@Sun.COM if (ipif->ipif_recovery_id != 0) 1584*8485SPeter.Memishian@Sun.COM (void) untimeout(ipif->ipif_recovery_id); 1585*8485SPeter.Memishian@Sun.COM ipif->ipif_recovery_id = 0; 1586*8485SPeter.Memishian@Sun.COM 15872546Scarlsonj bcopy(&ipif->ipif_v6lcl_addr, mp->b_rptr, 15882546Scarlsonj sizeof (ipif->ipif_v6lcl_addr)); 15892546Scarlsonj ill_refhold(ill); 15904360Smeem qwriter_ip(ill, ill->ill_rq, mp, ip_ndp_recover, NEW_OP, 15914360Smeem B_FALSE); 15922546Scarlsonj } 15932546Scarlsonj } 15942546Scarlsonj 15952546Scarlsonj /* 1596*8485SPeter.Memishian@Sun.COM * Find the MAC and IP addresses in an NA/NS message. 15972546Scarlsonj */ 1598*8485SPeter.Memishian@Sun.COM static void 1599*8485SPeter.Memishian@Sun.COM ip_ndp_find_addresses(mblk_t *mp, mblk_t *dl_mp, ill_t *ill, in6_addr_t *targp, 1600*8485SPeter.Memishian@Sun.COM uchar_t **haddr, uint_t *haddrlenp) 16012546Scarlsonj { 1602*8485SPeter.Memishian@Sun.COM ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1603*8485SPeter.Memishian@Sun.COM icmp6_t *icmp6 = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 1604*8485SPeter.Memishian@Sun.COM nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 1605*8485SPeter.Memishian@Sun.COM nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 16062546Scarlsonj uchar_t *addr; 1607*8485SPeter.Memishian@Sun.COM int alen = 0; 1608*8485SPeter.Memishian@Sun.COM 16092546Scarlsonj if (dl_mp == NULL) { 16102546Scarlsonj nd_opt_hdr_t *opt; 1611*8485SPeter.Memishian@Sun.COM int len; 16122546Scarlsonj 16132546Scarlsonj /* 16142546Scarlsonj * If it's from the fast-path, then it can't be a probe 1615*8485SPeter.Memishian@Sun.COM * message, and thus must include a linkaddr option. 16162546Scarlsonj * Extract that here. 16172546Scarlsonj */ 1618*8485SPeter.Memishian@Sun.COM switch (icmp6->icmp6_type) { 1619*8485SPeter.Memishian@Sun.COM case ND_NEIGHBOR_SOLICIT: 1620*8485SPeter.Memishian@Sun.COM len = mp->b_wptr - (uchar_t *)ns; 1621*8485SPeter.Memishian@Sun.COM if ((len -= sizeof (*ns)) > 0) { 1622*8485SPeter.Memishian@Sun.COM opt = ndp_get_option((nd_opt_hdr_t *)(ns + 1), 1623*8485SPeter.Memishian@Sun.COM len, ND_OPT_SOURCE_LINKADDR); 16242546Scarlsonj } 1625*8485SPeter.Memishian@Sun.COM break; 1626*8485SPeter.Memishian@Sun.COM case ND_NEIGHBOR_ADVERT: 1627*8485SPeter.Memishian@Sun.COM len = mp->b_wptr - (uchar_t *)na; 1628*8485SPeter.Memishian@Sun.COM if ((len -= sizeof (*na)) > 0) { 1629*8485SPeter.Memishian@Sun.COM opt = ndp_get_option((nd_opt_hdr_t *)(na + 1), 1630*8485SPeter.Memishian@Sun.COM len, ND_OPT_TARGET_LINKADDR); 1631*8485SPeter.Memishian@Sun.COM } 1632*8485SPeter.Memishian@Sun.COM break; 16332546Scarlsonj } 1634*8485SPeter.Memishian@Sun.COM 1635*8485SPeter.Memishian@Sun.COM if (opt != NULL && opt->nd_opt_len * 8 - sizeof (*opt) >= 1636*8485SPeter.Memishian@Sun.COM ill->ill_nd_lla_len) { 1637*8485SPeter.Memishian@Sun.COM addr = (uchar_t *)(opt + 1); 1638*8485SPeter.Memishian@Sun.COM alen = ill->ill_nd_lla_len; 1639*8485SPeter.Memishian@Sun.COM } 1640*8485SPeter.Memishian@Sun.COM 16412546Scarlsonj /* 16422546Scarlsonj * We cheat a bit here for the sake of printing usable log 16432546Scarlsonj * messages in the rare case where the reply we got was unicast 16442546Scarlsonj * without a source linkaddr option, and the interface is in 16452546Scarlsonj * fastpath mode. (Sigh.) 16462546Scarlsonj */ 16472546Scarlsonj if (alen == 0 && ill->ill_type == IFT_ETHER && 16482546Scarlsonj MBLKHEAD(mp) >= sizeof (struct ether_header)) { 16492546Scarlsonj struct ether_header *pether; 16502546Scarlsonj 16512546Scarlsonj pether = (struct ether_header *)((char *)ip6h - 16522546Scarlsonj sizeof (*pether)); 16532546Scarlsonj addr = pether->ether_shost.ether_addr_octet; 16542546Scarlsonj alen = ETHERADDRL; 16552546Scarlsonj } 16562546Scarlsonj } else { 16572546Scarlsonj dl_unitdata_ind_t *dlu; 16582546Scarlsonj 16592546Scarlsonj dlu = (dl_unitdata_ind_t *)dl_mp->b_rptr; 16602546Scarlsonj alen = dlu->dl_src_addr_length; 16612546Scarlsonj if (alen > 0 && dlu->dl_src_addr_offset >= sizeof (*dlu) && 16622546Scarlsonj dlu->dl_src_addr_offset + alen <= MBLKL(dl_mp)) { 16632546Scarlsonj addr = dl_mp->b_rptr + dlu->dl_src_addr_offset; 16642546Scarlsonj if (ill->ill_sap_length < 0) { 16652546Scarlsonj alen += ill->ill_sap_length; 16662546Scarlsonj } else { 16672546Scarlsonj addr += ill->ill_sap_length; 16682546Scarlsonj alen -= ill->ill_sap_length; 16692546Scarlsonj } 16702546Scarlsonj } 16712546Scarlsonj } 1672*8485SPeter.Memishian@Sun.COM 16732546Scarlsonj if (alen > 0) { 16742546Scarlsonj *haddr = addr; 1675*8485SPeter.Memishian@Sun.COM *haddrlenp = alen; 16762546Scarlsonj } else { 16772546Scarlsonj *haddr = NULL; 1678*8485SPeter.Memishian@Sun.COM *haddrlenp = 0; 16792546Scarlsonj } 1680*8485SPeter.Memishian@Sun.COM 1681*8485SPeter.Memishian@Sun.COM /* nd_ns_target and nd_na_target are at the same offset, so we cheat */ 1682*8485SPeter.Memishian@Sun.COM *targp = ns->nd_ns_target; 16832546Scarlsonj } 16842546Scarlsonj 16852546Scarlsonj /* 16862546Scarlsonj * This is for exclusive changes due to NDP duplicate address detection 16872546Scarlsonj * failure. 16882546Scarlsonj */ 16892546Scarlsonj /* ARGSUSED */ 16902546Scarlsonj static void 16912546Scarlsonj ip_ndp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 16922546Scarlsonj { 16932546Scarlsonj ill_t *ill = rq->q_ptr; 16942546Scarlsonj ipif_t *ipif; 1695*8485SPeter.Memishian@Sun.COM mblk_t *dl_mp = NULL; 1696*8485SPeter.Memishian@Sun.COM uchar_t *haddr; 1697*8485SPeter.Memishian@Sun.COM uint_t haddrlen; 16983448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 1699*8485SPeter.Memishian@Sun.COM in6_addr_t targ; 17002546Scarlsonj 17012546Scarlsonj if (DB_TYPE(mp) != M_DATA) { 17022546Scarlsonj dl_mp = mp; 17032546Scarlsonj mp = mp->b_cont; 17042546Scarlsonj } 1705*8485SPeter.Memishian@Sun.COM 1706*8485SPeter.Memishian@Sun.COM ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 1707*8485SPeter.Memishian@Sun.COM if (haddr != NULL && haddrlen == ill->ill_phys_addr_length) { 17082546Scarlsonj /* 1709*8485SPeter.Memishian@Sun.COM * Ignore conflicts generated by misbehaving switches that 1710*8485SPeter.Memishian@Sun.COM * just reflect our own messages back to us. For IPMP, we may 1711*8485SPeter.Memishian@Sun.COM * see reflections across any ill in the illgrp. 17122546Scarlsonj */ 1713*8485SPeter.Memishian@Sun.COM if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 1714*8485SPeter.Memishian@Sun.COM IS_UNDER_IPMP(ill) && 1715*8485SPeter.Memishian@Sun.COM ipmp_illgrp_find_ill(ill->ill_grp, haddr, haddrlen) != NULL) 1716*8485SPeter.Memishian@Sun.COM goto ignore_conflict; 1717*8485SPeter.Memishian@Sun.COM } 1718*8485SPeter.Memishian@Sun.COM 1719*8485SPeter.Memishian@Sun.COM /* 1720*8485SPeter.Memishian@Sun.COM * Look up the appropriate ipif. 1721*8485SPeter.Memishian@Sun.COM */ 1722*8485SPeter.Memishian@Sun.COM ipif = ipif_lookup_addr_v6(&targ, ill, ALL_ZONES, NULL, NULL, NULL, 1723*8485SPeter.Memishian@Sun.COM NULL, ipst); 1724*8485SPeter.Memishian@Sun.COM if (ipif == NULL) 1725*8485SPeter.Memishian@Sun.COM goto ignore_conflict; 1726*8485SPeter.Memishian@Sun.COM 1727*8485SPeter.Memishian@Sun.COM /* Reload the ill to match the ipif */ 1728*8485SPeter.Memishian@Sun.COM ill = ipif->ipif_ill; 1729*8485SPeter.Memishian@Sun.COM 1730*8485SPeter.Memishian@Sun.COM /* If it's already duplicate or ineligible, then don't do anything. */ 1731*8485SPeter.Memishian@Sun.COM if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 1732*8485SPeter.Memishian@Sun.COM ipif_refrele(ipif); 17332546Scarlsonj goto ignore_conflict; 17342546Scarlsonj } 17354972Smeem 1736*8485SPeter.Memishian@Sun.COM /* 1737*8485SPeter.Memishian@Sun.COM * If this is a failure during duplicate recovery, then don't 1738*8485SPeter.Memishian@Sun.COM * complain. It may take a long time to recover. 1739*8485SPeter.Memishian@Sun.COM */ 1740*8485SPeter.Memishian@Sun.COM if (!ipif->ipif_was_dup) { 1741*8485SPeter.Memishian@Sun.COM char ibuf[LIFNAMSIZ]; 1742*8485SPeter.Memishian@Sun.COM char hbuf[MAC_STR_LEN]; 1743*8485SPeter.Memishian@Sun.COM char sbuf[INET6_ADDRSTRLEN]; 1744*8485SPeter.Memishian@Sun.COM 1745*8485SPeter.Memishian@Sun.COM ipif_get_name(ipif, ibuf, sizeof (ibuf)); 1746*8485SPeter.Memishian@Sun.COM cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 1747*8485SPeter.Memishian@Sun.COM " disabled", ibuf, 1748*8485SPeter.Memishian@Sun.COM inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1749*8485SPeter.Memishian@Sun.COM mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf))); 17502546Scarlsonj } 1751*8485SPeter.Memishian@Sun.COM mutex_enter(&ill->ill_lock); 1752*8485SPeter.Memishian@Sun.COM ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 1753*8485SPeter.Memishian@Sun.COM ipif->ipif_flags |= IPIF_DUPLICATE; 1754*8485SPeter.Memishian@Sun.COM ill->ill_ipif_dup_count++; 1755*8485SPeter.Memishian@Sun.COM mutex_exit(&ill->ill_lock); 1756*8485SPeter.Memishian@Sun.COM (void) ipif_down(ipif, NULL, NULL); 1757*8485SPeter.Memishian@Sun.COM ipif_down_tail(ipif); 1758*8485SPeter.Memishian@Sun.COM mutex_enter(&ill->ill_lock); 1759*8485SPeter.Memishian@Sun.COM if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 1760*8485SPeter.Memishian@Sun.COM ill->ill_net_type == IRE_IF_RESOLVER && 1761*8485SPeter.Memishian@Sun.COM !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1762*8485SPeter.Memishian@Sun.COM ipst->ips_ip_dup_recovery > 0) { 1763*8485SPeter.Memishian@Sun.COM ASSERT(ipif->ipif_recovery_id == 0); 1764*8485SPeter.Memishian@Sun.COM ipif->ipif_recovery_id = timeout(ipif6_dup_recovery, 1765*8485SPeter.Memishian@Sun.COM ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 1766*8485SPeter.Memishian@Sun.COM } 1767*8485SPeter.Memishian@Sun.COM mutex_exit(&ill->ill_lock); 1768*8485SPeter.Memishian@Sun.COM ipif_refrele(ipif); 17692546Scarlsonj ignore_conflict: 17702546Scarlsonj if (dl_mp != NULL) 17712546Scarlsonj freeb(dl_mp); 17722546Scarlsonj freemsg(mp); 17732546Scarlsonj } 17742546Scarlsonj 17752546Scarlsonj /* 17762546Scarlsonj * Handle failure by tearing down the ipifs with the specified address. Note 17772546Scarlsonj * that tearing down the ipif also means deleting the nce through ipif_down, so 17782546Scarlsonj * it's not possible to do recovery by just restarting the nce timer. Instead, 17792546Scarlsonj * we start a timer on the ipif. 17802546Scarlsonj */ 17812546Scarlsonj static void 1782*8485SPeter.Memishian@Sun.COM ip_ndp_failure(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 17832546Scarlsonj { 17842546Scarlsonj if ((mp = copymsg(mp)) != NULL) { 17852546Scarlsonj if (dl_mp == NULL) 17862546Scarlsonj dl_mp = mp; 17872546Scarlsonj else if ((dl_mp = copyb(dl_mp)) != NULL) 17882546Scarlsonj dl_mp->b_cont = mp; 17892546Scarlsonj if (dl_mp == NULL) { 17902546Scarlsonj freemsg(mp); 17912546Scarlsonj } else { 17922546Scarlsonj ill_refhold(ill); 17934360Smeem qwriter_ip(ill, ill->ill_rq, dl_mp, ip_ndp_excl, NEW_OP, 17944360Smeem B_FALSE); 17952546Scarlsonj } 17962546Scarlsonj } 17972546Scarlsonj } 17982546Scarlsonj 17992546Scarlsonj /* 18002546Scarlsonj * Handle a discovered conflict: some other system is advertising that it owns 18012546Scarlsonj * one of our IP addresses. We need to defend ourselves, or just shut down the 18022546Scarlsonj * interface. 18032546Scarlsonj */ 18042546Scarlsonj static void 18052546Scarlsonj ip_ndp_conflict(ill_t *ill, mblk_t *mp, mblk_t *dl_mp, nce_t *nce) 18062546Scarlsonj { 18072546Scarlsonj ipif_t *ipif; 18082546Scarlsonj uint32_t now; 18092546Scarlsonj uint_t maxdefense; 18102546Scarlsonj uint_t defs; 18113448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 18122546Scarlsonj 18132546Scarlsonj ipif = ipif_lookup_addr_v6(&nce->nce_addr, ill, ALL_ZONES, NULL, NULL, 18143448Sdh155122 NULL, NULL, ipst); 18152546Scarlsonj if (ipif == NULL) 18162546Scarlsonj return; 1817*8485SPeter.Memishian@Sun.COM 18182546Scarlsonj /* 18192546Scarlsonj * First, figure out if this address is disposable. 18202546Scarlsonj */ 18212546Scarlsonj if (ipif->ipif_flags & (IPIF_DHCPRUNNING | IPIF_TEMPORARY)) 18223448Sdh155122 maxdefense = ipst->ips_ip_max_temp_defend; 18232546Scarlsonj else 18243448Sdh155122 maxdefense = ipst->ips_ip_max_defend; 18252546Scarlsonj 18262546Scarlsonj /* 18272546Scarlsonj * Now figure out how many times we've defended ourselves. Ignore 18282546Scarlsonj * defenses that happened long in the past. 18292546Scarlsonj */ 18302546Scarlsonj now = gethrestime_sec(); 18312546Scarlsonj mutex_enter(&nce->nce_lock); 18322546Scarlsonj if ((defs = nce->nce_defense_count) > 0 && 18333448Sdh155122 now - nce->nce_defense_time > ipst->ips_ip_defend_interval) { 18342546Scarlsonj nce->nce_defense_count = defs = 0; 18352546Scarlsonj } 18362546Scarlsonj nce->nce_defense_count++; 18372546Scarlsonj nce->nce_defense_time = now; 18382546Scarlsonj mutex_exit(&nce->nce_lock); 18392546Scarlsonj ipif_refrele(ipif); 18402546Scarlsonj 18412546Scarlsonj /* 18422546Scarlsonj * If we've defended ourselves too many times already, then give up and 18432546Scarlsonj * tear down the interface(s) using this address. Otherwise, defend by 18442546Scarlsonj * sending out an unsolicited Neighbor Advertisement. 18452546Scarlsonj */ 18462546Scarlsonj if (defs >= maxdefense) { 1847*8485SPeter.Memishian@Sun.COM ip_ndp_failure(ill, mp, dl_mp); 18482546Scarlsonj } else { 18492546Scarlsonj char hbuf[MAC_STR_LEN]; 18502546Scarlsonj char sbuf[INET6_ADDRSTRLEN]; 18512546Scarlsonj uchar_t *haddr; 1852*8485SPeter.Memishian@Sun.COM uint_t haddrlen; 1853*8485SPeter.Memishian@Sun.COM in6_addr_t targ; 1854*8485SPeter.Memishian@Sun.COM 1855*8485SPeter.Memishian@Sun.COM ip_ndp_find_addresses(mp, dl_mp, ill, &targ, &haddr, &haddrlen); 18562546Scarlsonj cmn_err(CE_WARN, "node %s is using our IP address %s on %s", 1857*8485SPeter.Memishian@Sun.COM mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)), 1858*8485SPeter.Memishian@Sun.COM inet_ntop(AF_INET6, &targ, sbuf, sizeof (sbuf)), 1859*8485SPeter.Memishian@Sun.COM ill->ill_name); 1860*8485SPeter.Memishian@Sun.COM 1861*8485SPeter.Memishian@Sun.COM (void) nce_xmit_advert(nce, B_FALSE, &ipv6_all_hosts_mcast, 0); 18622546Scarlsonj } 18632546Scarlsonj } 18642546Scarlsonj 18652546Scarlsonj static void 18662546Scarlsonj ndp_input_solicit(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 18670Sstevel@tonic-gate { 18680Sstevel@tonic-gate nd_neighbor_solicit_t *ns; 18690Sstevel@tonic-gate uint32_t hlen = ill->ill_nd_lla_len; 18700Sstevel@tonic-gate uchar_t *haddr = NULL; 18710Sstevel@tonic-gate icmp6_t *icmp_nd; 18720Sstevel@tonic-gate ip6_t *ip6h; 18730Sstevel@tonic-gate nce_t *our_nce = NULL; 18740Sstevel@tonic-gate in6_addr_t target; 18750Sstevel@tonic-gate in6_addr_t src; 18760Sstevel@tonic-gate int len; 18770Sstevel@tonic-gate int flag = 0; 18780Sstevel@tonic-gate nd_opt_hdr_t *opt = NULL; 18790Sstevel@tonic-gate boolean_t bad_solicit = B_FALSE; 18800Sstevel@tonic-gate mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 18810Sstevel@tonic-gate 18820Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 18830Sstevel@tonic-gate icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 18840Sstevel@tonic-gate len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 18850Sstevel@tonic-gate src = ip6h->ip6_src; 18860Sstevel@tonic-gate ns = (nd_neighbor_solicit_t *)icmp_nd; 18870Sstevel@tonic-gate target = ns->nd_ns_target; 18880Sstevel@tonic-gate if (IN6_IS_ADDR_MULTICAST(&target)) { 18890Sstevel@tonic-gate if (ip_debug > 2) { 18900Sstevel@tonic-gate /* ip1dbg */ 18910Sstevel@tonic-gate pr_addr_dbg("ndp_input_solicit: Target is" 18920Sstevel@tonic-gate " multicast! %s\n", AF_INET6, &target); 18930Sstevel@tonic-gate } 18940Sstevel@tonic-gate bad_solicit = B_TRUE; 18950Sstevel@tonic-gate goto done; 18960Sstevel@tonic-gate } 18970Sstevel@tonic-gate if (len > sizeof (nd_neighbor_solicit_t)) { 18980Sstevel@tonic-gate /* Options present */ 18990Sstevel@tonic-gate opt = (nd_opt_hdr_t *)&ns[1]; 19000Sstevel@tonic-gate len -= sizeof (nd_neighbor_solicit_t); 19010Sstevel@tonic-gate if (!ndp_verify_optlen(opt, len)) { 19020Sstevel@tonic-gate ip1dbg(("ndp_input_solicit: Bad opt len\n")); 19030Sstevel@tonic-gate bad_solicit = B_TRUE; 19040Sstevel@tonic-gate goto done; 19050Sstevel@tonic-gate } 1906*8485SPeter.Memishian@Sun.COM 19070Sstevel@tonic-gate } 19080Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&src)) { 19090Sstevel@tonic-gate /* Check to see if this is a valid DAD solicitation */ 19100Sstevel@tonic-gate if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&ip6h->ip6_dst)) { 19110Sstevel@tonic-gate if (ip_debug > 2) { 19120Sstevel@tonic-gate /* ip1dbg */ 19130Sstevel@tonic-gate pr_addr_dbg("ndp_input_solicit: IPv6 " 19140Sstevel@tonic-gate "Destination is not solicited node " 19150Sstevel@tonic-gate "multicast %s\n", AF_INET6, 19160Sstevel@tonic-gate &ip6h->ip6_dst); 19170Sstevel@tonic-gate } 19180Sstevel@tonic-gate bad_solicit = B_TRUE; 19190Sstevel@tonic-gate goto done; 19200Sstevel@tonic-gate } 19210Sstevel@tonic-gate } 19220Sstevel@tonic-gate 1923*8485SPeter.Memishian@Sun.COM /* 1924*8485SPeter.Memishian@Sun.COM * NOTE: with IPMP, it's possible the nominated multicast ill (which 1925*8485SPeter.Memishian@Sun.COM * received this packet if it's multicast) is not the ill tied to 1926*8485SPeter.Memishian@Sun.COM * e.g. the IPMP ill's data link-local. So we match across the illgrp 1927*8485SPeter.Memishian@Sun.COM * to ensure we find the associated NCE. 1928*8485SPeter.Memishian@Sun.COM */ 1929*8485SPeter.Memishian@Sun.COM our_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE); 19300Sstevel@tonic-gate /* 19310Sstevel@tonic-gate * If this is a valid Solicitation, a permanent 19320Sstevel@tonic-gate * entry should exist in the cache 19330Sstevel@tonic-gate */ 19340Sstevel@tonic-gate if (our_nce == NULL || 19350Sstevel@tonic-gate !(our_nce->nce_flags & NCE_F_PERMANENT)) { 19360Sstevel@tonic-gate ip1dbg(("ndp_input_solicit: Wrong target in NS?!" 19370Sstevel@tonic-gate "ifname=%s ", ill->ill_name)); 19380Sstevel@tonic-gate if (ip_debug > 2) { 19390Sstevel@tonic-gate /* ip1dbg */ 19400Sstevel@tonic-gate pr_addr_dbg(" dst %s\n", AF_INET6, &target); 19410Sstevel@tonic-gate } 19420Sstevel@tonic-gate bad_solicit = B_TRUE; 19430Sstevel@tonic-gate goto done; 19440Sstevel@tonic-gate } 19450Sstevel@tonic-gate 19460Sstevel@tonic-gate /* At this point we should have a verified NS per spec */ 19470Sstevel@tonic-gate if (opt != NULL) { 19480Sstevel@tonic-gate opt = ndp_get_option(opt, len, ND_OPT_SOURCE_LINKADDR); 19490Sstevel@tonic-gate if (opt != NULL) { 19500Sstevel@tonic-gate haddr = (uchar_t *)&opt[1]; 19512546Scarlsonj if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 19520Sstevel@tonic-gate hlen == 0) { 1953*8485SPeter.Memishian@Sun.COM ip1dbg(("ndp_input_solicit: bad SLLA\n")); 19540Sstevel@tonic-gate bad_solicit = B_TRUE; 19550Sstevel@tonic-gate goto done; 19560Sstevel@tonic-gate } 19570Sstevel@tonic-gate } 19580Sstevel@tonic-gate } 19592546Scarlsonj 19602699Scarlsonj /* If sending directly to peer, set the unicast flag */ 19612699Scarlsonj if (!IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) 19620Sstevel@tonic-gate flag |= NDP_UNICAST; 19630Sstevel@tonic-gate 19640Sstevel@tonic-gate /* 19650Sstevel@tonic-gate * Create/update the entry for the soliciting node. 19660Sstevel@tonic-gate * or respond to outstanding queries, don't if 19670Sstevel@tonic-gate * the source is unspecified address. 19680Sstevel@tonic-gate */ 19690Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&src)) { 19702546Scarlsonj int err; 19710Sstevel@tonic-gate nce_t *nnce; 19720Sstevel@tonic-gate 19732535Ssangeeta ASSERT(ill->ill_isv6); 19742546Scarlsonj /* 19752546Scarlsonj * Regular solicitations *must* include the Source Link-Layer 19762546Scarlsonj * Address option. Ignore messages that do not. 19772546Scarlsonj */ 19782546Scarlsonj if (haddr == NULL && IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst)) { 19792546Scarlsonj ip1dbg(("ndp_input_solicit: source link-layer address " 19802546Scarlsonj "option missing with a specified source.\n")); 19812546Scarlsonj bad_solicit = B_TRUE; 19822546Scarlsonj goto done; 19832546Scarlsonj } 19842546Scarlsonj 19852546Scarlsonj /* 19862546Scarlsonj * This is a regular solicitation. If we're still in the 19872546Scarlsonj * process of verifying the address, then don't respond at all 19882546Scarlsonj * and don't keep track of the sender. 19892546Scarlsonj */ 19902546Scarlsonj if (our_nce->nce_state == ND_PROBE) 19912546Scarlsonj goto done; 19922546Scarlsonj 19932546Scarlsonj /* 19942546Scarlsonj * If the solicitation doesn't have sender hardware address 19952546Scarlsonj * (legal for unicast solicitation), then process without 19962546Scarlsonj * installing the return NCE. Either we already know it, or 19972546Scarlsonj * we'll be forced to look it up when (and if) we reply to the 19982546Scarlsonj * packet. 19992546Scarlsonj */ 20002546Scarlsonj if (haddr == NULL) 20012546Scarlsonj goto no_source; 20022546Scarlsonj 20034714Ssowmini err = ndp_lookup_then_add_v6(ill, 2004*8485SPeter.Memishian@Sun.COM B_FALSE, 20050Sstevel@tonic-gate haddr, 20060Sstevel@tonic-gate &src, /* Soliciting nodes address */ 20070Sstevel@tonic-gate &ipv6_all_ones, 20080Sstevel@tonic-gate &ipv6_all_zeros, 20090Sstevel@tonic-gate 0, 20100Sstevel@tonic-gate 0, 20110Sstevel@tonic-gate ND_STALE, 20124714Ssowmini &nnce); 20130Sstevel@tonic-gate switch (err) { 20140Sstevel@tonic-gate case 0: 20150Sstevel@tonic-gate /* done with this entry */ 20160Sstevel@tonic-gate NCE_REFRELE(nnce); 20170Sstevel@tonic-gate break; 20180Sstevel@tonic-gate case EEXIST: 20190Sstevel@tonic-gate /* 2020*8485SPeter.Memishian@Sun.COM * B_FALSE indicates this is not an an advertisement. 20210Sstevel@tonic-gate */ 20220Sstevel@tonic-gate ndp_process(nnce, haddr, 0, B_FALSE); 20230Sstevel@tonic-gate NCE_REFRELE(nnce); 20240Sstevel@tonic-gate break; 20250Sstevel@tonic-gate default: 20260Sstevel@tonic-gate ip1dbg(("ndp_input_solicit: Can't create NCE %d\n", 20270Sstevel@tonic-gate err)); 20280Sstevel@tonic-gate goto done; 20290Sstevel@tonic-gate } 20302546Scarlsonj no_source: 20310Sstevel@tonic-gate flag |= NDP_SOLICITED; 20320Sstevel@tonic-gate } else { 20330Sstevel@tonic-gate /* 20342546Scarlsonj * No source link layer address option should be present in a 20352546Scarlsonj * valid DAD request. 20362546Scarlsonj */ 20372546Scarlsonj if (haddr != NULL) { 20382546Scarlsonj ip1dbg(("ndp_input_solicit: source link-layer address " 20392546Scarlsonj "option present with an unspecified source.\n")); 20402546Scarlsonj bad_solicit = B_TRUE; 20412546Scarlsonj goto done; 20422546Scarlsonj } 20432546Scarlsonj if (our_nce->nce_state == ND_PROBE) { 20442546Scarlsonj /* 20452546Scarlsonj * Internally looped-back probes won't have DLPI 20462546Scarlsonj * attached to them. External ones (which are sent by 20472546Scarlsonj * multicast) always will. Just ignore our own 20482546Scarlsonj * transmissions. 20492546Scarlsonj */ 20502546Scarlsonj if (dl_mp != NULL) { 20512546Scarlsonj /* 20522546Scarlsonj * If someone else is probing our address, then 20532546Scarlsonj * we've crossed wires. Declare failure. 20542546Scarlsonj */ 2055*8485SPeter.Memishian@Sun.COM ip_ndp_failure(ill, mp, dl_mp); 20562546Scarlsonj } 20572546Scarlsonj goto done; 20582546Scarlsonj } 20592546Scarlsonj /* 20602546Scarlsonj * This is a DAD probe. Multicast the advertisement to the 20612546Scarlsonj * all-nodes address. 20620Sstevel@tonic-gate */ 20630Sstevel@tonic-gate src = ipv6_all_hosts_mcast; 20640Sstevel@tonic-gate } 20650Sstevel@tonic-gate /* Response to a solicitation */ 2066*8485SPeter.Memishian@Sun.COM (void) nce_xmit_advert(our_nce, B_TRUE, &src, flag); 20670Sstevel@tonic-gate done: 20680Sstevel@tonic-gate if (bad_solicit) 20690Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpInBadNeighborSolicitations); 20700Sstevel@tonic-gate if (our_nce != NULL) 20710Sstevel@tonic-gate NCE_REFRELE(our_nce); 20720Sstevel@tonic-gate } 20730Sstevel@tonic-gate 20740Sstevel@tonic-gate void 20752546Scarlsonj ndp_input_advert(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 20760Sstevel@tonic-gate { 20770Sstevel@tonic-gate nd_neighbor_advert_t *na; 20780Sstevel@tonic-gate uint32_t hlen = ill->ill_nd_lla_len; 20790Sstevel@tonic-gate uchar_t *haddr = NULL; 20800Sstevel@tonic-gate icmp6_t *icmp_nd; 20810Sstevel@tonic-gate ip6_t *ip6h; 20820Sstevel@tonic-gate nce_t *dst_nce = NULL; 20830Sstevel@tonic-gate in6_addr_t target; 20840Sstevel@tonic-gate nd_opt_hdr_t *opt = NULL; 20850Sstevel@tonic-gate int len; 2086*8485SPeter.Memishian@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 20870Sstevel@tonic-gate mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 20880Sstevel@tonic-gate 20890Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 20900Sstevel@tonic-gate icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 20910Sstevel@tonic-gate len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 20920Sstevel@tonic-gate na = (nd_neighbor_advert_t *)icmp_nd; 20930Sstevel@tonic-gate if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 20940Sstevel@tonic-gate (na->nd_na_flags_reserved & ND_NA_FLAG_SOLICITED)) { 20950Sstevel@tonic-gate ip1dbg(("ndp_input_advert: Target is multicast but the " 20960Sstevel@tonic-gate "solicited flag is not zero\n")); 20970Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 20980Sstevel@tonic-gate return; 20990Sstevel@tonic-gate } 21000Sstevel@tonic-gate target = na->nd_na_target; 21010Sstevel@tonic-gate if (IN6_IS_ADDR_MULTICAST(&target)) { 21020Sstevel@tonic-gate ip1dbg(("ndp_input_advert: Target is multicast!\n")); 21030Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 21040Sstevel@tonic-gate return; 21050Sstevel@tonic-gate } 21060Sstevel@tonic-gate if (len > sizeof (nd_neighbor_advert_t)) { 21070Sstevel@tonic-gate opt = (nd_opt_hdr_t *)&na[1]; 21080Sstevel@tonic-gate if (!ndp_verify_optlen(opt, 21090Sstevel@tonic-gate len - sizeof (nd_neighbor_advert_t))) { 21102546Scarlsonj ip1dbg(("ndp_input_advert: cannot verify SLLA\n")); 21110Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpInBadNeighborAdvertisements); 21120Sstevel@tonic-gate return; 21130Sstevel@tonic-gate } 21140Sstevel@tonic-gate /* At this point we have a verified NA per spec */ 21150Sstevel@tonic-gate len -= sizeof (nd_neighbor_advert_t); 21160Sstevel@tonic-gate opt = ndp_get_option(opt, len, ND_OPT_TARGET_LINKADDR); 21170Sstevel@tonic-gate if (opt != NULL) { 21180Sstevel@tonic-gate haddr = (uchar_t *)&opt[1]; 21192546Scarlsonj if (hlen > opt->nd_opt_len * 8 - sizeof (*opt) || 21200Sstevel@tonic-gate hlen == 0) { 21212546Scarlsonj ip1dbg(("ndp_input_advert: bad SLLA\n")); 21220Sstevel@tonic-gate BUMP_MIB(mib, 21230Sstevel@tonic-gate ipv6IfIcmpInBadNeighborAdvertisements); 21240Sstevel@tonic-gate return; 21250Sstevel@tonic-gate } 21260Sstevel@tonic-gate } 21270Sstevel@tonic-gate } 21280Sstevel@tonic-gate 21290Sstevel@tonic-gate /* 2130*8485SPeter.Memishian@Sun.COM * NOTE: we match across the illgrp since we need to do DAD for all of 2131*8485SPeter.Memishian@Sun.COM * our local addresses, and those are spread across all the active 21320Sstevel@tonic-gate * ills in the group. 21330Sstevel@tonic-gate */ 2134*8485SPeter.Memishian@Sun.COM if ((dst_nce = ndp_lookup_v6(ill, B_TRUE, &target, B_FALSE)) == NULL) 2135*8485SPeter.Memishian@Sun.COM return; 2136*8485SPeter.Memishian@Sun.COM 2137*8485SPeter.Memishian@Sun.COM if (dst_nce->nce_flags & NCE_F_PERMANENT) { 2138*8485SPeter.Memishian@Sun.COM /* 2139*8485SPeter.Memishian@Sun.COM * Someone just advertised one of our local addresses. First, 2140*8485SPeter.Memishian@Sun.COM * check it it was us -- if so, we can safely ignore it. 2141*8485SPeter.Memishian@Sun.COM */ 2142*8485SPeter.Memishian@Sun.COM if (haddr != NULL) { 2143*8485SPeter.Memishian@Sun.COM if (!nce_cmp_ll_addr(dst_nce, haddr, hlen)) 2144*8485SPeter.Memishian@Sun.COM goto out; /* from us -- no conflict */ 2145*8485SPeter.Memishian@Sun.COM 2146*8485SPeter.Memishian@Sun.COM /* 2147*8485SPeter.Memishian@Sun.COM * If we're in an IPMP group, check if this is an echo 2148*8485SPeter.Memishian@Sun.COM * from another ill in the group. Use the double- 2149*8485SPeter.Memishian@Sun.COM * checked locking pattern to avoid grabbing 2150*8485SPeter.Memishian@Sun.COM * ill_g_lock in the non-IPMP case. 2151*8485SPeter.Memishian@Sun.COM */ 2152*8485SPeter.Memishian@Sun.COM if (IS_UNDER_IPMP(ill)) { 2153*8485SPeter.Memishian@Sun.COM rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2154*8485SPeter.Memishian@Sun.COM if (IS_UNDER_IPMP(ill) && ipmp_illgrp_find_ill( 2155*8485SPeter.Memishian@Sun.COM ill->ill_grp, haddr, hlen) != NULL) { 2156*8485SPeter.Memishian@Sun.COM rw_exit(&ipst->ips_ill_g_lock); 2157*8485SPeter.Memishian@Sun.COM goto out; 2158*8485SPeter.Memishian@Sun.COM } 2159*8485SPeter.Memishian@Sun.COM rw_exit(&ipst->ips_ill_g_lock); 2160*8485SPeter.Memishian@Sun.COM } 21610Sstevel@tonic-gate } 2162*8485SPeter.Memishian@Sun.COM 2163*8485SPeter.Memishian@Sun.COM /* 2164*8485SPeter.Memishian@Sun.COM * This appears to be a real conflict. If we're trying to 2165*8485SPeter.Memishian@Sun.COM * configure this NCE (ND_PROBE), then shut it down. 2166*8485SPeter.Memishian@Sun.COM * Otherwise, handle the discovered conflict. 2167*8485SPeter.Memishian@Sun.COM * 2168*8485SPeter.Memishian@Sun.COM * Note that dl_mp might be NULL if we're getting a unicast 2169*8485SPeter.Memishian@Sun.COM * reply. This isn't typically done (multicast is the norm in 2170*8485SPeter.Memishian@Sun.COM * response to a probe), but we can handle the dl_mp == NULL 2171*8485SPeter.Memishian@Sun.COM * case as well. 2172*8485SPeter.Memishian@Sun.COM */ 2173*8485SPeter.Memishian@Sun.COM if (dst_nce->nce_state == ND_PROBE) 2174*8485SPeter.Memishian@Sun.COM ip_ndp_failure(ill, mp, dl_mp); 2175*8485SPeter.Memishian@Sun.COM else 2176*8485SPeter.Memishian@Sun.COM ip_ndp_conflict(ill, mp, dl_mp, dst_nce); 2177*8485SPeter.Memishian@Sun.COM } else { 2178*8485SPeter.Memishian@Sun.COM if (na->nd_na_flags_reserved & ND_NA_FLAG_ROUTER) 2179*8485SPeter.Memishian@Sun.COM dst_nce->nce_flags |= NCE_F_ISROUTER; 2180*8485SPeter.Memishian@Sun.COM 2181*8485SPeter.Memishian@Sun.COM /* B_TRUE indicates this an advertisement */ 2182*8485SPeter.Memishian@Sun.COM ndp_process(dst_nce, haddr, na->nd_na_flags_reserved, B_TRUE); 21830Sstevel@tonic-gate } 2184*8485SPeter.Memishian@Sun.COM out: 2185*8485SPeter.Memishian@Sun.COM NCE_REFRELE(dst_nce); 21860Sstevel@tonic-gate } 21870Sstevel@tonic-gate 21880Sstevel@tonic-gate /* 21890Sstevel@tonic-gate * Process NDP neighbor solicitation/advertisement messages. 21900Sstevel@tonic-gate * The checksum has already checked o.k before reaching here. 21910Sstevel@tonic-gate */ 21920Sstevel@tonic-gate void 21932546Scarlsonj ndp_input(ill_t *ill, mblk_t *mp, mblk_t *dl_mp) 21940Sstevel@tonic-gate { 21950Sstevel@tonic-gate icmp6_t *icmp_nd; 21960Sstevel@tonic-gate ip6_t *ip6h; 21970Sstevel@tonic-gate int len; 21980Sstevel@tonic-gate mib2_ipv6IfIcmpEntry_t *mib = ill->ill_icmp6_mib; 21990Sstevel@tonic-gate 22000Sstevel@tonic-gate 22010Sstevel@tonic-gate if (!pullupmsg(mp, -1)) { 22020Sstevel@tonic-gate ip1dbg(("ndp_input: pullupmsg failed\n")); 22033284Sapersson BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 22040Sstevel@tonic-gate goto done; 22050Sstevel@tonic-gate } 22060Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 22070Sstevel@tonic-gate if (ip6h->ip6_hops != IPV6_MAX_HOPS) { 22080Sstevel@tonic-gate ip1dbg(("ndp_input: hoplimit != IPV6_MAX_HOPS\n")); 22090Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpBadHoplimit); 22100Sstevel@tonic-gate goto done; 22110Sstevel@tonic-gate } 22120Sstevel@tonic-gate /* 22130Sstevel@tonic-gate * NDP does not accept any extension headers between the 22140Sstevel@tonic-gate * IP header and the ICMP header since e.g. a routing 22150Sstevel@tonic-gate * header could be dangerous. 22160Sstevel@tonic-gate * This assumes that any AH or ESP headers are removed 22170Sstevel@tonic-gate * by ip prior to passing the packet to ndp_input. 22180Sstevel@tonic-gate */ 22190Sstevel@tonic-gate if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 22200Sstevel@tonic-gate ip1dbg(("ndp_input: Wrong next header 0x%x\n", 22210Sstevel@tonic-gate ip6h->ip6_nxt)); 22220Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpInErrors); 22230Sstevel@tonic-gate goto done; 22240Sstevel@tonic-gate } 22250Sstevel@tonic-gate icmp_nd = (icmp6_t *)(mp->b_rptr + IPV6_HDR_LEN); 22260Sstevel@tonic-gate ASSERT(icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT || 22270Sstevel@tonic-gate icmp_nd->icmp6_type == ND_NEIGHBOR_ADVERT); 22280Sstevel@tonic-gate if (icmp_nd->icmp6_code != 0) { 22290Sstevel@tonic-gate ip1dbg(("ndp_input: icmp6 code != 0 \n")); 22300Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpInErrors); 22310Sstevel@tonic-gate goto done; 22320Sstevel@tonic-gate } 22330Sstevel@tonic-gate len = mp->b_wptr - mp->b_rptr - IPV6_HDR_LEN; 22340Sstevel@tonic-gate /* 22350Sstevel@tonic-gate * Make sure packet length is large enough for either 22360Sstevel@tonic-gate * a NS or a NA icmp packet. 22370Sstevel@tonic-gate */ 22380Sstevel@tonic-gate if (len < sizeof (struct icmp6_hdr) + sizeof (struct in6_addr)) { 22390Sstevel@tonic-gate ip1dbg(("ndp_input: packet too short\n")); 22400Sstevel@tonic-gate BUMP_MIB(mib, ipv6IfIcmpInErrors); 22410Sstevel@tonic-gate goto done; 22420Sstevel@tonic-gate } 22430Sstevel@tonic-gate if (icmp_nd->icmp6_type == ND_NEIGHBOR_SOLICIT) { 22442546Scarlsonj ndp_input_solicit(ill, mp, dl_mp); 22450Sstevel@tonic-gate } else { 22462546Scarlsonj ndp_input_advert(ill, mp, dl_mp); 22470Sstevel@tonic-gate } 22480Sstevel@tonic-gate done: 22490Sstevel@tonic-gate freemsg(mp); 22500Sstevel@tonic-gate } 22510Sstevel@tonic-gate 22520Sstevel@tonic-gate /* 2253*8485SPeter.Memishian@Sun.COM * Utility routine to send an advertisement. Assumes that the NCE cannot 2254*8485SPeter.Memishian@Sun.COM * go away (e.g., because it's refheld). 2255*8485SPeter.Memishian@Sun.COM */ 2256*8485SPeter.Memishian@Sun.COM static boolean_t 2257*8485SPeter.Memishian@Sun.COM nce_xmit_advert(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *target, 2258*8485SPeter.Memishian@Sun.COM uint_t flags) 2259*8485SPeter.Memishian@Sun.COM { 2260*8485SPeter.Memishian@Sun.COM ASSERT((flags & NDP_PROBE) == 0); 2261*8485SPeter.Memishian@Sun.COM 2262*8485SPeter.Memishian@Sun.COM if (nce->nce_flags & NCE_F_ISROUTER) 2263*8485SPeter.Memishian@Sun.COM flags |= NDP_ISROUTER; 2264*8485SPeter.Memishian@Sun.COM if (!(nce->nce_flags & NCE_F_ANYCAST)) 2265*8485SPeter.Memishian@Sun.COM flags |= NDP_ORIDE; 2266*8485SPeter.Memishian@Sun.COM 2267*8485SPeter.Memishian@Sun.COM return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_ADVERT, use_nd_lla, 2268*8485SPeter.Memishian@Sun.COM &nce->nce_addr, target, flags)); 2269*8485SPeter.Memishian@Sun.COM } 2270*8485SPeter.Memishian@Sun.COM 2271*8485SPeter.Memishian@Sun.COM /* 2272*8485SPeter.Memishian@Sun.COM * Utility routine to send a solicitation. Assumes that the NCE cannot 2273*8485SPeter.Memishian@Sun.COM * go away (e.g., because it's refheld). 2274*8485SPeter.Memishian@Sun.COM */ 2275*8485SPeter.Memishian@Sun.COM static boolean_t 2276*8485SPeter.Memishian@Sun.COM nce_xmit_solicit(nce_t *nce, boolean_t use_nd_lla, const in6_addr_t *sender, 2277*8485SPeter.Memishian@Sun.COM uint_t flags) 2278*8485SPeter.Memishian@Sun.COM { 2279*8485SPeter.Memishian@Sun.COM if (flags & NDP_PROBE) 2280*8485SPeter.Memishian@Sun.COM sender = &ipv6_all_zeros; 2281*8485SPeter.Memishian@Sun.COM 2282*8485SPeter.Memishian@Sun.COM return (nce_xmit(nce->nce_ill, ND_NEIGHBOR_SOLICIT, use_nd_lla, 2283*8485SPeter.Memishian@Sun.COM sender, &nce->nce_addr, flags)); 2284*8485SPeter.Memishian@Sun.COM } 2285*8485SPeter.Memishian@Sun.COM 2286*8485SPeter.Memishian@Sun.COM /* 22870Sstevel@tonic-gate * nce_xmit is called to form and transmit a ND solicitation or 22880Sstevel@tonic-gate * advertisement ICMP packet. 22892546Scarlsonj * 22902546Scarlsonj * If the source address is unspecified and this isn't a probe (used for 22912546Scarlsonj * duplicate address detection), an appropriate source address and link layer 22922546Scarlsonj * address will be chosen here. The link layer address option is included if 22932546Scarlsonj * the source is specified (i.e., all non-probe packets), and omitted (per the 22942546Scarlsonj * specification) otherwise. 22952546Scarlsonj * 22960Sstevel@tonic-gate * It returns B_FALSE only if it does a successful put() to the 22970Sstevel@tonic-gate * corresponding ill's ill_wq otherwise returns B_TRUE. 22980Sstevel@tonic-gate */ 22990Sstevel@tonic-gate static boolean_t 2300*8485SPeter.Memishian@Sun.COM nce_xmit(ill_t *ill, uint8_t type, boolean_t use_nd_lla, 2301*8485SPeter.Memishian@Sun.COM const in6_addr_t *sender, const in6_addr_t *target, int flag) 23020Sstevel@tonic-gate { 2303*8485SPeter.Memishian@Sun.COM ill_t *hwaddr_ill; 23040Sstevel@tonic-gate uint32_t len; 23050Sstevel@tonic-gate icmp6_t *icmp6; 23060Sstevel@tonic-gate mblk_t *mp; 23070Sstevel@tonic-gate ip6_t *ip6h; 23080Sstevel@tonic-gate nd_opt_hdr_t *opt; 2309*8485SPeter.Memishian@Sun.COM uint_t plen, maxplen; 23100Sstevel@tonic-gate ip6i_t *ip6i; 23110Sstevel@tonic-gate ipif_t *src_ipif = NULL; 23122598Scarlsonj uint8_t *hw_addr; 23133909Sja97890 zoneid_t zoneid = GLOBAL_ZONEID; 2314*8485SPeter.Memishian@Sun.COM char buf[INET6_ADDRSTRLEN]; 2315*8485SPeter.Memishian@Sun.COM 2316*8485SPeter.Memishian@Sun.COM ASSERT(!IS_IPMP(ill)); 23170Sstevel@tonic-gate 23180Sstevel@tonic-gate /* 2319*8485SPeter.Memishian@Sun.COM * Check that the sender is actually a usable address on `ill', and if 2320*8485SPeter.Memishian@Sun.COM * so, track that as the src_ipif. If not, for solicitations, set the 2321*8485SPeter.Memishian@Sun.COM * sender to :: so that a new one will be picked below; for adverts, 2322*8485SPeter.Memishian@Sun.COM * drop the packet since we expect nce_xmit_advert() to always provide 2323*8485SPeter.Memishian@Sun.COM * a valid sender. 23240Sstevel@tonic-gate */ 2325*8485SPeter.Memishian@Sun.COM if (!IN6_IS_ADDR_UNSPECIFIED(sender)) { 2326*8485SPeter.Memishian@Sun.COM if ((src_ipif = ip_ndp_lookup_addr_v6(sender, ill)) == NULL || 2327*8485SPeter.Memishian@Sun.COM !src_ipif->ipif_addr_ready) { 2328*8485SPeter.Memishian@Sun.COM if (src_ipif != NULL) { 2329*8485SPeter.Memishian@Sun.COM ipif_refrele(src_ipif); 2330*8485SPeter.Memishian@Sun.COM src_ipif = NULL; 2331*8485SPeter.Memishian@Sun.COM } 2332*8485SPeter.Memishian@Sun.COM if (type == ND_NEIGHBOR_ADVERT) { 2333*8485SPeter.Memishian@Sun.COM ip1dbg(("nce_xmit: No source ipif for src %s\n", 2334*8485SPeter.Memishian@Sun.COM inet_ntop(AF_INET6, sender, buf, 2335*8485SPeter.Memishian@Sun.COM sizeof (buf)))); 2336*8485SPeter.Memishian@Sun.COM return (B_TRUE); 2337*8485SPeter.Memishian@Sun.COM } 2338*8485SPeter.Memishian@Sun.COM sender = &ipv6_all_zeros; 2339*8485SPeter.Memishian@Sun.COM } 2340*8485SPeter.Memishian@Sun.COM } 2341*8485SPeter.Memishian@Sun.COM 2342*8485SPeter.Memishian@Sun.COM /* 2343*8485SPeter.Memishian@Sun.COM * If we still have an unspecified source (sender) address and this 2344*8485SPeter.Memishian@Sun.COM * isn't a probe, select a source address from `ill'. 2345*8485SPeter.Memishian@Sun.COM */ 23462546Scarlsonj if (IN6_IS_ADDR_UNSPECIFIED(sender) && !(flag & NDP_PROBE)) { 2347*8485SPeter.Memishian@Sun.COM ASSERT(type != ND_NEIGHBOR_ADVERT); 23480Sstevel@tonic-gate /* 2349*8485SPeter.Memishian@Sun.COM * Pick a source address for this solicitation, but restrict 2350*8485SPeter.Memishian@Sun.COM * the selection to addresses assigned to the output 2351*8485SPeter.Memishian@Sun.COM * interface. We do this because the destination will create 2352*8485SPeter.Memishian@Sun.COM * a neighbor cache entry for the source address of this 2353*8485SPeter.Memishian@Sun.COM * packet, so the source address needs to be a valid neighbor. 23540Sstevel@tonic-gate */ 2355*8485SPeter.Memishian@Sun.COM src_ipif = ipif_select_source_v6(ill, target, B_TRUE, 23563909Sja97890 IPV6_PREFER_SRC_DEFAULT, ALL_ZONES); 23570Sstevel@tonic-gate if (src_ipif == NULL) { 23582202Srk129064 ip1dbg(("nce_xmit: No source ipif for dst %s\n", 2359*8485SPeter.Memishian@Sun.COM inet_ntop(AF_INET6, target, buf, sizeof (buf)))); 23600Sstevel@tonic-gate return (B_TRUE); 23610Sstevel@tonic-gate } 23620Sstevel@tonic-gate sender = &src_ipif->ipif_v6src_addr; 23630Sstevel@tonic-gate } 23640Sstevel@tonic-gate 23650Sstevel@tonic-gate /* 2366*8485SPeter.Memishian@Sun.COM * We're either sending a probe or we have a source address. 23670Sstevel@tonic-gate */ 2368*8485SPeter.Memishian@Sun.COM ASSERT((flag & NDP_PROBE) || src_ipif != NULL); 2369*8485SPeter.Memishian@Sun.COM 2370*8485SPeter.Memishian@Sun.COM maxplen = roundup(sizeof (nd_opt_hdr_t) + ND_MAX_HDW_LEN, 8); 23710Sstevel@tonic-gate len = IPV6_HDR_LEN + sizeof (ip6i_t) + sizeof (nd_neighbor_advert_t) + 2372*8485SPeter.Memishian@Sun.COM maxplen; 23730Sstevel@tonic-gate mp = allocb(len, BPRI_LO); 23740Sstevel@tonic-gate if (mp == NULL) { 23750Sstevel@tonic-gate if (src_ipif != NULL) 23760Sstevel@tonic-gate ipif_refrele(src_ipif); 23770Sstevel@tonic-gate return (B_TRUE); 23780Sstevel@tonic-gate } 23790Sstevel@tonic-gate bzero((char *)mp->b_rptr, len); 23800Sstevel@tonic-gate mp->b_wptr = mp->b_rptr + len; 23810Sstevel@tonic-gate 23820Sstevel@tonic-gate ip6i = (ip6i_t *)mp->b_rptr; 23830Sstevel@tonic-gate ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 23840Sstevel@tonic-gate ip6i->ip6i_nxt = IPPROTO_RAW; 2385*8485SPeter.Memishian@Sun.COM ip6i->ip6i_flags = IP6I_HOPLIMIT; 23862546Scarlsonj if (flag & NDP_PROBE) 23872546Scarlsonj ip6i->ip6i_flags |= IP6I_UNSPEC_SRC; 23880Sstevel@tonic-gate 23890Sstevel@tonic-gate ip6h = (ip6_t *)(mp->b_rptr + sizeof (ip6i_t)); 23900Sstevel@tonic-gate ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 23910Sstevel@tonic-gate ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 23920Sstevel@tonic-gate ip6h->ip6_nxt = IPPROTO_ICMPV6; 23930Sstevel@tonic-gate ip6h->ip6_hops = IPV6_MAX_HOPS; 2394*8485SPeter.Memishian@Sun.COM ip6h->ip6_src = *sender; 23950Sstevel@tonic-gate ip6h->ip6_dst = *target; 23960Sstevel@tonic-gate icmp6 = (icmp6_t *)&ip6h[1]; 23970Sstevel@tonic-gate 23980Sstevel@tonic-gate opt = (nd_opt_hdr_t *)((uint8_t *)ip6h + IPV6_HDR_LEN + 23990Sstevel@tonic-gate sizeof (nd_neighbor_advert_t)); 24000Sstevel@tonic-gate 2401*8485SPeter.Memishian@Sun.COM if (type == ND_NEIGHBOR_SOLICIT) { 24020Sstevel@tonic-gate nd_neighbor_solicit_t *ns = (nd_neighbor_solicit_t *)icmp6; 24030Sstevel@tonic-gate 24042546Scarlsonj if (!(flag & NDP_PROBE)) 24052546Scarlsonj opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; 24060Sstevel@tonic-gate ns->nd_ns_target = *target; 24070Sstevel@tonic-gate if (!(flag & NDP_UNICAST)) { 24080Sstevel@tonic-gate /* Form multicast address of the target */ 24090Sstevel@tonic-gate ip6h->ip6_dst = ipv6_solicited_node_mcast; 24100Sstevel@tonic-gate ip6h->ip6_dst.s6_addr32[3] |= 24110Sstevel@tonic-gate ns->nd_ns_target.s6_addr32[3]; 24120Sstevel@tonic-gate } 24130Sstevel@tonic-gate } else { 24140Sstevel@tonic-gate nd_neighbor_advert_t *na = (nd_neighbor_advert_t *)icmp6; 24150Sstevel@tonic-gate 24162546Scarlsonj ASSERT(!(flag & NDP_PROBE)); 24170Sstevel@tonic-gate opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 24180Sstevel@tonic-gate na->nd_na_target = *sender; 24190Sstevel@tonic-gate if (flag & NDP_ISROUTER) 24200Sstevel@tonic-gate na->nd_na_flags_reserved |= ND_NA_FLAG_ROUTER; 24210Sstevel@tonic-gate if (flag & NDP_SOLICITED) 24220Sstevel@tonic-gate na->nd_na_flags_reserved |= ND_NA_FLAG_SOLICITED; 24230Sstevel@tonic-gate if (flag & NDP_ORIDE) 24240Sstevel@tonic-gate na->nd_na_flags_reserved |= ND_NA_FLAG_OVERRIDE; 24250Sstevel@tonic-gate } 24262546Scarlsonj 24272598Scarlsonj hw_addr = NULL; 24282546Scarlsonj if (!(flag & NDP_PROBE)) { 2429*8485SPeter.Memishian@Sun.COM /* 2430*8485SPeter.Memishian@Sun.COM * Use our source address to find the hardware address to put 2431*8485SPeter.Memishian@Sun.COM * in the packet, so that the hardware address and IP address 2432*8485SPeter.Memishian@Sun.COM * will match up -- even if that hardware address doesn't 2433*8485SPeter.Memishian@Sun.COM * match the ill we actually transmit the packet through. 2434*8485SPeter.Memishian@Sun.COM */ 2435*8485SPeter.Memishian@Sun.COM if (IS_IPMP(src_ipif->ipif_ill)) { 2436*8485SPeter.Memishian@Sun.COM hwaddr_ill = ipmp_ipif_hold_bound_ill(src_ipif); 2437*8485SPeter.Memishian@Sun.COM if (hwaddr_ill == NULL) { 2438*8485SPeter.Memishian@Sun.COM ip1dbg(("nce_xmit: no bound ill!\n")); 2439*8485SPeter.Memishian@Sun.COM ipif_refrele(src_ipif); 2440*8485SPeter.Memishian@Sun.COM freemsg(mp); 2441*8485SPeter.Memishian@Sun.COM return (B_TRUE); 2442*8485SPeter.Memishian@Sun.COM } 2443*8485SPeter.Memishian@Sun.COM } else { 2444*8485SPeter.Memishian@Sun.COM hwaddr_ill = src_ipif->ipif_ill; 2445*8485SPeter.Memishian@Sun.COM ill_refhold(hwaddr_ill); /* for symmetry */ 2446*8485SPeter.Memishian@Sun.COM } 2447*8485SPeter.Memishian@Sun.COM 2448*8485SPeter.Memishian@Sun.COM plen = roundup(sizeof (nd_opt_hdr_t) + 2449*8485SPeter.Memishian@Sun.COM hwaddr_ill->ill_nd_lla_len, 8); 2450*8485SPeter.Memishian@Sun.COM 24512598Scarlsonj hw_addr = use_nd_lla ? hwaddr_ill->ill_nd_lla : 24522598Scarlsonj hwaddr_ill->ill_phys_addr; 24532598Scarlsonj if (hw_addr != NULL) { 24542598Scarlsonj /* Fill in link layer address and option len */ 2455*8485SPeter.Memishian@Sun.COM opt->nd_opt_len = (uint8_t)(plen / 8); 24562598Scarlsonj bcopy(hw_addr, &opt[1], hwaddr_ill->ill_nd_lla_len); 24572598Scarlsonj } 2458*8485SPeter.Memishian@Sun.COM 2459*8485SPeter.Memishian@Sun.COM ill_refrele(hwaddr_ill); 24602546Scarlsonj } 2461*8485SPeter.Memishian@Sun.COM 2462*8485SPeter.Memishian@Sun.COM if (hw_addr == NULL) 2463*8485SPeter.Memishian@Sun.COM plen = 0; 2464*8485SPeter.Memishian@Sun.COM 2465*8485SPeter.Memishian@Sun.COM /* Fix up the length of the packet now that plen is known */ 2466*8485SPeter.Memishian@Sun.COM len -= (maxplen - plen); 2467*8485SPeter.Memishian@Sun.COM mp->b_wptr = mp->b_rptr + len; 2468*8485SPeter.Memishian@Sun.COM ip6h->ip6_plen = htons(len - IPV6_HDR_LEN - sizeof (ip6i_t)); 2469*8485SPeter.Memishian@Sun.COM 2470*8485SPeter.Memishian@Sun.COM icmp6->icmp6_type = type; 24710Sstevel@tonic-gate icmp6->icmp6_code = 0; 24720Sstevel@tonic-gate /* 24730Sstevel@tonic-gate * Prepare for checksum by putting icmp length in the icmp 24740Sstevel@tonic-gate * checksum field. The checksum is calculated in ip_wput_v6. 24750Sstevel@tonic-gate */ 24760Sstevel@tonic-gate icmp6->icmp6_cksum = ip6h->ip6_plen; 24770Sstevel@tonic-gate 2478*8485SPeter.Memishian@Sun.COM /* 2479*8485SPeter.Memishian@Sun.COM * Before we toss the src_ipif, look up the zoneid to pass to 2480*8485SPeter.Memishian@Sun.COM * ip_output_v6(). This is to ensure unicast ND_NEIGHBOR_ADVERT 2481*8485SPeter.Memishian@Sun.COM * packets to be routed correctly by IP (we cannot guarantee that the 2482*8485SPeter.Memishian@Sun.COM * global zone has an interface route to the destination). 2483*8485SPeter.Memishian@Sun.COM */ 2484*8485SPeter.Memishian@Sun.COM if (src_ipif != NULL) { 2485*8485SPeter.Memishian@Sun.COM if ((zoneid = src_ipif->ipif_zoneid) == ALL_ZONES) 2486*8485SPeter.Memishian@Sun.COM zoneid = GLOBAL_ZONEID; 24870Sstevel@tonic-gate ipif_refrele(src_ipif); 2488*8485SPeter.Memishian@Sun.COM } 24893909Sja97890 24903909Sja97890 ip_output_v6((void *)(uintptr_t)zoneid, mp, ill->ill_wq, IP_WPUT); 24913909Sja97890 return (B_FALSE); 24920Sstevel@tonic-gate } 24930Sstevel@tonic-gate 24940Sstevel@tonic-gate /* 24950Sstevel@tonic-gate * Make a link layer address (does not include the SAP) from an nce. 24960Sstevel@tonic-gate * To form the link layer address, use the last four bytes of ipv6 24970Sstevel@tonic-gate * address passed in and the fixed offset stored in nce. 24980Sstevel@tonic-gate */ 24990Sstevel@tonic-gate static void 25000Sstevel@tonic-gate nce_make_mapping(nce_t *nce, uchar_t *addrpos, uchar_t *addr) 25010Sstevel@tonic-gate { 25020Sstevel@tonic-gate uchar_t *mask, *to; 25030Sstevel@tonic-gate ill_t *ill = nce->nce_ill; 25040Sstevel@tonic-gate int len; 25050Sstevel@tonic-gate 25060Sstevel@tonic-gate if (ill->ill_net_type == IRE_IF_NORESOLVER) 25070Sstevel@tonic-gate return; 25080Sstevel@tonic-gate ASSERT(nce->nce_res_mp != NULL); 25090Sstevel@tonic-gate ASSERT(ill->ill_net_type == IRE_IF_RESOLVER); 25100Sstevel@tonic-gate ASSERT(nce->nce_flags & NCE_F_MAPPING); 25110Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&nce->nce_extract_mask)); 25120Sstevel@tonic-gate ASSERT(addr != NULL); 25130Sstevel@tonic-gate bcopy(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill), 25140Sstevel@tonic-gate addrpos, ill->ill_nd_lla_len); 25150Sstevel@tonic-gate len = MIN((int)ill->ill_nd_lla_len - nce->nce_ll_extract_start, 25160Sstevel@tonic-gate IPV6_ADDR_LEN); 25170Sstevel@tonic-gate mask = (uchar_t *)&nce->nce_extract_mask; 25180Sstevel@tonic-gate mask += (IPV6_ADDR_LEN - len); 25190Sstevel@tonic-gate addr += (IPV6_ADDR_LEN - len); 25200Sstevel@tonic-gate to = addrpos + nce->nce_ll_extract_start; 25210Sstevel@tonic-gate while (len-- > 0) 25220Sstevel@tonic-gate *to++ |= *mask++ & *addr++; 25230Sstevel@tonic-gate } 25240Sstevel@tonic-gate 25250Sstevel@tonic-gate mblk_t * 25260Sstevel@tonic-gate nce_udreq_alloc(ill_t *ill) 25270Sstevel@tonic-gate { 25280Sstevel@tonic-gate mblk_t *template_mp = NULL; 25290Sstevel@tonic-gate dl_unitdata_req_t *dlur; 25300Sstevel@tonic-gate int sap_length; 25310Sstevel@tonic-gate 25322535Ssangeeta ASSERT(ill->ill_isv6); 25332535Ssangeeta 25340Sstevel@tonic-gate sap_length = ill->ill_sap_length; 25350Sstevel@tonic-gate template_mp = ip_dlpi_alloc(sizeof (dl_unitdata_req_t) + 25360Sstevel@tonic-gate ill->ill_nd_lla_len + ABS(sap_length), DL_UNITDATA_REQ); 25370Sstevel@tonic-gate if (template_mp == NULL) 25380Sstevel@tonic-gate return (NULL); 25390Sstevel@tonic-gate 25400Sstevel@tonic-gate dlur = (dl_unitdata_req_t *)template_mp->b_rptr; 25410Sstevel@tonic-gate dlur->dl_priority.dl_min = 0; 25420Sstevel@tonic-gate dlur->dl_priority.dl_max = 0; 25430Sstevel@tonic-gate dlur->dl_dest_addr_length = ABS(sap_length) + ill->ill_nd_lla_len; 25440Sstevel@tonic-gate dlur->dl_dest_addr_offset = sizeof (dl_unitdata_req_t); 25450Sstevel@tonic-gate 25460Sstevel@tonic-gate /* Copy in the SAP value. */ 25470Sstevel@tonic-gate NCE_LL_SAP_COPY(ill, template_mp); 25480Sstevel@tonic-gate 25490Sstevel@tonic-gate return (template_mp); 25500Sstevel@tonic-gate } 25510Sstevel@tonic-gate 25520Sstevel@tonic-gate /* 25530Sstevel@tonic-gate * NDP retransmit timer. 25540Sstevel@tonic-gate * This timer goes off when: 25550Sstevel@tonic-gate * a. It is time to retransmit NS for resolver. 25560Sstevel@tonic-gate * b. It is time to send reachability probes. 25570Sstevel@tonic-gate */ 25580Sstevel@tonic-gate void 25590Sstevel@tonic-gate ndp_timer(void *arg) 25600Sstevel@tonic-gate { 25610Sstevel@tonic-gate nce_t *nce = arg; 25620Sstevel@tonic-gate ill_t *ill = nce->nce_ill; 25630Sstevel@tonic-gate uint32_t ms; 25640Sstevel@tonic-gate char addrbuf[INET6_ADDRSTRLEN]; 25650Sstevel@tonic-gate boolean_t dropped = B_FALSE; 25663448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 25670Sstevel@tonic-gate 25680Sstevel@tonic-gate /* 25690Sstevel@tonic-gate * The timer has to be cancelled by ndp_delete before doing the final 25700Sstevel@tonic-gate * refrele. So the NCE is guaranteed to exist when the timer runs 25710Sstevel@tonic-gate * until it clears the timeout_id. Before clearing the timeout_id 25720Sstevel@tonic-gate * bump up the refcnt so that we can continue to use the nce 25730Sstevel@tonic-gate */ 25740Sstevel@tonic-gate ASSERT(nce != NULL); 25750Sstevel@tonic-gate 25760Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 25770Sstevel@tonic-gate NCE_REFHOLD_LOCKED(nce); 25780Sstevel@tonic-gate nce->nce_timeout_id = 0; 25790Sstevel@tonic-gate 25800Sstevel@tonic-gate /* 25810Sstevel@tonic-gate * Check the reachability state first. 25820Sstevel@tonic-gate */ 25830Sstevel@tonic-gate switch (nce->nce_state) { 25840Sstevel@tonic-gate case ND_DELAY: 25850Sstevel@tonic-gate nce->nce_state = ND_PROBE; 25860Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 2587*8485SPeter.Memishian@Sun.COM (void) nce_xmit_solicit(nce, B_FALSE, &ipv6_all_zeros, 2588*8485SPeter.Memishian@Sun.COM NDP_UNICAST); 25890Sstevel@tonic-gate if (ip_debug > 3) { 25900Sstevel@tonic-gate /* ip2dbg */ 25910Sstevel@tonic-gate pr_addr_dbg("ndp_timer: state for %s changed " 25920Sstevel@tonic-gate "to PROBE\n", AF_INET6, &nce->nce_addr); 25930Sstevel@tonic-gate } 25940Sstevel@tonic-gate NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 25950Sstevel@tonic-gate NCE_REFRELE(nce); 25960Sstevel@tonic-gate return; 25970Sstevel@tonic-gate case ND_PROBE: 25980Sstevel@tonic-gate /* must be retransmit timer */ 25990Sstevel@tonic-gate nce->nce_pcnt--; 26000Sstevel@tonic-gate ASSERT(nce->nce_pcnt < ND_MAX_UNICAST_SOLICIT && 26010Sstevel@tonic-gate nce->nce_pcnt >= -1); 26022546Scarlsonj if (nce->nce_pcnt > 0) { 26030Sstevel@tonic-gate /* 26040Sstevel@tonic-gate * As per RFC2461, the nce gets deleted after 26050Sstevel@tonic-gate * MAX_UNICAST_SOLICIT unsuccessful re-transmissions. 26060Sstevel@tonic-gate * Note that the first unicast solicitation is sent 26070Sstevel@tonic-gate * during the DELAY state. 26080Sstevel@tonic-gate */ 26092546Scarlsonj ip2dbg(("ndp_timer: pcount=%x dst %s\n", 26102546Scarlsonj nce->nce_pcnt, inet_ntop(AF_INET6, &nce->nce_addr, 26112546Scarlsonj addrbuf, sizeof (addrbuf)))); 26122546Scarlsonj mutex_exit(&nce->nce_lock); 2613*8485SPeter.Memishian@Sun.COM dropped = nce_xmit_solicit(nce, B_FALSE, 2614*8485SPeter.Memishian@Sun.COM &ipv6_all_zeros, 26152546Scarlsonj (nce->nce_flags & NCE_F_PERMANENT) ? NDP_PROBE : 26162546Scarlsonj NDP_UNICAST); 26172546Scarlsonj if (dropped) { 26182546Scarlsonj mutex_enter(&nce->nce_lock); 26192546Scarlsonj nce->nce_pcnt++; 26200Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 26212546Scarlsonj } 26222546Scarlsonj NDP_RESTART_TIMER(nce, ILL_PROBE_INTERVAL(ill)); 26232546Scarlsonj } else if (nce->nce_pcnt < 0) { 26242546Scarlsonj /* No hope, delete the nce */ 26252546Scarlsonj nce->nce_state = ND_UNREACHABLE; 26262546Scarlsonj mutex_exit(&nce->nce_lock); 26272546Scarlsonj if (ip_debug > 2) { 26282546Scarlsonj /* ip1dbg */ 26292546Scarlsonj pr_addr_dbg("ndp_timer: Delete IRE for" 26302546Scarlsonj " dst %s\n", AF_INET6, &nce->nce_addr); 26312546Scarlsonj } 26322546Scarlsonj ndp_delete(nce); 26332546Scarlsonj } else if (!(nce->nce_flags & NCE_F_PERMANENT)) { 26342546Scarlsonj /* Wait RetransTimer, before deleting the entry */ 26352546Scarlsonj ip2dbg(("ndp_timer: pcount=%x dst %s\n", 26362546Scarlsonj nce->nce_pcnt, inet_ntop(AF_INET6, 26372546Scarlsonj &nce->nce_addr, addrbuf, sizeof (addrbuf)))); 26382546Scarlsonj mutex_exit(&nce->nce_lock); 26392546Scarlsonj /* Wait one interval before killing */ 26402546Scarlsonj NDP_RESTART_TIMER(nce, ill->ill_reachable_retrans_time); 26412546Scarlsonj } else if (ill->ill_phyint->phyint_flags & PHYI_RUNNING) { 26422546Scarlsonj ipif_t *ipif; 26432546Scarlsonj 26442546Scarlsonj /* 26452546Scarlsonj * We're done probing, and we can now declare this 26462546Scarlsonj * address to be usable. Let IP know that it's ok to 26472546Scarlsonj * use. 26482546Scarlsonj */ 26492546Scarlsonj nce->nce_state = ND_REACHABLE; 26502546Scarlsonj mutex_exit(&nce->nce_lock); 2651*8485SPeter.Memishian@Sun.COM ipif = ip_ndp_lookup_addr_v6(&nce->nce_addr, 2652*8485SPeter.Memishian@Sun.COM nce->nce_ill); 26532546Scarlsonj if (ipif != NULL) { 26542546Scarlsonj if (ipif->ipif_was_dup) { 26552546Scarlsonj char ibuf[LIFNAMSIZ + 10]; 26562546Scarlsonj char sbuf[INET6_ADDRSTRLEN]; 26572546Scarlsonj 26582546Scarlsonj ipif->ipif_was_dup = B_FALSE; 26592546Scarlsonj (void) inet_ntop(AF_INET6, 26602546Scarlsonj &ipif->ipif_v6lcl_addr, 26612546Scarlsonj sbuf, sizeof (sbuf)); 26624972Smeem ipif_get_name(ipif, ibuf, 26634972Smeem sizeof (ibuf)); 26642546Scarlsonj cmn_err(CE_NOTE, "recovered address " 26652546Scarlsonj "%s on %s", sbuf, ibuf); 26660Sstevel@tonic-gate } 26672546Scarlsonj if ((ipif->ipif_flags & IPIF_UP) && 26688023SPhil.Kirk@Sun.COM !ipif->ipif_addr_ready) 26698023SPhil.Kirk@Sun.COM ipif_up_notify(ipif); 26702546Scarlsonj ipif->ipif_addr_ready = 1; 26712546Scarlsonj ipif_refrele(ipif); 26722546Scarlsonj } 26732546Scarlsonj /* Begin defending our new address */ 26742546Scarlsonj nce->nce_unsolicit_count = 0; 2675*8485SPeter.Memishian@Sun.COM dropped = nce_xmit_advert(nce, B_FALSE, 2676*8485SPeter.Memishian@Sun.COM &ipv6_all_hosts_mcast, 0); 26772546Scarlsonj if (dropped) { 26782546Scarlsonj nce->nce_unsolicit_count = 1; 26790Sstevel@tonic-gate NDP_RESTART_TIMER(nce, 26803448Sdh155122 ipst->ips_ip_ndp_unsolicit_interval); 26813448Sdh155122 } else if (ipst->ips_ip_ndp_defense_interval != 0) { 26823448Sdh155122 NDP_RESTART_TIMER(nce, 26833448Sdh155122 ipst->ips_ip_ndp_defense_interval); 26840Sstevel@tonic-gate } 26852546Scarlsonj } else { 26862546Scarlsonj /* 26872546Scarlsonj * This is an address we're probing to be our own, but 26882546Scarlsonj * the ill is down. Wait until it comes back before 26892546Scarlsonj * doing anything, but switch to reachable state so 26902546Scarlsonj * that the restart will work. 26912546Scarlsonj */ 26922546Scarlsonj nce->nce_state = ND_REACHABLE; 26932546Scarlsonj mutex_exit(&nce->nce_lock); 26940Sstevel@tonic-gate } 26950Sstevel@tonic-gate NCE_REFRELE(nce); 26960Sstevel@tonic-gate return; 2697*8485SPeter.Memishian@Sun.COM case ND_INCOMPLETE: { 2698*8485SPeter.Memishian@Sun.COM ip6_t *ip6h; 2699*8485SPeter.Memishian@Sun.COM ip6i_t *ip6i; 2700*8485SPeter.Memishian@Sun.COM mblk_t *mp, *datamp, *nextmp, **prevmpp; 2701*8485SPeter.Memishian@Sun.COM 27020Sstevel@tonic-gate /* 2703*8485SPeter.Memishian@Sun.COM * Per case (2) in the nce_queue_mp() comments, scan nce_qd_mp 2704*8485SPeter.Memishian@Sun.COM * for any IPMP probe packets, and toss 'em. IPMP probe 2705*8485SPeter.Memishian@Sun.COM * packets will always be at the head of nce_qd_mp and always 2706*8485SPeter.Memishian@Sun.COM * have an ip6i_t header, so we can stop at the first queued 2707*8485SPeter.Memishian@Sun.COM * ND packet without an ip6i_t. 27080Sstevel@tonic-gate */ 2709*8485SPeter.Memishian@Sun.COM prevmpp = &nce->nce_qd_mp; 2710*8485SPeter.Memishian@Sun.COM for (mp = nce->nce_qd_mp; mp != NULL; mp = nextmp) { 2711*8485SPeter.Memishian@Sun.COM nextmp = mp->b_next; 2712*8485SPeter.Memishian@Sun.COM datamp = (DB_TYPE(mp) == M_CTL) ? mp->b_cont : mp; 2713*8485SPeter.Memishian@Sun.COM ip6h = (ip6_t *)datamp->b_rptr; 27140Sstevel@tonic-gate if (ip6h->ip6_nxt != IPPROTO_RAW) 27150Sstevel@tonic-gate break; 27160Sstevel@tonic-gate 27170Sstevel@tonic-gate ip6i = (ip6i_t *)ip6h; 2718*8485SPeter.Memishian@Sun.COM if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) { 2719*8485SPeter.Memishian@Sun.COM inet_freemsg(mp); 2720*8485SPeter.Memishian@Sun.COM *prevmpp = nextmp; 2721*8485SPeter.Memishian@Sun.COM } else { 2722*8485SPeter.Memishian@Sun.COM prevmpp = &mp->b_next; 2723*8485SPeter.Memishian@Sun.COM } 27240Sstevel@tonic-gate } 2725*8485SPeter.Memishian@Sun.COM 2726*8485SPeter.Memishian@Sun.COM /* 2727*8485SPeter.Memishian@Sun.COM * Must be resolver's retransmit timer. 2728*8485SPeter.Memishian@Sun.COM */ 27290Sstevel@tonic-gate if (nce->nce_qd_mp != NULL) { 2730*8485SPeter.Memishian@Sun.COM if ((ms = nce_solicit(nce, NULL)) == 0) { 27310Sstevel@tonic-gate if (nce->nce_state != ND_REACHABLE) { 27320Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 27330Sstevel@tonic-gate nce_resolv_failed(nce); 27340Sstevel@tonic-gate ndp_delete(nce); 27350Sstevel@tonic-gate } else { 27360Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 27370Sstevel@tonic-gate } 27380Sstevel@tonic-gate } else { 27390Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 27400Sstevel@tonic-gate NDP_RESTART_TIMER(nce, (clock_t)ms); 27410Sstevel@tonic-gate } 27420Sstevel@tonic-gate NCE_REFRELE(nce); 27430Sstevel@tonic-gate return; 27440Sstevel@tonic-gate } 27450Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 27460Sstevel@tonic-gate NCE_REFRELE(nce); 27470Sstevel@tonic-gate break; 2748*8485SPeter.Memishian@Sun.COM } 2749*8485SPeter.Memishian@Sun.COM case ND_REACHABLE: 27502546Scarlsonj if (((nce->nce_flags & NCE_F_UNSOL_ADV) && 27512546Scarlsonj nce->nce_unsolicit_count != 0) || 27522546Scarlsonj ((nce->nce_flags & NCE_F_PERMANENT) && 27533448Sdh155122 ipst->ips_ip_ndp_defense_interval != 0)) { 27542546Scarlsonj if (nce->nce_unsolicit_count > 0) 27552546Scarlsonj nce->nce_unsolicit_count--; 27560Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 2757*8485SPeter.Memishian@Sun.COM dropped = nce_xmit_advert(nce, B_FALSE, 2758*8485SPeter.Memishian@Sun.COM &ipv6_all_hosts_mcast, 0); 27590Sstevel@tonic-gate if (dropped) { 27600Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 27610Sstevel@tonic-gate nce->nce_unsolicit_count++; 27620Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 27630Sstevel@tonic-gate } 27640Sstevel@tonic-gate if (nce->nce_unsolicit_count != 0) { 27650Sstevel@tonic-gate NDP_RESTART_TIMER(nce, 27663448Sdh155122 ipst->ips_ip_ndp_unsolicit_interval); 27672546Scarlsonj } else { 27682546Scarlsonj NDP_RESTART_TIMER(nce, 27693448Sdh155122 ipst->ips_ip_ndp_defense_interval); 27700Sstevel@tonic-gate } 27710Sstevel@tonic-gate } else { 27720Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 27730Sstevel@tonic-gate } 27740Sstevel@tonic-gate NCE_REFRELE(nce); 27750Sstevel@tonic-gate break; 27760Sstevel@tonic-gate default: 27770Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 27780Sstevel@tonic-gate NCE_REFRELE(nce); 27790Sstevel@tonic-gate break; 27800Sstevel@tonic-gate } 27810Sstevel@tonic-gate } 27820Sstevel@tonic-gate 27830Sstevel@tonic-gate /* 27840Sstevel@tonic-gate * Set a link layer address from the ll_addr passed in. 27850Sstevel@tonic-gate * Copy SAP from ill. 27860Sstevel@tonic-gate */ 27870Sstevel@tonic-gate static void 27880Sstevel@tonic-gate nce_set_ll(nce_t *nce, uchar_t *ll_addr) 27890Sstevel@tonic-gate { 27900Sstevel@tonic-gate ill_t *ill = nce->nce_ill; 27910Sstevel@tonic-gate uchar_t *woffset; 27920Sstevel@tonic-gate 27930Sstevel@tonic-gate ASSERT(ll_addr != NULL); 27940Sstevel@tonic-gate /* Always called before fast_path_probe */ 2795741Smasputra ASSERT(nce->nce_fp_mp == NULL); 27960Sstevel@tonic-gate if (ill->ill_sap_length != 0) { 27970Sstevel@tonic-gate /* 27980Sstevel@tonic-gate * Copy the SAP type specified in the 27990Sstevel@tonic-gate * request into the xmit template. 28000Sstevel@tonic-gate */ 28010Sstevel@tonic-gate NCE_LL_SAP_COPY(ill, nce->nce_res_mp); 28020Sstevel@tonic-gate } 28030Sstevel@tonic-gate if (ill->ill_phys_addr_length > 0) { 28040Sstevel@tonic-gate /* 28050Sstevel@tonic-gate * The bcopy() below used to be called for the physical address 28060Sstevel@tonic-gate * length rather than the link layer address length. For 28070Sstevel@tonic-gate * ethernet and many other media, the phys_addr and lla are 28080Sstevel@tonic-gate * identical. 28090Sstevel@tonic-gate * However, with xresolv interfaces being introduced, the 28100Sstevel@tonic-gate * phys_addr and lla are no longer the same, and the physical 28110Sstevel@tonic-gate * address may not have any useful meaning, so we use the lla 28120Sstevel@tonic-gate * for IPv6 address resolution and destination addressing. 28130Sstevel@tonic-gate * 28140Sstevel@tonic-gate * For PPP or other interfaces with a zero length 28150Sstevel@tonic-gate * physical address, don't do anything here. 28160Sstevel@tonic-gate * The bcopy() with a zero phys_addr length was previously 28170Sstevel@tonic-gate * a no-op for interfaces with a zero-length physical address. 28180Sstevel@tonic-gate * Using the lla for them would change the way they operate. 28190Sstevel@tonic-gate * Doing nothing in such cases preserves expected behavior. 28200Sstevel@tonic-gate */ 28210Sstevel@tonic-gate woffset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 28220Sstevel@tonic-gate bcopy(ll_addr, woffset, ill->ill_nd_lla_len); 28230Sstevel@tonic-gate } 28240Sstevel@tonic-gate } 28250Sstevel@tonic-gate 28260Sstevel@tonic-gate static boolean_t 28272546Scarlsonj nce_cmp_ll_addr(const nce_t *nce, const uchar_t *ll_addr, uint32_t ll_addr_len) 28280Sstevel@tonic-gate { 28290Sstevel@tonic-gate ill_t *ill = nce->nce_ill; 28300Sstevel@tonic-gate uchar_t *ll_offset; 28310Sstevel@tonic-gate 28320Sstevel@tonic-gate ASSERT(nce->nce_res_mp != NULL); 28330Sstevel@tonic-gate if (ll_addr == NULL) 28340Sstevel@tonic-gate return (B_FALSE); 28350Sstevel@tonic-gate ll_offset = nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 28362546Scarlsonj if (bcmp(ll_addr, ll_offset, ll_addr_len) != 0) 28370Sstevel@tonic-gate return (B_TRUE); 28380Sstevel@tonic-gate return (B_FALSE); 28390Sstevel@tonic-gate } 28400Sstevel@tonic-gate 28410Sstevel@tonic-gate /* 28420Sstevel@tonic-gate * Updates the link layer address or the reachability state of 28430Sstevel@tonic-gate * a cache entry. Reset probe counter if needed. 28440Sstevel@tonic-gate */ 28450Sstevel@tonic-gate static void 28460Sstevel@tonic-gate nce_update(nce_t *nce, uint16_t new_state, uchar_t *new_ll_addr) 28470Sstevel@tonic-gate { 28480Sstevel@tonic-gate ill_t *ill = nce->nce_ill; 28490Sstevel@tonic-gate boolean_t need_stop_timer = B_FALSE; 28500Sstevel@tonic-gate boolean_t need_fastpath_update = B_FALSE; 28510Sstevel@tonic-gate 28520Sstevel@tonic-gate ASSERT(MUTEX_HELD(&nce->nce_lock)); 28532535Ssangeeta ASSERT(nce->nce_ipversion == IPV6_VERSION); 28540Sstevel@tonic-gate /* 28550Sstevel@tonic-gate * If this interface does not do NUD, there is no point 28560Sstevel@tonic-gate * in allowing an update to the cache entry. Although 28570Sstevel@tonic-gate * we will respond to NS. 28580Sstevel@tonic-gate * The only time we accept an update for a resolver when 28590Sstevel@tonic-gate * NUD is turned off is when it has just been created. 28600Sstevel@tonic-gate * Non-Resolvers will always be created as REACHABLE. 28610Sstevel@tonic-gate */ 28620Sstevel@tonic-gate if (new_state != ND_UNCHANGED) { 28630Sstevel@tonic-gate if ((nce->nce_flags & NCE_F_NONUD) && 28640Sstevel@tonic-gate (nce->nce_state != ND_INCOMPLETE)) 28650Sstevel@tonic-gate return; 28660Sstevel@tonic-gate ASSERT((int16_t)new_state >= ND_STATE_VALID_MIN); 28670Sstevel@tonic-gate ASSERT((int16_t)new_state <= ND_STATE_VALID_MAX); 28680Sstevel@tonic-gate need_stop_timer = B_TRUE; 28690Sstevel@tonic-gate if (new_state == ND_REACHABLE) 28700Sstevel@tonic-gate nce->nce_last = TICK_TO_MSEC(lbolt64); 28710Sstevel@tonic-gate else { 28720Sstevel@tonic-gate /* We force NUD in this case */ 28730Sstevel@tonic-gate nce->nce_last = 0; 28740Sstevel@tonic-gate } 28750Sstevel@tonic-gate nce->nce_state = new_state; 28760Sstevel@tonic-gate nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 28770Sstevel@tonic-gate } 28780Sstevel@tonic-gate /* 28790Sstevel@tonic-gate * In case of fast path we need to free the the fastpath 28800Sstevel@tonic-gate * M_DATA and do another probe. Otherwise we can just 28810Sstevel@tonic-gate * overwrite the DL_UNITDATA_REQ data, noting we'll lose 28820Sstevel@tonic-gate * whatever packets that happens to be transmitting at the time. 28830Sstevel@tonic-gate */ 28840Sstevel@tonic-gate if (new_ll_addr != NULL) { 28850Sstevel@tonic-gate ASSERT(nce->nce_res_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill) + 28860Sstevel@tonic-gate ill->ill_nd_lla_len <= nce->nce_res_mp->b_wptr); 28870Sstevel@tonic-gate bcopy(new_ll_addr, nce->nce_res_mp->b_rptr + 28880Sstevel@tonic-gate NCE_LL_ADDR_OFFSET(ill), ill->ill_nd_lla_len); 28890Sstevel@tonic-gate if (nce->nce_fp_mp != NULL) { 28900Sstevel@tonic-gate freemsg(nce->nce_fp_mp); 28910Sstevel@tonic-gate nce->nce_fp_mp = NULL; 28920Sstevel@tonic-gate } 2893741Smasputra need_fastpath_update = B_TRUE; 28940Sstevel@tonic-gate } 28950Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 28960Sstevel@tonic-gate if (need_stop_timer) { 28970Sstevel@tonic-gate (void) untimeout(nce->nce_timeout_id); 28980Sstevel@tonic-gate nce->nce_timeout_id = 0; 28990Sstevel@tonic-gate } 29000Sstevel@tonic-gate if (need_fastpath_update) 29010Sstevel@tonic-gate nce_fastpath(nce); 29020Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 29030Sstevel@tonic-gate } 29040Sstevel@tonic-gate 29052535Ssangeeta void 29062535Ssangeeta nce_queue_mp_common(nce_t *nce, mblk_t *mp, boolean_t head_insert) 29072535Ssangeeta { 29082535Ssangeeta uint_t count = 0; 2909*8485SPeter.Memishian@Sun.COM mblk_t **mpp, *tmp; 29102535Ssangeeta 29112535Ssangeeta ASSERT(MUTEX_HELD(&nce->nce_lock)); 29122535Ssangeeta 2913*8485SPeter.Memishian@Sun.COM for (mpp = &nce->nce_qd_mp; *mpp != NULL; mpp = &(*mpp)->b_next) { 2914*8485SPeter.Memishian@Sun.COM if (++count > nce->nce_ill->ill_max_buf) { 2915*8485SPeter.Memishian@Sun.COM tmp = nce->nce_qd_mp->b_next; 29162535Ssangeeta nce->nce_qd_mp->b_next = NULL; 29172535Ssangeeta nce->nce_qd_mp->b_prev = NULL; 29182535Ssangeeta freemsg(nce->nce_qd_mp); 29192535Ssangeeta nce->nce_qd_mp = tmp; 29202535Ssangeeta } 29212535Ssangeeta } 2922*8485SPeter.Memishian@Sun.COM 29232535Ssangeeta if (head_insert) { 29242535Ssangeeta mp->b_next = nce->nce_qd_mp; 29252535Ssangeeta nce->nce_qd_mp = mp; 29262535Ssangeeta } else { 29272535Ssangeeta *mpp = mp; 29282535Ssangeeta } 29292535Ssangeeta } 29302535Ssangeeta 29310Sstevel@tonic-gate static void 29320Sstevel@tonic-gate nce_queue_mp(nce_t *nce, mblk_t *mp) 29330Sstevel@tonic-gate { 29340Sstevel@tonic-gate boolean_t head_insert = B_FALSE; 29350Sstevel@tonic-gate ip6_t *ip6h; 2936*8485SPeter.Memishian@Sun.COM ip6i_t *ip6i; 2937*8485SPeter.Memishian@Sun.COM mblk_t *data_mp; 29380Sstevel@tonic-gate 29390Sstevel@tonic-gate ASSERT(MUTEX_HELD(&nce->nce_lock)); 29400Sstevel@tonic-gate 29410Sstevel@tonic-gate if (mp->b_datap->db_type == M_CTL) 29420Sstevel@tonic-gate data_mp = mp->b_cont; 29430Sstevel@tonic-gate else 29440Sstevel@tonic-gate data_mp = mp; 29450Sstevel@tonic-gate ip6h = (ip6_t *)data_mp->b_rptr; 29460Sstevel@tonic-gate if (ip6h->ip6_nxt == IPPROTO_RAW) { 29470Sstevel@tonic-gate /* 29480Sstevel@tonic-gate * This message should have been pulled up already in 29490Sstevel@tonic-gate * ip_wput_v6. We can't do pullups here because the message 29500Sstevel@tonic-gate * could be from the nce_qd_mp which could have b_next/b_prev 29510Sstevel@tonic-gate * non-NULL. 29520Sstevel@tonic-gate */ 29530Sstevel@tonic-gate ip6i = (ip6i_t *)ip6h; 2954*8485SPeter.Memishian@Sun.COM ASSERT(MBLKL(data_mp) >= sizeof (ip6i_t) + IPV6_HDR_LEN); 2955*8485SPeter.Memishian@Sun.COM 29560Sstevel@tonic-gate /* 2957*8485SPeter.Memishian@Sun.COM * If this packet is marked IP6I_IPMP_PROBE, then we need to: 2958*8485SPeter.Memishian@Sun.COM * 2959*8485SPeter.Memishian@Sun.COM * 1. Insert it at the head of the nce_qd_mp list. Consider 2960*8485SPeter.Memishian@Sun.COM * the normal (non-probe) load-speading case where the 2961*8485SPeter.Memishian@Sun.COM * source address of the ND packet is not tied to nce_ill. 2962*8485SPeter.Memishian@Sun.COM * If the ill bound to the source address cannot receive, 2963*8485SPeter.Memishian@Sun.COM * the response to the ND packet will not be received. 2964*8485SPeter.Memishian@Sun.COM * However, if ND packets for nce_ill's probes are queued 2965*8485SPeter.Memishian@Sun.COM * behind that ND packet, those probes will also fail to 2966*8485SPeter.Memishian@Sun.COM * be sent, and thus in.mpathd will erroneously conclude 2967*8485SPeter.Memishian@Sun.COM * that nce_ill has also failed. 2968*8485SPeter.Memishian@Sun.COM * 2969*8485SPeter.Memishian@Sun.COM * 2. Drop the probe packet in ndp_timer() if the ND did 2970*8485SPeter.Memishian@Sun.COM * not succeed on the first attempt. This ensures that 2971*8485SPeter.Memishian@Sun.COM * ND problems do not manifest as probe RTT spikes. 29720Sstevel@tonic-gate */ 2973*8485SPeter.Memishian@Sun.COM if (ip6i->ip6i_flags & IP6I_IPMP_PROBE) 29740Sstevel@tonic-gate head_insert = B_TRUE; 29750Sstevel@tonic-gate } 29762535Ssangeeta nce_queue_mp_common(nce, mp, head_insert); 29770Sstevel@tonic-gate } 29780Sstevel@tonic-gate 29790Sstevel@tonic-gate /* 29800Sstevel@tonic-gate * Called when address resolution failed due to a timeout. 29810Sstevel@tonic-gate * Send an ICMP unreachable in response to all queued packets. 29820Sstevel@tonic-gate */ 29830Sstevel@tonic-gate void 29840Sstevel@tonic-gate nce_resolv_failed(nce_t *nce) 29850Sstevel@tonic-gate { 29860Sstevel@tonic-gate mblk_t *mp, *nxt_mp, *first_mp; 29870Sstevel@tonic-gate char buf[INET6_ADDRSTRLEN]; 29880Sstevel@tonic-gate ip6_t *ip6h; 29890Sstevel@tonic-gate zoneid_t zoneid = GLOBAL_ZONEID; 29903448Sdh155122 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 29910Sstevel@tonic-gate 29920Sstevel@tonic-gate ip1dbg(("nce_resolv_failed: dst %s\n", 29930Sstevel@tonic-gate inet_ntop(AF_INET6, (char *)&nce->nce_addr, buf, sizeof (buf)))); 29940Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 29950Sstevel@tonic-gate mp = nce->nce_qd_mp; 29960Sstevel@tonic-gate nce->nce_qd_mp = NULL; 29970Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 29980Sstevel@tonic-gate while (mp != NULL) { 29990Sstevel@tonic-gate nxt_mp = mp->b_next; 30000Sstevel@tonic-gate mp->b_next = NULL; 30010Sstevel@tonic-gate mp->b_prev = NULL; 30020Sstevel@tonic-gate 30030Sstevel@tonic-gate first_mp = mp; 30040Sstevel@tonic-gate if (mp->b_datap->db_type == M_CTL) { 30050Sstevel@tonic-gate ipsec_out_t *io = (ipsec_out_t *)mp->b_rptr; 30060Sstevel@tonic-gate ASSERT(io->ipsec_out_type == IPSEC_OUT); 30070Sstevel@tonic-gate zoneid = io->ipsec_out_zoneid; 30080Sstevel@tonic-gate ASSERT(zoneid != ALL_ZONES); 30090Sstevel@tonic-gate mp = mp->b_cont; 30106851Skp158701 mp->b_next = NULL; 30116851Skp158701 mp->b_prev = NULL; 30120Sstevel@tonic-gate } 30130Sstevel@tonic-gate 30140Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 30150Sstevel@tonic-gate if (ip6h->ip6_nxt == IPPROTO_RAW) { 30160Sstevel@tonic-gate ip6i_t *ip6i; 30170Sstevel@tonic-gate /* 30180Sstevel@tonic-gate * This message should have been pulled up already 30190Sstevel@tonic-gate * in ip_wput_v6. ip_hdr_complete_v6 assumes that 30200Sstevel@tonic-gate * the header is pulled up. 30210Sstevel@tonic-gate */ 30220Sstevel@tonic-gate ip6i = (ip6i_t *)ip6h; 30230Sstevel@tonic-gate ASSERT((mp->b_wptr - (uchar_t *)ip6i) >= 30240Sstevel@tonic-gate sizeof (ip6i_t) + IPV6_HDR_LEN); 30250Sstevel@tonic-gate mp->b_rptr += sizeof (ip6i_t); 30260Sstevel@tonic-gate } 30270Sstevel@tonic-gate /* 30280Sstevel@tonic-gate * Ignore failure since icmp_unreachable_v6 will silently 30290Sstevel@tonic-gate * drop packets with an unspecified source address. 30300Sstevel@tonic-gate */ 30313448Sdh155122 (void) ip_hdr_complete_v6((ip6_t *)mp->b_rptr, zoneid, ipst); 30320Sstevel@tonic-gate icmp_unreachable_v6(nce->nce_ill->ill_wq, first_mp, 30333448Sdh155122 ICMP6_DST_UNREACH_ADDR, B_FALSE, B_FALSE, zoneid, ipst); 30340Sstevel@tonic-gate mp = nxt_mp; 30350Sstevel@tonic-gate } 30360Sstevel@tonic-gate } 30370Sstevel@tonic-gate 30380Sstevel@tonic-gate /* 30390Sstevel@tonic-gate * Called by SIOCSNDP* ioctl to add/change an nce entry 30400Sstevel@tonic-gate * and the corresponding attributes. 30410Sstevel@tonic-gate * Disallow states other than ND_REACHABLE or ND_STALE. 30420Sstevel@tonic-gate */ 30430Sstevel@tonic-gate int 30440Sstevel@tonic-gate ndp_sioc_update(ill_t *ill, lif_nd_req_t *lnr) 30450Sstevel@tonic-gate { 30460Sstevel@tonic-gate sin6_t *sin6; 30470Sstevel@tonic-gate in6_addr_t *addr; 30480Sstevel@tonic-gate nce_t *nce; 30490Sstevel@tonic-gate int err; 30500Sstevel@tonic-gate uint16_t new_flags = 0; 30510Sstevel@tonic-gate uint16_t old_flags = 0; 30520Sstevel@tonic-gate int inflags = lnr->lnr_flags; 30533448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 30540Sstevel@tonic-gate 30552535Ssangeeta ASSERT(ill->ill_isv6); 30560Sstevel@tonic-gate if ((lnr->lnr_state_create != ND_REACHABLE) && 30570Sstevel@tonic-gate (lnr->lnr_state_create != ND_STALE)) 30580Sstevel@tonic-gate return (EINVAL); 30590Sstevel@tonic-gate 3060*8485SPeter.Memishian@Sun.COM if (lnr->lnr_hdw_len > ND_MAX_HDW_LEN) 3061*8485SPeter.Memishian@Sun.COM return (EINVAL); 3062*8485SPeter.Memishian@Sun.COM 30630Sstevel@tonic-gate sin6 = (sin6_t *)&lnr->lnr_addr; 30640Sstevel@tonic-gate addr = &sin6->sin6_addr; 30650Sstevel@tonic-gate 30663448Sdh155122 mutex_enter(&ipst->ips_ndp6->ndp_g_lock); 30670Sstevel@tonic-gate /* We know it can not be mapping so just look in the hash table */ 30683448Sdh155122 nce = *((nce_t **)NCE_HASH_PTR_V6(ipst, *addr)); 3069*8485SPeter.Memishian@Sun.COM /* See comment in ndp_query() regarding IS_IPMP(ill) usage */ 3070*8485SPeter.Memishian@Sun.COM nce = nce_lookup_addr(ill, IS_IPMP(ill), addr, nce); 30710Sstevel@tonic-gate if (nce != NULL) 30720Sstevel@tonic-gate new_flags = nce->nce_flags; 30730Sstevel@tonic-gate 30740Sstevel@tonic-gate switch (inflags & (NDF_ISROUTER_ON|NDF_ISROUTER_OFF)) { 30750Sstevel@tonic-gate case NDF_ISROUTER_ON: 30760Sstevel@tonic-gate new_flags |= NCE_F_ISROUTER; 30770Sstevel@tonic-gate break; 30780Sstevel@tonic-gate case NDF_ISROUTER_OFF: 30790Sstevel@tonic-gate new_flags &= ~NCE_F_ISROUTER; 30800Sstevel@tonic-gate break; 30810Sstevel@tonic-gate case (NDF_ISROUTER_OFF|NDF_ISROUTER_ON): 30823448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 30830Sstevel@tonic-gate if (nce != NULL) 30840Sstevel@tonic-gate NCE_REFRELE(nce); 30850Sstevel@tonic-gate return (EINVAL); 30860Sstevel@tonic-gate } 30870Sstevel@tonic-gate 30880Sstevel@tonic-gate switch (inflags & (NDF_ANYCAST_ON|NDF_ANYCAST_OFF)) { 30890Sstevel@tonic-gate case NDF_ANYCAST_ON: 30900Sstevel@tonic-gate new_flags |= NCE_F_ANYCAST; 30910Sstevel@tonic-gate break; 30920Sstevel@tonic-gate case NDF_ANYCAST_OFF: 30930Sstevel@tonic-gate new_flags &= ~NCE_F_ANYCAST; 30940Sstevel@tonic-gate break; 30950Sstevel@tonic-gate case (NDF_ANYCAST_OFF|NDF_ANYCAST_ON): 30963448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 30970Sstevel@tonic-gate if (nce != NULL) 30980Sstevel@tonic-gate NCE_REFRELE(nce); 30990Sstevel@tonic-gate return (EINVAL); 31000Sstevel@tonic-gate } 31010Sstevel@tonic-gate 31020Sstevel@tonic-gate if (nce == NULL) { 31034714Ssowmini err = ndp_add_v6(ill, 31040Sstevel@tonic-gate (uchar_t *)lnr->lnr_hdw_addr, 31050Sstevel@tonic-gate addr, 31060Sstevel@tonic-gate &ipv6_all_ones, 31070Sstevel@tonic-gate &ipv6_all_zeros, 31080Sstevel@tonic-gate 0, 31090Sstevel@tonic-gate new_flags, 31100Sstevel@tonic-gate lnr->lnr_state_create, 31114714Ssowmini &nce); 31120Sstevel@tonic-gate if (err != 0) { 31133448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 31140Sstevel@tonic-gate ip1dbg(("ndp_sioc_update: Can't create NCE %d\n", err)); 31150Sstevel@tonic-gate return (err); 31160Sstevel@tonic-gate } 31170Sstevel@tonic-gate } 31180Sstevel@tonic-gate old_flags = nce->nce_flags; 31190Sstevel@tonic-gate if (old_flags & NCE_F_ISROUTER && !(new_flags & NCE_F_ISROUTER)) { 31200Sstevel@tonic-gate /* 31210Sstevel@tonic-gate * Router turned to host, delete all ires. 31220Sstevel@tonic-gate * XXX Just delete the entry, but we need to add too. 31230Sstevel@tonic-gate */ 31240Sstevel@tonic-gate nce->nce_flags &= ~NCE_F_ISROUTER; 31253448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 31260Sstevel@tonic-gate ndp_delete(nce); 31270Sstevel@tonic-gate NCE_REFRELE(nce); 31280Sstevel@tonic-gate return (0); 31290Sstevel@tonic-gate } 31303448Sdh155122 mutex_exit(&ipst->ips_ndp6->ndp_g_lock); 31310Sstevel@tonic-gate 31320Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 31330Sstevel@tonic-gate nce->nce_flags = new_flags; 31340Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 31350Sstevel@tonic-gate /* 31360Sstevel@tonic-gate * Note that we ignore the state at this point, which 31370Sstevel@tonic-gate * should be either STALE or REACHABLE. Instead we let 31380Sstevel@tonic-gate * the link layer address passed in to determine the state 31390Sstevel@tonic-gate * much like incoming packets. 31400Sstevel@tonic-gate */ 3141*8485SPeter.Memishian@Sun.COM nce_process(nce, (uchar_t *)lnr->lnr_hdw_addr, 0, B_FALSE); 31420Sstevel@tonic-gate NCE_REFRELE(nce); 31430Sstevel@tonic-gate return (0); 31440Sstevel@tonic-gate } 31450Sstevel@tonic-gate 31460Sstevel@tonic-gate /* 31470Sstevel@tonic-gate * If the device driver supports it, we make nce_fp_mp to have 31480Sstevel@tonic-gate * an M_DATA prepend. Otherwise nce_fp_mp will be null. 31494714Ssowmini * The caller ensures there is hold on nce for this function. 31500Sstevel@tonic-gate * Note that since ill_fastpath_probe() copies the mblk there is 31510Sstevel@tonic-gate * no need for the hold beyond this function. 31520Sstevel@tonic-gate */ 31533425Ssowmini void 31540Sstevel@tonic-gate nce_fastpath(nce_t *nce) 31550Sstevel@tonic-gate { 31560Sstevel@tonic-gate ill_t *ill = nce->nce_ill; 31570Sstevel@tonic-gate int res; 31580Sstevel@tonic-gate 31590Sstevel@tonic-gate ASSERT(ill != NULL); 31604714Ssowmini ASSERT(nce->nce_state != ND_INITIAL && nce->nce_state != ND_INCOMPLETE); 31614714Ssowmini 31624714Ssowmini if (nce->nce_fp_mp != NULL) { 31634714Ssowmini /* Already contains fastpath info */ 31640Sstevel@tonic-gate return; 31650Sstevel@tonic-gate } 31660Sstevel@tonic-gate if (nce->nce_res_mp != NULL) { 31670Sstevel@tonic-gate nce_fastpath_list_add(nce); 31680Sstevel@tonic-gate res = ill_fastpath_probe(ill, nce->nce_res_mp); 31690Sstevel@tonic-gate /* 31700Sstevel@tonic-gate * EAGAIN is an indication of a transient error 31710Sstevel@tonic-gate * i.e. allocation failure etc. leave the nce in the list it 31720Sstevel@tonic-gate * will be updated when another probe happens for another ire 31730Sstevel@tonic-gate * if not it will be taken out of the list when the ire is 31740Sstevel@tonic-gate * deleted. 31750Sstevel@tonic-gate */ 31760Sstevel@tonic-gate 31770Sstevel@tonic-gate if (res != 0 && res != EAGAIN) 31780Sstevel@tonic-gate nce_fastpath_list_delete(nce); 31790Sstevel@tonic-gate } 31800Sstevel@tonic-gate } 31810Sstevel@tonic-gate 31820Sstevel@tonic-gate /* 31830Sstevel@tonic-gate * Drain the list of nce's waiting for fastpath response. 31840Sstevel@tonic-gate */ 31850Sstevel@tonic-gate void 31860Sstevel@tonic-gate nce_fastpath_list_dispatch(ill_t *ill, boolean_t (*func)(nce_t *, void *), 31870Sstevel@tonic-gate void *arg) 31880Sstevel@tonic-gate { 31890Sstevel@tonic-gate 31900Sstevel@tonic-gate nce_t *next_nce; 31910Sstevel@tonic-gate nce_t *current_nce; 31920Sstevel@tonic-gate nce_t *first_nce; 31930Sstevel@tonic-gate nce_t *prev_nce = NULL; 31940Sstevel@tonic-gate 31950Sstevel@tonic-gate mutex_enter(&ill->ill_lock); 31960Sstevel@tonic-gate first_nce = current_nce = (nce_t *)ill->ill_fastpath_list; 31970Sstevel@tonic-gate while (current_nce != (nce_t *)&ill->ill_fastpath_list) { 31980Sstevel@tonic-gate next_nce = current_nce->nce_fastpath; 31990Sstevel@tonic-gate /* 32000Sstevel@tonic-gate * Take it off the list if we're flushing, or if the callback 32010Sstevel@tonic-gate * routine tells us to do so. Otherwise, leave the nce in the 32020Sstevel@tonic-gate * fastpath list to handle any pending response from the lower 32030Sstevel@tonic-gate * layer. We can't drain the list when the callback routine 32040Sstevel@tonic-gate * comparison failed, because the response is asynchronous in 32050Sstevel@tonic-gate * nature, and may not arrive in the same order as the list 32060Sstevel@tonic-gate * insertion. 32070Sstevel@tonic-gate */ 32080Sstevel@tonic-gate if (func == NULL || func(current_nce, arg)) { 32090Sstevel@tonic-gate current_nce->nce_fastpath = NULL; 32100Sstevel@tonic-gate if (current_nce == first_nce) 32110Sstevel@tonic-gate ill->ill_fastpath_list = first_nce = next_nce; 32120Sstevel@tonic-gate else 32130Sstevel@tonic-gate prev_nce->nce_fastpath = next_nce; 32140Sstevel@tonic-gate } else { 32150Sstevel@tonic-gate /* previous element that is still in the list */ 32160Sstevel@tonic-gate prev_nce = current_nce; 32170Sstevel@tonic-gate } 32180Sstevel@tonic-gate current_nce = next_nce; 32190Sstevel@tonic-gate } 32200Sstevel@tonic-gate mutex_exit(&ill->ill_lock); 32210Sstevel@tonic-gate } 32220Sstevel@tonic-gate 32230Sstevel@tonic-gate /* 32240Sstevel@tonic-gate * Add nce to the nce fastpath list. 32250Sstevel@tonic-gate */ 32260Sstevel@tonic-gate void 32270Sstevel@tonic-gate nce_fastpath_list_add(nce_t *nce) 32280Sstevel@tonic-gate { 32290Sstevel@tonic-gate ill_t *ill; 32300Sstevel@tonic-gate 32310Sstevel@tonic-gate ill = nce->nce_ill; 32320Sstevel@tonic-gate 32330Sstevel@tonic-gate mutex_enter(&ill->ill_lock); 32340Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 32350Sstevel@tonic-gate 32360Sstevel@tonic-gate /* 32370Sstevel@tonic-gate * if nce has not been deleted and 32380Sstevel@tonic-gate * is not already in the list add it. 32390Sstevel@tonic-gate */ 32400Sstevel@tonic-gate if (!(nce->nce_flags & NCE_F_CONDEMNED) && 32410Sstevel@tonic-gate (nce->nce_fastpath == NULL)) { 32420Sstevel@tonic-gate nce->nce_fastpath = (nce_t *)ill->ill_fastpath_list; 32430Sstevel@tonic-gate ill->ill_fastpath_list = nce; 32440Sstevel@tonic-gate } 32450Sstevel@tonic-gate 32460Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 32470Sstevel@tonic-gate mutex_exit(&ill->ill_lock); 32480Sstevel@tonic-gate } 32490Sstevel@tonic-gate 32500Sstevel@tonic-gate /* 32510Sstevel@tonic-gate * remove nce from the nce fastpath list. 32520Sstevel@tonic-gate */ 32530Sstevel@tonic-gate void 32540Sstevel@tonic-gate nce_fastpath_list_delete(nce_t *nce) 32550Sstevel@tonic-gate { 32560Sstevel@tonic-gate nce_t *nce_ptr; 32570Sstevel@tonic-gate 32580Sstevel@tonic-gate ill_t *ill; 32590Sstevel@tonic-gate 32600Sstevel@tonic-gate ill = nce->nce_ill; 32610Sstevel@tonic-gate ASSERT(ill != NULL); 32620Sstevel@tonic-gate 32630Sstevel@tonic-gate mutex_enter(&ill->ill_lock); 32640Sstevel@tonic-gate if (nce->nce_fastpath == NULL) 32650Sstevel@tonic-gate goto done; 32660Sstevel@tonic-gate 32670Sstevel@tonic-gate ASSERT(ill->ill_fastpath_list != &ill->ill_fastpath_list); 32680Sstevel@tonic-gate 32690Sstevel@tonic-gate if (ill->ill_fastpath_list == nce) { 32700Sstevel@tonic-gate ill->ill_fastpath_list = nce->nce_fastpath; 32710Sstevel@tonic-gate } else { 32720Sstevel@tonic-gate nce_ptr = ill->ill_fastpath_list; 32730Sstevel@tonic-gate while (nce_ptr != (nce_t *)&ill->ill_fastpath_list) { 32740Sstevel@tonic-gate if (nce_ptr->nce_fastpath == nce) { 32750Sstevel@tonic-gate nce_ptr->nce_fastpath = nce->nce_fastpath; 32760Sstevel@tonic-gate break; 32770Sstevel@tonic-gate } 32780Sstevel@tonic-gate nce_ptr = nce_ptr->nce_fastpath; 32790Sstevel@tonic-gate } 32800Sstevel@tonic-gate } 32810Sstevel@tonic-gate 32820Sstevel@tonic-gate nce->nce_fastpath = NULL; 32830Sstevel@tonic-gate done: 32840Sstevel@tonic-gate mutex_exit(&ill->ill_lock); 32850Sstevel@tonic-gate } 32860Sstevel@tonic-gate 32870Sstevel@tonic-gate /* 32880Sstevel@tonic-gate * Update all NCE's that are not in fastpath mode and 32890Sstevel@tonic-gate * have an nce_fp_mp that matches mp. mp->b_cont contains 32900Sstevel@tonic-gate * the fastpath header. 32910Sstevel@tonic-gate * 32920Sstevel@tonic-gate * Returns TRUE if entry should be dequeued, or FALSE otherwise. 32930Sstevel@tonic-gate */ 32940Sstevel@tonic-gate boolean_t 32950Sstevel@tonic-gate ndp_fastpath_update(nce_t *nce, void *arg) 32960Sstevel@tonic-gate { 32970Sstevel@tonic-gate mblk_t *mp, *fp_mp; 32980Sstevel@tonic-gate uchar_t *mp_rptr, *ud_mp_rptr; 32990Sstevel@tonic-gate mblk_t *ud_mp = nce->nce_res_mp; 33000Sstevel@tonic-gate ptrdiff_t cmplen; 33010Sstevel@tonic-gate 33020Sstevel@tonic-gate if (nce->nce_flags & NCE_F_MAPPING) 33030Sstevel@tonic-gate return (B_TRUE); 33040Sstevel@tonic-gate if ((nce->nce_fp_mp != NULL) || (ud_mp == NULL)) 33050Sstevel@tonic-gate return (B_TRUE); 33060Sstevel@tonic-gate 33070Sstevel@tonic-gate ip2dbg(("ndp_fastpath_update: trying\n")); 33080Sstevel@tonic-gate mp = (mblk_t *)arg; 33090Sstevel@tonic-gate mp_rptr = mp->b_rptr; 33100Sstevel@tonic-gate cmplen = mp->b_wptr - mp_rptr; 33110Sstevel@tonic-gate ASSERT(cmplen >= 0); 33120Sstevel@tonic-gate ud_mp_rptr = ud_mp->b_rptr; 33130Sstevel@tonic-gate /* 33140Sstevel@tonic-gate * The nce is locked here to prevent any other threads 33150Sstevel@tonic-gate * from accessing and changing nce_res_mp when the IPv6 address 33160Sstevel@tonic-gate * becomes resolved to an lla while we're in the middle 33170Sstevel@tonic-gate * of looking at and comparing the hardware address (lla). 33180Sstevel@tonic-gate * It is also locked to prevent multiple threads in nce_fastpath_update 33190Sstevel@tonic-gate * from examining nce_res_mp atthe same time. 33200Sstevel@tonic-gate */ 33210Sstevel@tonic-gate mutex_enter(&nce->nce_lock); 33220Sstevel@tonic-gate if (ud_mp->b_wptr - ud_mp_rptr != cmplen || 33230Sstevel@tonic-gate bcmp((char *)mp_rptr, (char *)ud_mp_rptr, cmplen) != 0) { 33240Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 33250Sstevel@tonic-gate /* 33260Sstevel@tonic-gate * Don't take the ire off the fastpath list yet, 33270Sstevel@tonic-gate * since the response may come later. 33280Sstevel@tonic-gate */ 33290Sstevel@tonic-gate return (B_FALSE); 33300Sstevel@tonic-gate } 33310Sstevel@tonic-gate /* Matched - install mp as the fastpath mp */ 33320Sstevel@tonic-gate ip1dbg(("ndp_fastpath_update: match\n")); 33330Sstevel@tonic-gate fp_mp = dupb(mp->b_cont); 33340Sstevel@tonic-gate if (fp_mp != NULL) { 33350Sstevel@tonic-gate nce->nce_fp_mp = fp_mp; 33360Sstevel@tonic-gate } 33370Sstevel@tonic-gate mutex_exit(&nce->nce_lock); 33380Sstevel@tonic-gate return (B_TRUE); 33390Sstevel@tonic-gate } 33400Sstevel@tonic-gate 33410Sstevel@tonic-gate /* 33420Sstevel@tonic-gate * This function handles the DL_NOTE_FASTPATH_FLUSH notification from 33430Sstevel@tonic-gate * driver. Note that it assumes IP is exclusive... 33440Sstevel@tonic-gate */ 33450Sstevel@tonic-gate /* ARGSUSED */ 33460Sstevel@tonic-gate void 33470Sstevel@tonic-gate ndp_fastpath_flush(nce_t *nce, char *arg) 33480Sstevel@tonic-gate { 33490Sstevel@tonic-gate if (nce->nce_flags & NCE_F_MAPPING) 33500Sstevel@tonic-gate return; 33510Sstevel@tonic-gate /* No fastpath info? */ 33520Sstevel@tonic-gate if (nce->nce_fp_mp == NULL || nce->nce_res_mp == NULL) 33530Sstevel@tonic-gate return; 33540Sstevel@tonic-gate 33553425Ssowmini if (nce->nce_ipversion == IPV4_VERSION && 33563425Ssowmini nce->nce_flags & NCE_F_BCAST) { 33573425Ssowmini /* 33583425Ssowmini * IPv4 BROADCAST entries: 33593425Ssowmini * We can't delete the nce since it is difficult to 33603425Ssowmini * recreate these without going through the 33613425Ssowmini * ipif down/up dance. 33623425Ssowmini * 33633425Ssowmini * All access to nce->nce_fp_mp in the case of these 33643425Ssowmini * is protected by nce_lock. 33653425Ssowmini */ 33663425Ssowmini mutex_enter(&nce->nce_lock); 33673425Ssowmini if (nce->nce_fp_mp != NULL) { 33683425Ssowmini freeb(nce->nce_fp_mp); 33693425Ssowmini nce->nce_fp_mp = NULL; 33703425Ssowmini mutex_exit(&nce->nce_lock); 33713425Ssowmini nce_fastpath(nce); 33723425Ssowmini } else { 33733425Ssowmini mutex_exit(&nce->nce_lock); 33743425Ssowmini } 33753425Ssowmini } else { 33763425Ssowmini /* Just delete the NCE... */ 33773425Ssowmini ndp_delete(nce); 33783425Ssowmini } 33790Sstevel@tonic-gate } 33800Sstevel@tonic-gate 33810Sstevel@tonic-gate /* 33820Sstevel@tonic-gate * Return a pointer to a given option in the packet. 33830Sstevel@tonic-gate * Assumes that option part of the packet have already been validated. 33840Sstevel@tonic-gate */ 33850Sstevel@tonic-gate nd_opt_hdr_t * 33860Sstevel@tonic-gate ndp_get_option(nd_opt_hdr_t *opt, int optlen, int opt_type) 33870Sstevel@tonic-gate { 33880Sstevel@tonic-gate while (optlen > 0) { 33890Sstevel@tonic-gate if (opt->nd_opt_type == opt_type) 33900Sstevel@tonic-gate return (opt); 33910Sstevel@tonic-gate optlen -= 8 * opt->nd_opt_len; 33920Sstevel@tonic-gate opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 33930Sstevel@tonic-gate } 33940Sstevel@tonic-gate return (NULL); 33950Sstevel@tonic-gate } 33960Sstevel@tonic-gate 33970Sstevel@tonic-gate /* 33980Sstevel@tonic-gate * Verify all option lengths present are > 0, also check to see 33990Sstevel@tonic-gate * if the option lengths and packet length are consistent. 34000Sstevel@tonic-gate */ 34010Sstevel@tonic-gate boolean_t 34020Sstevel@tonic-gate ndp_verify_optlen(nd_opt_hdr_t *opt, int optlen) 34030Sstevel@tonic-gate { 34040Sstevel@tonic-gate ASSERT(opt != NULL); 34050Sstevel@tonic-gate while (optlen > 0) { 34060Sstevel@tonic-gate if (opt->nd_opt_len == 0) 34070Sstevel@tonic-gate return (B_FALSE); 34080Sstevel@tonic-gate optlen -= 8 * opt->nd_opt_len; 34090Sstevel@tonic-gate if (optlen < 0) 34100Sstevel@tonic-gate return (B_FALSE); 34110Sstevel@tonic-gate opt = (struct nd_opt_hdr *)((char *)opt + 8 * opt->nd_opt_len); 34120Sstevel@tonic-gate } 34130Sstevel@tonic-gate return (B_TRUE); 34140Sstevel@tonic-gate } 34150Sstevel@tonic-gate 34160Sstevel@tonic-gate /* 34170Sstevel@tonic-gate * ndp_walk function. 34180Sstevel@tonic-gate * Free a fraction of the NCE cache entries. 34190Sstevel@tonic-gate * A fraction of zero means to not free any in that category. 34200Sstevel@tonic-gate */ 34210Sstevel@tonic-gate void 34220Sstevel@tonic-gate ndp_cache_reclaim(nce_t *nce, char *arg) 34230Sstevel@tonic-gate { 34240Sstevel@tonic-gate nce_cache_reclaim_t *ncr = (nce_cache_reclaim_t *)arg; 34250Sstevel@tonic-gate uint_t rand; 34260Sstevel@tonic-gate 34270Sstevel@tonic-gate if (nce->nce_flags & NCE_F_PERMANENT) 34280Sstevel@tonic-gate return; 34290Sstevel@tonic-gate 34300Sstevel@tonic-gate rand = (uint_t)lbolt + 34310Sstevel@tonic-gate NCE_ADDR_HASH_V6(nce->nce_addr, NCE_TABLE_SIZE); 34320Sstevel@tonic-gate if (ncr->ncr_host != 0 && 34330Sstevel@tonic-gate (rand/ncr->ncr_host)*ncr->ncr_host == rand) { 34340Sstevel@tonic-gate ndp_delete(nce); 34350Sstevel@tonic-gate return; 34360Sstevel@tonic-gate } 34370Sstevel@tonic-gate } 34380Sstevel@tonic-gate 34390Sstevel@tonic-gate /* 34400Sstevel@tonic-gate * ndp_walk function. 34410Sstevel@tonic-gate * Count the number of NCEs that can be deleted. 34420Sstevel@tonic-gate * These would be hosts but not routers. 34430Sstevel@tonic-gate */ 34440Sstevel@tonic-gate void 34450Sstevel@tonic-gate ndp_cache_count(nce_t *nce, char *arg) 34460Sstevel@tonic-gate { 34470Sstevel@tonic-gate ncc_cache_count_t *ncc = (ncc_cache_count_t *)arg; 34480Sstevel@tonic-gate 34490Sstevel@tonic-gate if (nce->nce_flags & NCE_F_PERMANENT) 34500Sstevel@tonic-gate return; 34510Sstevel@tonic-gate 34520Sstevel@tonic-gate ncc->ncc_total++; 34530Sstevel@tonic-gate if (!(nce->nce_flags & NCE_F_ISROUTER)) 34540Sstevel@tonic-gate ncc->ncc_host++; 34550Sstevel@tonic-gate } 34560Sstevel@tonic-gate 34575023Scarlsonj #ifdef DEBUG 34580Sstevel@tonic-gate void 34590Sstevel@tonic-gate nce_trace_ref(nce_t *nce) 34600Sstevel@tonic-gate { 34610Sstevel@tonic-gate ASSERT(MUTEX_HELD(&nce->nce_lock)); 34620Sstevel@tonic-gate 34635023Scarlsonj if (nce->nce_trace_disable) 34640Sstevel@tonic-gate return; 34650Sstevel@tonic-gate 34665023Scarlsonj if (!th_trace_ref(nce, nce->nce_ill->ill_ipst)) { 34675023Scarlsonj nce->nce_trace_disable = B_TRUE; 34685023Scarlsonj nce_trace_cleanup(nce); 34690Sstevel@tonic-gate } 34700Sstevel@tonic-gate } 34710Sstevel@tonic-gate 34720Sstevel@tonic-gate void 34730Sstevel@tonic-gate nce_untrace_ref(nce_t *nce) 34740Sstevel@tonic-gate { 34750Sstevel@tonic-gate ASSERT(MUTEX_HELD(&nce->nce_lock)); 34760Sstevel@tonic-gate 34775023Scarlsonj if (!nce->nce_trace_disable) 34785023Scarlsonj th_trace_unref(nce); 34790Sstevel@tonic-gate } 34800Sstevel@tonic-gate 34815023Scarlsonj static void 34825023Scarlsonj nce_trace_cleanup(const nce_t *nce) 34830Sstevel@tonic-gate { 34845023Scarlsonj th_trace_cleanup(nce, nce->nce_trace_disable); 34850Sstevel@tonic-gate } 34860Sstevel@tonic-gate #endif 34872535Ssangeeta 34882535Ssangeeta /* 34892535Ssangeeta * Called when address resolution fails due to a timeout. 34902535Ssangeeta * Send an ICMP unreachable in response to all queued packets. 34912535Ssangeeta */ 34922535Ssangeeta void 34932535Ssangeeta arp_resolv_failed(nce_t *nce) 34942535Ssangeeta { 34952535Ssangeeta mblk_t *mp, *nxt_mp, *first_mp; 34962535Ssangeeta char buf[INET6_ADDRSTRLEN]; 34972535Ssangeeta zoneid_t zoneid = GLOBAL_ZONEID; 34982535Ssangeeta struct in_addr ipv4addr; 34993448Sdh155122 ip_stack_t *ipst = nce->nce_ill->ill_ipst; 35002535Ssangeeta 35012535Ssangeeta IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &ipv4addr); 35022535Ssangeeta ip3dbg(("arp_resolv_failed: dst %s\n", 35032535Ssangeeta inet_ntop(AF_INET, &ipv4addr, buf, sizeof (buf)))); 35042535Ssangeeta mutex_enter(&nce->nce_lock); 35052535Ssangeeta mp = nce->nce_qd_mp; 35062535Ssangeeta nce->nce_qd_mp = NULL; 35072535Ssangeeta mutex_exit(&nce->nce_lock); 35082535Ssangeeta 35092535Ssangeeta while (mp != NULL) { 35102535Ssangeeta nxt_mp = mp->b_next; 35112535Ssangeeta mp->b_next = NULL; 35122535Ssangeeta mp->b_prev = NULL; 35132535Ssangeeta 35142535Ssangeeta first_mp = mp; 35152535Ssangeeta /* 35162535Ssangeeta * Send icmp unreachable messages 35172535Ssangeeta * to the hosts. 35182535Ssangeeta */ 35193448Sdh155122 (void) ip_hdr_complete((ipha_t *)mp->b_rptr, zoneid, ipst); 35202535Ssangeeta ip3dbg(("arp_resolv_failed: Calling icmp_unreachable\n")); 35212535Ssangeeta icmp_unreachable(nce->nce_ill->ill_wq, first_mp, 35223448Sdh155122 ICMP_HOST_UNREACHABLE, zoneid, ipst); 35232535Ssangeeta mp = nxt_mp; 35242535Ssangeeta } 35252535Ssangeeta } 35262535Ssangeeta 35274714Ssowmini int 35284714Ssowmini ndp_lookup_then_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 35294714Ssowmini nce_t **newnce, nce_t *src_nce) 35302535Ssangeeta { 35314714Ssowmini int err; 35322535Ssangeeta nce_t *nce; 35332535Ssangeeta in6_addr_t addr6; 35343448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 35353448Sdh155122 35363448Sdh155122 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 35373448Sdh155122 nce = *((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 35382535Ssangeeta IN6_IPADDR_TO_V4MAPPED(*addr, &addr6); 3539*8485SPeter.Memishian@Sun.COM /* 3540*8485SPeter.Memishian@Sun.COM * NOTE: IPv4 never matches across the illgrp since the NCE's we're 3541*8485SPeter.Memishian@Sun.COM * looking up have fastpath headers that are inherently per-ill. 3542*8485SPeter.Memishian@Sun.COM */ 3543*8485SPeter.Memishian@Sun.COM nce = nce_lookup_addr(ill, B_FALSE, &addr6, nce); 35442535Ssangeeta if (nce == NULL) { 35454714Ssowmini err = ndp_add_v4(ill, addr, flags, newnce, src_nce); 35462535Ssangeeta } else { 35472535Ssangeeta *newnce = nce; 35482535Ssangeeta err = EEXIST; 35492535Ssangeeta } 35503448Sdh155122 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 35512535Ssangeeta return (err); 35522535Ssangeeta } 35532535Ssangeeta 35542535Ssangeeta /* 35552535Ssangeeta * NDP Cache Entry creation routine for IPv4. 35562535Ssangeeta * Mapped entries are handled in arp. 35573448Sdh155122 * This routine must always be called with ndp4->ndp_g_lock held. 35582535Ssangeeta * Prior to return, nce_refcnt is incremented. 35592535Ssangeeta */ 35602535Ssangeeta static int 35614714Ssowmini ndp_add_v4(ill_t *ill, const in_addr_t *addr, uint16_t flags, 35624714Ssowmini nce_t **newnce, nce_t *src_nce) 35632535Ssangeeta { 35642535Ssangeeta static nce_t nce_nil; 35652535Ssangeeta nce_t *nce; 35662535Ssangeeta mblk_t *mp; 35674714Ssowmini mblk_t *template = NULL; 35682535Ssangeeta nce_t **ncep; 35693448Sdh155122 ip_stack_t *ipst = ill->ill_ipst; 35704714Ssowmini uint16_t state = ND_INITIAL; 35714714Ssowmini int err; 35723448Sdh155122 35733448Sdh155122 ASSERT(MUTEX_HELD(&ipst->ips_ndp4->ndp_g_lock)); 35744714Ssowmini ASSERT(!ill->ill_isv6); 35754714Ssowmini ASSERT((flags & NCE_F_MAPPING) == 0); 35764714Ssowmini 35774714Ssowmini if (ill->ill_resolver_mp == NULL) 35782535Ssangeeta return (EINVAL); 35792535Ssangeeta /* 35802535Ssangeeta * Allocate the mblk to hold the nce. 35812535Ssangeeta */ 35822535Ssangeeta mp = allocb(sizeof (nce_t), BPRI_MED); 35832535Ssangeeta if (mp == NULL) 35842535Ssangeeta return (ENOMEM); 35852535Ssangeeta 35862535Ssangeeta nce = (nce_t *)mp->b_rptr; 35872535Ssangeeta mp->b_wptr = (uchar_t *)&nce[1]; 35882535Ssangeeta *nce = nce_nil; 35892535Ssangeeta nce->nce_ill = ill; 35902535Ssangeeta nce->nce_ipversion = IPV4_VERSION; 35912535Ssangeeta nce->nce_flags = flags; 35922535Ssangeeta nce->nce_pcnt = ND_MAX_UNICAST_SOLICIT; 35932535Ssangeeta nce->nce_rcnt = ill->ill_xmit_count; 35942535Ssangeeta IN6_IPADDR_TO_V4MAPPED(*addr, &nce->nce_addr); 35954714Ssowmini nce->nce_mask = ipv6_all_ones; 35962535Ssangeeta nce->nce_extract_mask = ipv6_all_zeros; 35974714Ssowmini nce->nce_ll_extract_start = 0; 35982535Ssangeeta nce->nce_qd_mp = NULL; 35992535Ssangeeta nce->nce_mp = mp; 36002535Ssangeeta /* This one is for nce getting created */ 36012535Ssangeeta nce->nce_refcnt = 1; 36022535Ssangeeta mutex_init(&nce->nce_lock, NULL, MUTEX_DEFAULT, NULL); 36033448Sdh155122 ncep = ((nce_t **)NCE_HASH_PTR_V4(ipst, *addr)); 36042535Ssangeeta 36055023Scarlsonj nce->nce_trace_disable = B_FALSE; 36065023Scarlsonj 36074714Ssowmini if (src_nce != NULL) { 36084714Ssowmini /* 36094714Ssowmini * src_nce has been provided by the caller. The only 36104714Ssowmini * caller who provides a non-null, non-broadcast 36114714Ssowmini * src_nce is from ip_newroute() which must pass in 36124714Ssowmini * a ND_REACHABLE src_nce (this condition is verified 36134714Ssowmini * via an ASSERT for the save_ire->ire_nce in ip_newroute()) 36144714Ssowmini */ 36154714Ssowmini mutex_enter(&src_nce->nce_lock); 36164714Ssowmini state = src_nce->nce_state; 36174714Ssowmini if ((src_nce->nce_flags & NCE_F_CONDEMNED) || 36184714Ssowmini (ipst->ips_ndp4->ndp_g_hw_change > 0)) { 36194714Ssowmini /* 36204714Ssowmini * src_nce has been deleted, or 36214714Ssowmini * ip_arp_news is in the middle of 36224714Ssowmini * flushing entries in the the nce. 36234714Ssowmini * Fail the add, since we don't know 36244714Ssowmini * if it is safe to copy the contents of 36254714Ssowmini * src_nce 36264714Ssowmini */ 36274714Ssowmini DTRACE_PROBE2(nce__bad__src__nce, 36284714Ssowmini nce_t *, src_nce, ill_t *, ill); 36294714Ssowmini mutex_exit(&src_nce->nce_lock); 36304714Ssowmini err = EINVAL; 36314714Ssowmini goto err_ret; 36324714Ssowmini } 36334714Ssowmini template = copyb(src_nce->nce_res_mp); 36344714Ssowmini mutex_exit(&src_nce->nce_lock); 36354714Ssowmini if (template == NULL) { 36364714Ssowmini err = ENOMEM; 36374714Ssowmini goto err_ret; 36384714Ssowmini } 36394714Ssowmini } else if (flags & NCE_F_BCAST) { 36404714Ssowmini /* 36414714Ssowmini * broadcast nce. 36424714Ssowmini */ 36434714Ssowmini template = copyb(ill->ill_bcast_mp); 36444714Ssowmini if (template == NULL) { 36454714Ssowmini err = ENOMEM; 36464714Ssowmini goto err_ret; 36474714Ssowmini } 36484714Ssowmini state = ND_REACHABLE; 36494714Ssowmini } else if (ill->ill_net_type == IRE_IF_NORESOLVER) { 36504714Ssowmini /* 36514714Ssowmini * NORESOLVER entries are always created in the REACHABLE 36524714Ssowmini * state. We create a nce_res_mp with the IP nexthop address 36534714Ssowmini * in the destination address in the DLPI hdr if the 36544714Ssowmini * physical length is exactly 4 bytes. 36554714Ssowmini * 36564714Ssowmini * XXX not clear which drivers set ill_phys_addr_length to 36574714Ssowmini * IP_ADDR_LEN. 36584714Ssowmini */ 36594714Ssowmini if (ill->ill_phys_addr_length == IP_ADDR_LEN) { 36604714Ssowmini template = ill_dlur_gen((uchar_t *)addr, 36614714Ssowmini ill->ill_phys_addr_length, 36624714Ssowmini ill->ill_sap, ill->ill_sap_length); 36634714Ssowmini } else { 36644714Ssowmini template = copyb(ill->ill_resolver_mp); 36654714Ssowmini } 36664714Ssowmini if (template == NULL) { 36674714Ssowmini err = ENOMEM; 36684714Ssowmini goto err_ret; 36694714Ssowmini } 36704714Ssowmini state = ND_REACHABLE; 36714714Ssowmini } 36724714Ssowmini nce->nce_fp_mp = NULL; 36734714Ssowmini nce->nce_res_mp = template; 36744714Ssowmini nce->nce_state = state; 36754714Ssowmini if (state == ND_REACHABLE) { 36764714Ssowmini nce->nce_last = TICK_TO_MSEC(lbolt64); 36774714Ssowmini nce->nce_init_time = TICK_TO_MSEC(lbolt64); 36784714Ssowmini } else { 36794714Ssowmini nce->nce_last = 0; 36804714Ssowmini if (state == ND_INITIAL) 36814714Ssowmini nce->nce_init_time = TICK_TO_MSEC(lbolt64); 36824714Ssowmini } 36834714Ssowmini 36844714Ssowmini ASSERT((nce->nce_res_mp == NULL && nce->nce_state == ND_INITIAL) || 36854714Ssowmini (nce->nce_res_mp != NULL && nce->nce_state == ND_REACHABLE)); 36862535Ssangeeta /* 36872535Ssangeeta * Atomically ensure that the ill is not CONDEMNED, before 36882535Ssangeeta * adding the NCE. 36892535Ssangeeta */ 36902535Ssangeeta mutex_enter(&ill->ill_lock); 36912535Ssangeeta if (ill->ill_state_flags & ILL_CONDEMNED) { 36922535Ssangeeta mutex_exit(&ill->ill_lock); 36934714Ssowmini err = EINVAL; 36944714Ssowmini goto err_ret; 36952535Ssangeeta } 36962535Ssangeeta if ((nce->nce_next = *ncep) != NULL) 36972535Ssangeeta nce->nce_next->nce_ptpn = &nce->nce_next; 36982535Ssangeeta *ncep = nce; 36992535Ssangeeta nce->nce_ptpn = ncep; 37002535Ssangeeta *newnce = nce; 37012535Ssangeeta /* This one is for nce being used by an active thread */ 37022535Ssangeeta NCE_REFHOLD(*newnce); 37032535Ssangeeta 37042535Ssangeeta /* Bump up the number of nce's referencing this ill */ 37056255Ssowmini DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 37066255Ssowmini (char *), "nce", (void *), nce); 37076379Ssowmini ill->ill_nce_cnt++; 37082535Ssangeeta mutex_exit(&ill->ill_lock); 37094714Ssowmini DTRACE_PROBE1(ndp__add__v4, nce_t *, nce); 37102535Ssangeeta return (0); 37114714Ssowmini err_ret: 37124714Ssowmini freeb(mp); 37134714Ssowmini freemsg(template); 37144714Ssowmini return (err); 37152535Ssangeeta } 37162535Ssangeeta 37172546Scarlsonj /* 37182546Scarlsonj * ndp_walk routine to delete all entries that have a given destination or 37192546Scarlsonj * gateway address and cached link layer (MAC) address. This is used when ARP 37202546Scarlsonj * informs us that a network-to-link-layer mapping may have changed. 37212546Scarlsonj */ 37222546Scarlsonj void 37232546Scarlsonj nce_delete_hw_changed(nce_t *nce, void *arg) 37242546Scarlsonj { 37252546Scarlsonj nce_hw_map_t *hwm = arg; 37262546Scarlsonj mblk_t *mp; 37272546Scarlsonj dl_unitdata_req_t *dlu; 37282546Scarlsonj uchar_t *macaddr; 37292546Scarlsonj ill_t *ill; 37302546Scarlsonj int saplen; 37312546Scarlsonj ipaddr_t nce_addr; 37322546Scarlsonj 37332546Scarlsonj if (nce->nce_state != ND_REACHABLE) 37342546Scarlsonj return; 37352546Scarlsonj 37362546Scarlsonj IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr); 37372546Scarlsonj if (nce_addr != hwm->hwm_addr) 37382546Scarlsonj return; 37392546Scarlsonj 37402546Scarlsonj mutex_enter(&nce->nce_lock); 37412546Scarlsonj if ((mp = nce->nce_res_mp) == NULL) { 37422546Scarlsonj mutex_exit(&nce->nce_lock); 37432546Scarlsonj return; 37442546Scarlsonj } 37452546Scarlsonj dlu = (dl_unitdata_req_t *)mp->b_rptr; 37462546Scarlsonj macaddr = (uchar_t *)(dlu + 1); 37472546Scarlsonj ill = nce->nce_ill; 37482546Scarlsonj if ((saplen = ill->ill_sap_length) > 0) 37492546Scarlsonj macaddr += saplen; 37502546Scarlsonj else 37512546Scarlsonj saplen = -saplen; 37522546Scarlsonj 37532546Scarlsonj /* 37542546Scarlsonj * If the hardware address is unchanged, then leave this one alone. 37552546Scarlsonj * Note that saplen == abs(saplen) now. 37562546Scarlsonj */ 37572546Scarlsonj if (hwm->hwm_hwlen == dlu->dl_dest_addr_length - saplen && 37582546Scarlsonj bcmp(hwm->hwm_hwaddr, macaddr, hwm->hwm_hwlen) == 0) { 37592546Scarlsonj mutex_exit(&nce->nce_lock); 37602546Scarlsonj return; 37612546Scarlsonj } 37622546Scarlsonj mutex_exit(&nce->nce_lock); 37632546Scarlsonj 37642546Scarlsonj DTRACE_PROBE1(nce__hw__deleted, nce_t *, nce); 37652546Scarlsonj ndp_delete(nce); 37662546Scarlsonj } 37672546Scarlsonj 37682546Scarlsonj /* 37692546Scarlsonj * This function verifies whether a given IPv4 address is potentially known to 37702546Scarlsonj * the NCE subsystem. If so, then ARP must not delete the corresponding ace_t, 37712546Scarlsonj * so that it can continue to look for hardware changes on that address. 37722546Scarlsonj */ 37732546Scarlsonj boolean_t 37743448Sdh155122 ndp_lookup_ipaddr(in_addr_t addr, netstack_t *ns) 37752546Scarlsonj { 37762546Scarlsonj nce_t *nce; 37772546Scarlsonj struct in_addr nceaddr; 37783448Sdh155122 ip_stack_t *ipst = ns->netstack_ip; 37792546Scarlsonj 37802546Scarlsonj if (addr == INADDR_ANY) 37812546Scarlsonj return (B_FALSE); 37822546Scarlsonj 37833448Sdh155122 mutex_enter(&ipst->ips_ndp4->ndp_g_lock); 37843448Sdh155122 nce = *(nce_t **)NCE_HASH_PTR_V4(ipst, addr); 37852546Scarlsonj for (; nce != NULL; nce = nce->nce_next) { 37862546Scarlsonj /* Note that only v4 mapped entries are in the table. */ 37872546Scarlsonj IN6_V4MAPPED_TO_INADDR(&nce->nce_addr, &nceaddr); 37882546Scarlsonj if (addr == nceaddr.s_addr && 37892546Scarlsonj IN6_ARE_ADDR_EQUAL(&nce->nce_mask, &ipv6_all_ones)) { 37902546Scarlsonj /* Single flag check; no lock needed */ 37912546Scarlsonj if (!(nce->nce_flags & NCE_F_CONDEMNED)) 37922546Scarlsonj break; 37932546Scarlsonj } 37942546Scarlsonj } 37953448Sdh155122 mutex_exit(&ipst->ips_ndp4->ndp_g_lock); 37962546Scarlsonj return (nce != NULL); 37972546Scarlsonj } 3798*8485SPeter.Memishian@Sun.COM 3799*8485SPeter.Memishian@Sun.COM /* 3800*8485SPeter.Memishian@Sun.COM * Wrapper around ipif_lookup_addr_exact_v6() that allows ND to work properly 3801*8485SPeter.Memishian@Sun.COM * with IPMP. Specifically, since neighbor discovery is always done on 3802*8485SPeter.Memishian@Sun.COM * underlying interfaces (even for addresses owned by an IPMP interface), we 3803*8485SPeter.Memishian@Sun.COM * need to check for `v6addrp' on both `ill' and on the IPMP meta-interface 3804*8485SPeter.Memishian@Sun.COM * associated with `ill' (if it exists). 3805*8485SPeter.Memishian@Sun.COM */ 3806*8485SPeter.Memishian@Sun.COM static ipif_t * 3807*8485SPeter.Memishian@Sun.COM ip_ndp_lookup_addr_v6(const in6_addr_t *v6addrp, ill_t *ill) 3808*8485SPeter.Memishian@Sun.COM { 3809*8485SPeter.Memishian@Sun.COM ipif_t *ipif; 3810*8485SPeter.Memishian@Sun.COM ip_stack_t *ipst = ill->ill_ipst; 3811*8485SPeter.Memishian@Sun.COM 3812*8485SPeter.Memishian@Sun.COM ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3813*8485SPeter.Memishian@Sun.COM if (ipif == NULL && IS_UNDER_IPMP(ill)) { 3814*8485SPeter.Memishian@Sun.COM if ((ill = ipmp_ill_hold_ipmp_ill(ill)) != NULL) { 3815*8485SPeter.Memishian@Sun.COM ipif = ipif_lookup_addr_exact_v6(v6addrp, ill, ipst); 3816*8485SPeter.Memishian@Sun.COM ill_refrele(ill); 3817*8485SPeter.Memishian@Sun.COM } 3818*8485SPeter.Memishian@Sun.COM } 3819*8485SPeter.Memishian@Sun.COM return (ipif); 3820*8485SPeter.Memishian@Sun.COM } 3821