1*1676Sjpk /* 2*1676Sjpk * CDDL HEADER START 3*1676Sjpk * 4*1676Sjpk * The contents of this file are subject to the terms of the 5*1676Sjpk * Common Development and Distribution License (the "License"). 6*1676Sjpk * You may not use this file except in compliance with the License. 7*1676Sjpk * 8*1676Sjpk * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*1676Sjpk * or http://www.opensolaris.org/os/licensing. 10*1676Sjpk * See the License for the specific language governing permissions 11*1676Sjpk * and limitations under the License. 12*1676Sjpk * 13*1676Sjpk * When distributing Covered Code, include this CDDL HEADER in each 14*1676Sjpk * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*1676Sjpk * If applicable, add the following below this CDDL HEADER, with the 16*1676Sjpk * fields enclosed by brackets "[]" replaced with your own identifying 17*1676Sjpk * information: Portions Copyright [yyyy] [name of copyright owner] 18*1676Sjpk * 19*1676Sjpk * CDDL HEADER END 20*1676Sjpk */ 21*1676Sjpk /* 22*1676Sjpk * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23*1676Sjpk * Use is subject to license terms. 
24*1676Sjpk */ 25*1676Sjpk 26*1676Sjpk #pragma ident "%Z%%M% %I% %E% SMI" 27*1676Sjpk 28*1676Sjpk #include <sys/types.h> 29*1676Sjpk #include <sys/stream.h> 30*1676Sjpk #include <sys/strsubr.h> 31*1676Sjpk #include <sys/stropts.h> 32*1676Sjpk #include <sys/sunddi.h> 33*1676Sjpk #include <sys/cred.h> 34*1676Sjpk #include <sys/debug.h> 35*1676Sjpk #include <sys/kmem.h> 36*1676Sjpk #include <sys/errno.h> 37*1676Sjpk #include <sys/disp.h> 38*1676Sjpk #include <netinet/in.h> 39*1676Sjpk #include <netinet/in_systm.h> 40*1676Sjpk #include <netinet/ip.h> 41*1676Sjpk #include <netinet/ip_icmp.h> 42*1676Sjpk #include <netinet/tcp.h> 43*1676Sjpk #include <inet/common.h> 44*1676Sjpk #include <inet/ipclassifier.h> 45*1676Sjpk #include <inet/ip.h> 46*1676Sjpk #include <inet/mib2.h> 47*1676Sjpk #include <inet/nd.h> 48*1676Sjpk #include <inet/tcp.h> 49*1676Sjpk #include <inet/ip_rts.h> 50*1676Sjpk #include <inet/ip_ire.h> 51*1676Sjpk #include <inet/ip_if.h> 52*1676Sjpk #include <sys/modhash.h> 53*1676Sjpk 54*1676Sjpk #include <sys/tsol/label.h> 55*1676Sjpk #include <sys/tsol/label_macro.h> 56*1676Sjpk #include <sys/tsol/tnet.h> 57*1676Sjpk #include <sys/tsol/tndb.h> 58*1676Sjpk #include <sys/strsun.h> 59*1676Sjpk 60*1676Sjpk /* tunable for strict error-reply behavior (TCP RST and ICMP Unreachable) */ 61*1676Sjpk int tsol_strict_error; 62*1676Sjpk 63*1676Sjpk /* 64*1676Sjpk * Some notes on the Trusted Solaris IRE gateway security attributes: 65*1676Sjpk * 66*1676Sjpk * When running in Trusted mode, the routing subsystem determines whether or 67*1676Sjpk * not a packet can be delivered to an off-link host (not directly reachable 68*1676Sjpk * through an interface) based on the accreditation checks of the packet's 69*1676Sjpk * security attributes against those associated with the next-hop gateway. 70*1676Sjpk * 71*1676Sjpk * The next-hop gateway's security attributes can be derived from two sources 72*1676Sjpk * (in order of preference): route-related and the host database. 
A Trusted 73*1676Sjpk * system must be configured with at least the host database containing an 74*1676Sjpk * entry for the next-hop gateway, or otherwise no accreditation checks can 75*1676Sjpk * be performed, which may result in the inability to send packets to any 76*1676Sjpk * off-link destination host. 77*1676Sjpk * 78*1676Sjpk * The major differences between the two sources are the number and type of 79*1676Sjpk * security attributes used for accreditation checks. A host database entry 80*1676Sjpk * can contain at most one set of security attributes, specific only to the 81*1676Sjpk * next-hop gateway. In contrast, route-related security attributes are made 82*1676Sjpk * up of a collection of security attributes for the distant networks, and 83*1676Sjpk * are grouped together per next-hop gateway used to reach those networks. 84*1676Sjpk * This is the preferred method, and the routing subsystem will fall back to 85*1676Sjpk * the host database entry only if there are no route-related attributes 86*1676Sjpk * associated with the next-hop gateway. 87*1676Sjpk * 88*1676Sjpk * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/ 89*1676Sjpk * INTERFACE type) are initialized to contain a placeholder to store this 90*1676Sjpk * information. The ire_gw_secattr structure gets allocated, initialized 91*1676Sjpk * and associated with the IRE during the time of the IRE creation. The 92*1676Sjpk * initialization process also includes resolving the host database entry 93*1676Sjpk * of the next-hop gateway for fallback purposes. It does not include any 94*1676Sjpk * route-related attribute setup, as that process comes separately as part 95*1676Sjpk * of the route requests (add/change) made to the routing subsystem. 
96*1676Sjpk * 97*1676Sjpk * The underlying logic which involves associating IREs with the gateway 98*1676Sjpk * security attributes is represented by the following data structures: 99*1676Sjpk * 100*1676Sjpk * tsol_gcdb_t, or "gcdb" 101*1676Sjpk * 102*1676Sjpk * - This is a system-wide collection of records containing the 103*1676Sjpk * currently used route-related security attributes, which are fed 104*1676Sjpk * through the routing socket interface, e.g. "route add/change". 105*1676Sjpk * 106*1676Sjpk * tsol_gc_t, or "gc" 107*1676Sjpk * 108*1676Sjpk * - This is the gateway credential structure, and it provides the 109*1676Sjpk * only mechanism to access the contents of gcdb. More than one gc 110*1676Sjpk * entry may refer to the same gcdb record. gc's in the system are 111*1676Sjpk * grouped according to the next-hop gateway address. 112*1676Sjpk * 113*1676Sjpk * tsol_gcgrp_t, or "gcgrp" 114*1676Sjpk * 115*1676Sjpk * - Group of gateway credentials, and is unique per next-hop gateway 116*1676Sjpk * address. When the group is not empty, i.e. when gcgrp_count is 117*1676Sjpk * greater than zero, it contains one or more gc's, each pointing to 118*1676Sjpk * a gcdb record which indicates the gateway security attributes 119*1676Sjpk * associated with the next-hop gateway. 120*1676Sjpk * 121*1676Sjpk * The fields of the tsol_ire_gw_secattr_t used from within the IRE are: 122*1676Sjpk * 123*1676Sjpk * igsa_lock 124*1676Sjpk * 125*1676Sjpk * - Lock that protects all fields within tsol_ire_gw_secattr_t. 126*1676Sjpk * 127*1676Sjpk * igsa_rhc 128*1676Sjpk * 129*1676Sjpk * - Remote host cache database entry of next-hop gateway. This is 130*1676Sjpk * used in the case when there are no route-related attributes 131*1676Sjpk * configured for the IRE. 132*1676Sjpk * 133*1676Sjpk * igsa_gc 134*1676Sjpk * 135*1676Sjpk * - A set of route-related attributes that only get set for prefix 136*1676Sjpk * IREs. 
If this is non-NULL, the prefix IRE has been associated 137*1676Sjpk * with a set of gateway security attributes by way of route add/ 138*1676Sjpk * change functionality. This field stays NULL for IRE_CACHEs. 139*1676Sjpk * 140*1676Sjpk * igsa_gcgrp 141*1676Sjpk * 142*1676Sjpk * - Group of gc's which only gets set for IRE_CACHEs. Each of the gc 143*1676Sjpk * points to a gcdb record that contains the security attributes 144*1676Sjpk * used to perform the credential checks of the packet which uses 145*1676Sjpk * the IRE. If the group is not empty, the list of gc's can be 146*1676Sjpk * traversed starting at gcgrp_head. This field stays NULL for 147*1676Sjpk * prefix IREs. 148*1676Sjpk */ 149*1676Sjpk 150*1676Sjpk static kmem_cache_t *ire_gw_secattr_cache; 151*1676Sjpk 152*1676Sjpk #define GCDB_HASH_SIZE 101 153*1676Sjpk #define GCGRP_HASH_SIZE 101 154*1676Sjpk 155*1676Sjpk #define GCDB_REFRELE(p) { \ 156*1676Sjpk mutex_enter(&gcdb_lock); \ 157*1676Sjpk ASSERT((p)->gcdb_refcnt > 0); \ 158*1676Sjpk if (--((p)->gcdb_refcnt) == 0) \ 159*1676Sjpk gcdb_inactive(p); \ 160*1676Sjpk ASSERT(MUTEX_HELD(&gcdb_lock)); \ 161*1676Sjpk mutex_exit(&gcdb_lock); \ 162*1676Sjpk } 163*1676Sjpk 164*1676Sjpk static int gcdb_hash_size = GCDB_HASH_SIZE; 165*1676Sjpk static int gcgrp_hash_size = GCGRP_HASH_SIZE; 166*1676Sjpk static mod_hash_t *gcdb_hash; 167*1676Sjpk static mod_hash_t *gcgrp4_hash; 168*1676Sjpk static mod_hash_t *gcgrp6_hash; 169*1676Sjpk 170*1676Sjpk static kmutex_t gcdb_lock; 171*1676Sjpk kmutex_t gcgrp_lock; 172*1676Sjpk 173*1676Sjpk static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t); 174*1676Sjpk static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t); 175*1676Sjpk static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t); 176*1676Sjpk static void gcdb_inactive(tsol_gcdb_t *); 177*1676Sjpk 178*1676Sjpk static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t); 179*1676Sjpk static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t); 180*1676Sjpk 181*1676Sjpk 
static int ire_gw_secattr_constructor(void *, void *, int); 182*1676Sjpk static void ire_gw_secattr_destructor(void *, void *); 183*1676Sjpk 184*1676Sjpk void 185*1676Sjpk tnet_init(void) 186*1676Sjpk { 187*1676Sjpk ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache", 188*1676Sjpk sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor, 189*1676Sjpk ire_gw_secattr_destructor, NULL, NULL, NULL, 0); 190*1676Sjpk 191*1676Sjpk gcdb_hash = mod_hash_create_extended("gcdb_hash", 192*1676Sjpk gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 193*1676Sjpk gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP); 194*1676Sjpk 195*1676Sjpk gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash", 196*1676Sjpk gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 197*1676Sjpk gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 198*1676Sjpk 199*1676Sjpk gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash", 200*1676Sjpk gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 201*1676Sjpk gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 202*1676Sjpk 203*1676Sjpk mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL); 204*1676Sjpk mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL); 205*1676Sjpk } 206*1676Sjpk 207*1676Sjpk void 208*1676Sjpk tnet_fini(void) 209*1676Sjpk { 210*1676Sjpk kmem_cache_destroy(ire_gw_secattr_cache); 211*1676Sjpk mod_hash_destroy_hash(gcdb_hash); 212*1676Sjpk mod_hash_destroy_hash(gcgrp4_hash); 213*1676Sjpk mod_hash_destroy_hash(gcgrp6_hash); 214*1676Sjpk mutex_destroy(&gcdb_lock); 215*1676Sjpk mutex_destroy(&gcgrp_lock); 216*1676Sjpk } 217*1676Sjpk 218*1676Sjpk /* ARGSUSED */ 219*1676Sjpk static int 220*1676Sjpk ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags) 221*1676Sjpk { 222*1676Sjpk tsol_ire_gw_secattr_t *attrp = buf; 223*1676Sjpk 224*1676Sjpk mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL); 225*1676Sjpk 226*1676Sjpk attrp->igsa_rhc = NULL; 227*1676Sjpk attrp->igsa_gc = NULL; 228*1676Sjpk 
attrp->igsa_gcgrp = NULL; 229*1676Sjpk 230*1676Sjpk return (0); 231*1676Sjpk } 232*1676Sjpk 233*1676Sjpk /* ARGSUSED */ 234*1676Sjpk static void 235*1676Sjpk ire_gw_secattr_destructor(void *buf, void *cdrarg) 236*1676Sjpk { 237*1676Sjpk tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf; 238*1676Sjpk 239*1676Sjpk mutex_destroy(&attrp->igsa_lock); 240*1676Sjpk } 241*1676Sjpk 242*1676Sjpk tsol_ire_gw_secattr_t * 243*1676Sjpk ire_gw_secattr_alloc(int kmflags) 244*1676Sjpk { 245*1676Sjpk return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags)); 246*1676Sjpk } 247*1676Sjpk 248*1676Sjpk void 249*1676Sjpk ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp) 250*1676Sjpk { 251*1676Sjpk ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock)); 252*1676Sjpk 253*1676Sjpk if (attrp->igsa_rhc != NULL) { 254*1676Sjpk TNRHC_RELE(attrp->igsa_rhc); 255*1676Sjpk attrp->igsa_rhc = NULL; 256*1676Sjpk } 257*1676Sjpk 258*1676Sjpk if (attrp->igsa_gc != NULL) { 259*1676Sjpk GC_REFRELE(attrp->igsa_gc); 260*1676Sjpk attrp->igsa_gc = NULL; 261*1676Sjpk } 262*1676Sjpk if (attrp->igsa_gcgrp != NULL) { 263*1676Sjpk GCGRP_REFRELE(attrp->igsa_gcgrp); 264*1676Sjpk attrp->igsa_gcgrp = NULL; 265*1676Sjpk } 266*1676Sjpk 267*1676Sjpk ASSERT(attrp->igsa_rhc == NULL); 268*1676Sjpk ASSERT(attrp->igsa_gc == NULL); 269*1676Sjpk ASSERT(attrp->igsa_gcgrp == NULL); 270*1676Sjpk 271*1676Sjpk kmem_cache_free(ire_gw_secattr_cache, attrp); 272*1676Sjpk } 273*1676Sjpk 274*1676Sjpk /* ARGSUSED */ 275*1676Sjpk static uint_t 276*1676Sjpk gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key) 277*1676Sjpk { 278*1676Sjpk const struct rtsa_s *rp = (struct rtsa_s *)key; 279*1676Sjpk const uint32_t *up, *ue; 280*1676Sjpk uint_t hash; 281*1676Sjpk int i; 282*1676Sjpk 283*1676Sjpk ASSERT(rp != NULL); 284*1676Sjpk 285*1676Sjpk /* See comments in hash_bylabel in zone.c for details */ 286*1676Sjpk hash = rp->rtsa_doi + (rp->rtsa_doi << 1); 287*1676Sjpk up = (const uint32_t *)&rp->rtsa_slrange; 288*1676Sjpk ue = up + sizeof 
(rp->rtsa_slrange) / sizeof (*up); 289*1676Sjpk i = 1; 290*1676Sjpk while (up < ue) { 291*1676Sjpk /* using 2^n + 1, 1 <= n <= 16 as source of many primes */ 292*1676Sjpk hash += *up + (*up << ((i % 16) + 1)); 293*1676Sjpk up++; 294*1676Sjpk i++; 295*1676Sjpk } 296*1676Sjpk return (hash); 297*1676Sjpk } 298*1676Sjpk 299*1676Sjpk static int 300*1676Sjpk gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 301*1676Sjpk { 302*1676Sjpk struct rtsa_s *rp1 = (struct rtsa_s *)key1; 303*1676Sjpk struct rtsa_s *rp2 = (struct rtsa_s *)key2; 304*1676Sjpk 305*1676Sjpk ASSERT(rp1 != NULL && rp2 != NULL); 306*1676Sjpk 307*1676Sjpk if (blequal(&rp1->rtsa_slrange.lower_bound, 308*1676Sjpk &rp2->rtsa_slrange.lower_bound) && 309*1676Sjpk blequal(&rp1->rtsa_slrange.upper_bound, 310*1676Sjpk &rp2->rtsa_slrange.upper_bound) && 311*1676Sjpk rp1->rtsa_doi == rp2->rtsa_doi) 312*1676Sjpk return (0); 313*1676Sjpk 314*1676Sjpk /* No match; not found */ 315*1676Sjpk return (-1); 316*1676Sjpk } 317*1676Sjpk 318*1676Sjpk /* ARGSUSED */ 319*1676Sjpk static uint_t 320*1676Sjpk gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key) 321*1676Sjpk { 322*1676Sjpk tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key; 323*1676Sjpk uint_t idx = 0; 324*1676Sjpk uint32_t *ap; 325*1676Sjpk 326*1676Sjpk ASSERT(ga != NULL); 327*1676Sjpk ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 328*1676Sjpk 329*1676Sjpk ap = (uint32_t *)&ga->ga_addr.s6_addr32[0]; 330*1676Sjpk idx ^= *ap++; 331*1676Sjpk idx ^= *ap++; 332*1676Sjpk idx ^= *ap++; 333*1676Sjpk idx ^= *ap; 334*1676Sjpk 335*1676Sjpk return (idx); 336*1676Sjpk } 337*1676Sjpk 338*1676Sjpk static int 339*1676Sjpk gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 340*1676Sjpk { 341*1676Sjpk tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1; 342*1676Sjpk tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2; 343*1676Sjpk 344*1676Sjpk ASSERT(ga1 != NULL && ga2 != NULL); 345*1676Sjpk 346*1676Sjpk /* Address family must match */ 347*1676Sjpk if (ga1->ga_af 
!= ga2->ga_af) 348*1676Sjpk return (-1); 349*1676Sjpk 350*1676Sjpk if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] && 351*1676Sjpk ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] && 352*1676Sjpk ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] && 353*1676Sjpk ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3]) 354*1676Sjpk return (0); 355*1676Sjpk 356*1676Sjpk /* No match; not found */ 357*1676Sjpk return (-1); 358*1676Sjpk } 359*1676Sjpk 360*1676Sjpk #define RTSAFLAGS "\20\11cipso\3doi\2max_sl\1min_sl" 361*1676Sjpk 362*1676Sjpk int 363*1676Sjpk rtsa_validate(const struct rtsa_s *rp) 364*1676Sjpk { 365*1676Sjpk uint32_t mask = rp->rtsa_mask; 366*1676Sjpk 367*1676Sjpk /* RTSA_CIPSO must be set, and DOI must not be zero */ 368*1676Sjpk if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) { 369*1676Sjpk DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 370*1676Sjpk "rtsa(1) lacks flag or has 0 doi.", 371*1676Sjpk rtsa_s *, rp); 372*1676Sjpk return (EINVAL); 373*1676Sjpk } 374*1676Sjpk /* 375*1676Sjpk * SL range must be specified, and it must have its 376*1676Sjpk * upper bound dominating its lower bound. 377*1676Sjpk */ 378*1676Sjpk if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE || 379*1676Sjpk !bldominates(&rp->rtsa_slrange.upper_bound, 380*1676Sjpk &rp->rtsa_slrange.lower_bound)) { 381*1676Sjpk DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 382*1676Sjpk "rtsa(1) min_sl and max_sl not set or max_sl is " 383*1676Sjpk "not dominating.", rtsa_s *, rp); 384*1676Sjpk return (EINVAL); 385*1676Sjpk } 386*1676Sjpk return (0); 387*1676Sjpk } 388*1676Sjpk 389*1676Sjpk /* 390*1676Sjpk * A brief explanation of the reference counting scheme: 391*1676Sjpk * 392*1676Sjpk * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp; 393*1676Sjpk * IRE_CACHEs have it vice-versa. 
394*1676Sjpk * 395*1676Sjpk * Apart from dynamic references due to to reference holds done 396*1676Sjpk * actively by threads, we have the following references: 397*1676Sjpk * 398*1676Sjpk * gcdb_refcnt: 399*1676Sjpk * - Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference 400*1676Sjpk * to the gcdb_refcnt. 401*1676Sjpk * 402*1676Sjpk * gc_refcnt: 403*1676Sjpk * - A prefix IRE that points to an igsa_gc contributes a reference 404*1676Sjpk * to the gc_refcnt. 405*1676Sjpk * 406*1676Sjpk * gcgrp_refcnt: 407*1676Sjpk * - An IRE_CACHE that points to an igsa_gcgrp contributes a reference 408*1676Sjpk * to the gcgrp_refcnt of the associated tsol_gcgrp_t. 409*1676Sjpk * - Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes 410*1676Sjpk * a reference to the gcgrp_refcnt. 411*1676Sjpk */ 412*1676Sjpk static tsol_gcdb_t * 413*1676Sjpk gcdb_lookup(struct rtsa_s *rp, boolean_t alloc) 414*1676Sjpk { 415*1676Sjpk tsol_gcdb_t *gcdb = NULL; 416*1676Sjpk 417*1676Sjpk if (rtsa_validate(rp) != 0) 418*1676Sjpk return (NULL); 419*1676Sjpk 420*1676Sjpk mutex_enter(&gcdb_lock); 421*1676Sjpk /* Find a copy in the cache; otherwise, create one and cache it */ 422*1676Sjpk if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp, 423*1676Sjpk (mod_hash_val_t *)&gcdb) == 0) { 424*1676Sjpk gcdb->gcdb_refcnt++; 425*1676Sjpk ASSERT(gcdb->gcdb_refcnt != 0); 426*1676Sjpk 427*1676Sjpk DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *, 428*1676Sjpk "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb); 429*1676Sjpk } else if (alloc) { 430*1676Sjpk gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP); 431*1676Sjpk if (gcdb != NULL) { 432*1676Sjpk gcdb->gcdb_refcnt = 1; 433*1676Sjpk gcdb->gcdb_mask = rp->rtsa_mask; 434*1676Sjpk gcdb->gcdb_doi = rp->rtsa_doi; 435*1676Sjpk gcdb->gcdb_slrange = rp->rtsa_slrange; 436*1676Sjpk 437*1676Sjpk if (mod_hash_insert(gcdb_hash, 438*1676Sjpk (mod_hash_key_t)&gcdb->gcdb_attr, 439*1676Sjpk (mod_hash_val_t)gcdb) != 0) { 440*1676Sjpk 
mutex_exit(&gcdb_lock); 441*1676Sjpk kmem_free(gcdb, sizeof (*gcdb)); 442*1676Sjpk return (NULL); 443*1676Sjpk } 444*1676Sjpk 445*1676Sjpk DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *, 446*1676Sjpk "gcdb(1) inserted in gcdb_hash(global)", 447*1676Sjpk tsol_gcdb_t *, gcdb); 448*1676Sjpk } 449*1676Sjpk } 450*1676Sjpk mutex_exit(&gcdb_lock); 451*1676Sjpk return (gcdb); 452*1676Sjpk } 453*1676Sjpk 454*1676Sjpk static void 455*1676Sjpk gcdb_inactive(tsol_gcdb_t *gcdb) 456*1676Sjpk { 457*1676Sjpk ASSERT(MUTEX_HELD(&gcdb_lock)); 458*1676Sjpk ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0); 459*1676Sjpk 460*1676Sjpk (void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr, 461*1676Sjpk (mod_hash_val_t *)&gcdb); 462*1676Sjpk 463*1676Sjpk DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *, 464*1676Sjpk "gcdb(1) removed from gcdb_hash(global)", 465*1676Sjpk tsol_gcdb_t *, gcdb); 466*1676Sjpk kmem_free(gcdb, sizeof (*gcdb)); 467*1676Sjpk } 468*1676Sjpk 469*1676Sjpk tsol_gc_t * 470*1676Sjpk gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp) 471*1676Sjpk { 472*1676Sjpk tsol_gc_t *gc; 473*1676Sjpk tsol_gcdb_t *gcdb; 474*1676Sjpk 475*1676Sjpk *gcgrp_xtrarefp = B_TRUE; 476*1676Sjpk 477*1676Sjpk rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER); 478*1676Sjpk if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) { 479*1676Sjpk rw_exit(&gcgrp->gcgrp_rwlock); 480*1676Sjpk return (NULL); 481*1676Sjpk } 482*1676Sjpk 483*1676Sjpk for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) { 484*1676Sjpk if (gc->gc_db == gcdb) { 485*1676Sjpk ASSERT(gc->gc_grp == gcgrp); 486*1676Sjpk 487*1676Sjpk gc->gc_refcnt++; 488*1676Sjpk ASSERT(gc->gc_refcnt != 0); 489*1676Sjpk 490*1676Sjpk GCDB_REFRELE(gcdb); 491*1676Sjpk 492*1676Sjpk DTRACE_PROBE3(tx__gcdb__log__info__gc__create, 493*1676Sjpk char *, "found gc(1) in gcgrp(2)", 494*1676Sjpk tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 495*1676Sjpk rw_exit(&gcgrp->gcgrp_rwlock); 496*1676Sjpk return (gc); 497*1676Sjpk } 
498*1676Sjpk } 499*1676Sjpk 500*1676Sjpk gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP); 501*1676Sjpk if (gc != NULL) { 502*1676Sjpk if (gcgrp->gcgrp_head == NULL) { 503*1676Sjpk gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc; 504*1676Sjpk } else { 505*1676Sjpk gcgrp->gcgrp_tail->gc_next = gc; 506*1676Sjpk gc->gc_prev = gcgrp->gcgrp_tail; 507*1676Sjpk gcgrp->gcgrp_tail = gc; 508*1676Sjpk } 509*1676Sjpk gcgrp->gcgrp_count++; 510*1676Sjpk ASSERT(gcgrp->gcgrp_count != 0); 511*1676Sjpk 512*1676Sjpk /* caller has incremented gcgrp reference for us */ 513*1676Sjpk gc->gc_grp = gcgrp; 514*1676Sjpk 515*1676Sjpk gc->gc_db = gcdb; 516*1676Sjpk gc->gc_refcnt = 1; 517*1676Sjpk 518*1676Sjpk DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *, 519*1676Sjpk "added gc(1) to gcgrp(2)", tsol_gc_t *, gc, 520*1676Sjpk tsol_gcgrp_t *, gcgrp); 521*1676Sjpk 522*1676Sjpk *gcgrp_xtrarefp = B_FALSE; 523*1676Sjpk } 524*1676Sjpk rw_exit(&gcgrp->gcgrp_rwlock); 525*1676Sjpk 526*1676Sjpk return (gc); 527*1676Sjpk } 528*1676Sjpk 529*1676Sjpk void 530*1676Sjpk gc_inactive(tsol_gc_t *gc) 531*1676Sjpk { 532*1676Sjpk tsol_gcgrp_t *gcgrp = gc->gc_grp; 533*1676Sjpk 534*1676Sjpk ASSERT(gcgrp != NULL); 535*1676Sjpk ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock)); 536*1676Sjpk ASSERT(gc->gc_refcnt == 0); 537*1676Sjpk 538*1676Sjpk if (gc->gc_prev != NULL) 539*1676Sjpk gc->gc_prev->gc_next = gc->gc_next; 540*1676Sjpk else 541*1676Sjpk gcgrp->gcgrp_head = gc->gc_next; 542*1676Sjpk if (gc->gc_next != NULL) 543*1676Sjpk gc->gc_next->gc_prev = gc->gc_prev; 544*1676Sjpk else 545*1676Sjpk gcgrp->gcgrp_tail = gc->gc_prev; 546*1676Sjpk ASSERT(gcgrp->gcgrp_count > 0); 547*1676Sjpk gcgrp->gcgrp_count--; 548*1676Sjpk 549*1676Sjpk /* drop lock before it's destroyed */ 550*1676Sjpk rw_exit(&gcgrp->gcgrp_rwlock); 551*1676Sjpk 552*1676Sjpk DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *, 553*1676Sjpk "removed inactive gc(1) from gcgrp(2)", 554*1676Sjpk tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 555*1676Sjpk 556*1676Sjpk 
GCGRP_REFRELE(gcgrp); 557*1676Sjpk 558*1676Sjpk gc->gc_grp = NULL; 559*1676Sjpk gc->gc_prev = gc->gc_next = NULL; 560*1676Sjpk 561*1676Sjpk if (gc->gc_db != NULL) 562*1676Sjpk GCDB_REFRELE(gc->gc_db); 563*1676Sjpk 564*1676Sjpk kmem_free(gc, sizeof (*gc)); 565*1676Sjpk } 566*1676Sjpk 567*1676Sjpk tsol_gcgrp_t * 568*1676Sjpk gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc) 569*1676Sjpk { 570*1676Sjpk tsol_gcgrp_t *gcgrp = NULL; 571*1676Sjpk mod_hash_t *hashp; 572*1676Sjpk 573*1676Sjpk ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 574*1676Sjpk 575*1676Sjpk hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash; 576*1676Sjpk 577*1676Sjpk mutex_enter(&gcgrp_lock); 578*1676Sjpk if (mod_hash_find(hashp, (mod_hash_key_t)ga, 579*1676Sjpk (mod_hash_val_t *)&gcgrp) == 0) { 580*1676Sjpk gcgrp->gcgrp_refcnt++; 581*1676Sjpk ASSERT(gcgrp->gcgrp_refcnt != 0); 582*1676Sjpk 583*1676Sjpk DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *, 584*1676Sjpk "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp, 585*1676Sjpk mod_hash_t *, hashp); 586*1676Sjpk 587*1676Sjpk } else if (alloc) { 588*1676Sjpk gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP); 589*1676Sjpk if (gcgrp != NULL) { 590*1676Sjpk gcgrp->gcgrp_refcnt = 1; 591*1676Sjpk rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL); 592*1676Sjpk bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga)); 593*1676Sjpk 594*1676Sjpk if (mod_hash_insert(hashp, 595*1676Sjpk (mod_hash_key_t)&gcgrp->gcgrp_addr, 596*1676Sjpk (mod_hash_val_t)gcgrp) != 0) { 597*1676Sjpk mutex_exit(&gcgrp_lock); 598*1676Sjpk kmem_free(gcgrp, sizeof (*gcgrp)); 599*1676Sjpk return (NULL); 600*1676Sjpk } 601*1676Sjpk 602*1676Sjpk DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert, 603*1676Sjpk char *, "inserted gcgrp(1) in hash(2)", 604*1676Sjpk tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 605*1676Sjpk } 606*1676Sjpk } 607*1676Sjpk mutex_exit(&gcgrp_lock); 608*1676Sjpk return (gcgrp); 609*1676Sjpk } 610*1676Sjpk 611*1676Sjpk void 612*1676Sjpk 
gcgrp_inactive(tsol_gcgrp_t *gcgrp) 613*1676Sjpk { 614*1676Sjpk tsol_gcgrp_addr_t *ga; 615*1676Sjpk mod_hash_t *hashp; 616*1676Sjpk 617*1676Sjpk ASSERT(MUTEX_HELD(&gcgrp_lock)); 618*1676Sjpk ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)); 619*1676Sjpk ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0); 620*1676Sjpk ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0); 621*1676Sjpk 622*1676Sjpk ga = &gcgrp->gcgrp_addr; 623*1676Sjpk ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 624*1676Sjpk 625*1676Sjpk hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash; 626*1676Sjpk (void) mod_hash_remove(hashp, (mod_hash_key_t)ga, 627*1676Sjpk (mod_hash_val_t *)&gcgrp); 628*1676Sjpk rw_destroy(&gcgrp->gcgrp_rwlock); 629*1676Sjpk 630*1676Sjpk DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *, 631*1676Sjpk "removed inactive gcgrp(1) from hash(2)", 632*1676Sjpk tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 633*1676Sjpk 634*1676Sjpk kmem_free(gcgrp, sizeof (*gcgrp)); 635*1676Sjpk } 636*1676Sjpk 637*1676Sjpk /* 638*1676Sjpk * Converts CIPSO option to sensitivity label. 
639*1676Sjpk * Validity checks based on restrictions defined in 640*1676Sjpk * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity) 641*1676Sjpk */ 642*1676Sjpk static boolean_t 643*1676Sjpk cipso_to_sl(const uchar_t *option, bslabel_t *sl) 644*1676Sjpk { 645*1676Sjpk const struct cipso_option *co = (const struct cipso_option *)option; 646*1676Sjpk const struct cipso_tag_type_1 *tt1; 647*1676Sjpk 648*1676Sjpk tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0]; 649*1676Sjpk if (tt1->tag_type != 1 || 650*1676Sjpk tt1->tag_length < TSOL_TT1_MIN_LENGTH || 651*1676Sjpk tt1->tag_length > TSOL_TT1_MAX_LENGTH || 652*1676Sjpk tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length) 653*1676Sjpk return (B_FALSE); 654*1676Sjpk 655*1676Sjpk bsllow(sl); /* assumed: sets compartments to all zeroes */ 656*1676Sjpk LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl); 657*1676Sjpk bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments, 658*1676Sjpk tt1->tag_length - TSOL_TT1_MIN_LENGTH); 659*1676Sjpk return (B_TRUE); 660*1676Sjpk } 661*1676Sjpk 662*1676Sjpk /* 663*1676Sjpk * Parse the CIPSO label in the incoming packet and construct a ts_label_t 664*1676Sjpk * that reflects the CIPSO label and attach it to the dblk cred. Later as 665*1676Sjpk * the mblk flows up through the stack any code that needs to examine the 666*1676Sjpk * packet label can inspect the label from the dblk cred. This function is 667*1676Sjpk * called right in ip_rput for all packets, i.e. locally destined and 668*1676Sjpk * to be forwarded packets. The forwarding path needs to examine the label 669*1676Sjpk * to determine how to forward the packet. 670*1676Sjpk * 671*1676Sjpk * For IPv4, IP header options have been pulled up, but other headers might not 672*1676Sjpk * have been. For IPv6, any hop-by-hop options have been pulled up, but any 673*1676Sjpk * other headers might not be present. 
674*1676Sjpk */ 675*1676Sjpk boolean_t 676*1676Sjpk tsol_get_pkt_label(mblk_t *mp, int version) 677*1676Sjpk { 678*1676Sjpk tsol_tpc_t *src_rhtp; 679*1676Sjpk uchar_t *opt_ptr = NULL; 680*1676Sjpk const ipha_t *ipha; 681*1676Sjpk bslabel_t sl; 682*1676Sjpk uint32_t doi; 683*1676Sjpk tsol_ip_label_t label_type; 684*1676Sjpk const cipso_option_t *co; 685*1676Sjpk const void *src; 686*1676Sjpk const ip6_t *ip6h; 687*1676Sjpk 688*1676Sjpk ASSERT(DB_TYPE(mp) == M_DATA); 689*1676Sjpk 690*1676Sjpk if (version == IPV4_VERSION) { 691*1676Sjpk ipha = (const ipha_t *)mp->b_rptr; 692*1676Sjpk src = &ipha->ipha_src; 693*1676Sjpk label_type = tsol_get_option(mp, &opt_ptr); 694*1676Sjpk } else { 695*1676Sjpk uchar_t *after_secopt; 696*1676Sjpk boolean_t hbh_needed; 697*1676Sjpk const uchar_t *ip6hbh; 698*1676Sjpk size_t optlen; 699*1676Sjpk 700*1676Sjpk label_type = OPT_NONE; 701*1676Sjpk ip6h = (const ip6_t *)mp->b_rptr; 702*1676Sjpk src = &ip6h->ip6_src; 703*1676Sjpk if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) { 704*1676Sjpk ip6hbh = (const uchar_t *)&ip6h[1]; 705*1676Sjpk optlen = (ip6hbh[1] + 1) << 3; 706*1676Sjpk ASSERT(ip6hbh + optlen <= mp->b_wptr); 707*1676Sjpk opt_ptr = tsol_find_secopt_v6(ip6hbh, optlen, 708*1676Sjpk &after_secopt, &hbh_needed); 709*1676Sjpk /* tsol_find_secopt_v6 guarantees some sanity */ 710*1676Sjpk if (opt_ptr != NULL && 711*1676Sjpk (optlen = opt_ptr[1]) >= 8) { 712*1676Sjpk opt_ptr += 2; 713*1676Sjpk bcopy(opt_ptr, &doi, sizeof (doi)); 714*1676Sjpk doi = ntohl(doi); 715*1676Sjpk if (doi == IP6LS_DOI_V4 && 716*1676Sjpk opt_ptr[4] == IP6LS_TT_V4 && 717*1676Sjpk opt_ptr[5] <= optlen - 4 && 718*1676Sjpk opt_ptr[7] <= optlen - 6) { 719*1676Sjpk opt_ptr += sizeof (doi) + 2; 720*1676Sjpk label_type = OPT_CIPSO; 721*1676Sjpk } 722*1676Sjpk } 723*1676Sjpk } 724*1676Sjpk } 725*1676Sjpk 726*1676Sjpk switch (label_type) { 727*1676Sjpk case OPT_CIPSO: 728*1676Sjpk /* 729*1676Sjpk * Convert the CIPSO label to the internal format 730*1676Sjpk * and attach it to the 
dblk cred. 731*1676Sjpk * Validity checks based on restrictions defined in 732*1676Sjpk * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) 733*1676Sjpk * (draft-ietf-cipso-ipsecurity) 734*1676Sjpk */ 735*1676Sjpk if (version == IPV6_VERSION && ip6opt_ls == 0) 736*1676Sjpk return (B_FALSE); 737*1676Sjpk co = (const struct cipso_option *)opt_ptr; 738*1676Sjpk if ((co->cipso_length < 739*1676Sjpk TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) || 740*1676Sjpk (co->cipso_length > IP_MAX_OPT_LENGTH)) 741*1676Sjpk return (B_FALSE); 742*1676Sjpk bcopy(co->cipso_doi, &doi, sizeof (doi)); 743*1676Sjpk doi = ntohl(doi); 744*1676Sjpk if (!cipso_to_sl(opt_ptr, &sl)) 745*1676Sjpk return (B_FALSE); 746*1676Sjpk setbltype(&sl, SUN_SL_ID); 747*1676Sjpk break; 748*1676Sjpk 749*1676Sjpk case OPT_NONE: 750*1676Sjpk /* 751*1676Sjpk * Handle special cases that are not currently labeled, even 752*1676Sjpk * though the sending system may otherwise be configured as 753*1676Sjpk * labeled. 754*1676Sjpk * - IGMP 755*1676Sjpk * - IPv4 ICMP Router Discovery 756*1676Sjpk * - IPv6 Neighbor Discovery 757*1676Sjpk */ 758*1676Sjpk if (version == IPV4_VERSION) { 759*1676Sjpk if (ipha->ipha_protocol == IPPROTO_IGMP) 760*1676Sjpk return (B_TRUE); 761*1676Sjpk if (ipha->ipha_protocol == IPPROTO_ICMP) { 762*1676Sjpk const struct icmp *icmp = (const struct icmp *) 763*1676Sjpk (mp->b_rptr + IPH_HDR_LENGTH(ipha)); 764*1676Sjpk 765*1676Sjpk if ((uchar_t *)icmp > mp->b_wptr) { 766*1676Sjpk if (!pullupmsg(mp, 767*1676Sjpk (uchar_t *)icmp - mp->b_rptr + 1)) 768*1676Sjpk return (B_FALSE); 769*1676Sjpk icmp = (const struct icmp *) 770*1676Sjpk (mp->b_rptr + 771*1676Sjpk IPH_HDR_LENGTH(ipha)); 772*1676Sjpk } 773*1676Sjpk if (icmp->icmp_type == ICMP_ROUTERADVERT || 774*1676Sjpk icmp->icmp_type == ICMP_ROUTERSOLICIT) 775*1676Sjpk return (B_TRUE); 776*1676Sjpk } 777*1676Sjpk src = &ipha->ipha_src; 778*1676Sjpk } else { 779*1676Sjpk if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 780*1676Sjpk const icmp6_t *icmp6 = (const icmp6_t *) 
781*1676Sjpk (mp->b_rptr + IPV6_HDR_LEN); 782*1676Sjpk 783*1676Sjpk if ((uchar_t *)icmp6 + ICMP6_MINLEN > 784*1676Sjpk mp->b_wptr) { 785*1676Sjpk if (!pullupmsg(mp, 786*1676Sjpk (uchar_t *)icmp6 - mp->b_rptr + 787*1676Sjpk ICMP6_MINLEN)) 788*1676Sjpk return (B_FALSE); 789*1676Sjpk icmp6 = (const icmp6_t *) 790*1676Sjpk (mp->b_rptr + IPV6_HDR_LEN); 791*1676Sjpk } 792*1676Sjpk if (icmp6->icmp6_type >= MLD_LISTENER_QUERY && 793*1676Sjpk icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE) 794*1676Sjpk return (B_TRUE); 795*1676Sjpk } 796*1676Sjpk src = &ip6h->ip6_src; 797*1676Sjpk } 798*1676Sjpk 799*1676Sjpk /* 800*1676Sjpk * Look up the tnrhtp database and get the implicit label 801*1676Sjpk * that is associated with this unlabeled host and attach 802*1676Sjpk * it to the packet. 803*1676Sjpk */ 804*1676Sjpk if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL) 805*1676Sjpk return (B_FALSE); 806*1676Sjpk 807*1676Sjpk /* If the sender is labeled, drop the unlabeled packet. */ 808*1676Sjpk if (src_rhtp->tpc_tp.host_type != UNLABELED) { 809*1676Sjpk TPC_RELE(src_rhtp); 810*1676Sjpk pr_addr_dbg("unlabeled packet forged from %s\n", 811*1676Sjpk version == IPV4_VERSION ? 
AF_INET : AF_INET6, src); 812*1676Sjpk return (B_FALSE); 813*1676Sjpk } 814*1676Sjpk 815*1676Sjpk sl = src_rhtp->tpc_tp.tp_def_label; 816*1676Sjpk setbltype(&sl, SUN_SL_ID); 817*1676Sjpk doi = src_rhtp->tpc_tp.tp_doi; 818*1676Sjpk TPC_RELE(src_rhtp); 819*1676Sjpk break; 820*1676Sjpk 821*1676Sjpk default: 822*1676Sjpk return (B_FALSE); 823*1676Sjpk } 824*1676Sjpk 825*1676Sjpk /* Make sure no other thread is messing with this mblk */ 826*1676Sjpk ASSERT(DB_REF(mp) == 1); 827*1676Sjpk if (DB_CRED(mp) == NULL) { 828*1676Sjpk DB_CRED(mp) = newcred_from_bslabel(&sl, doi, KM_NOSLEEP); 829*1676Sjpk if (DB_CRED(mp) == NULL) 830*1676Sjpk return (B_FALSE); 831*1676Sjpk } else { 832*1676Sjpk cred_t *newcr; 833*1676Sjpk 834*1676Sjpk newcr = copycred_from_bslabel(DB_CRED(mp), &sl, doi, 835*1676Sjpk KM_NOSLEEP); 836*1676Sjpk if (newcr == NULL) 837*1676Sjpk return (B_FALSE); 838*1676Sjpk crfree(DB_CRED(mp)); 839*1676Sjpk DB_CRED(mp) = newcr; 840*1676Sjpk } 841*1676Sjpk 842*1676Sjpk /* 843*1676Sjpk * If the source was unlabeled, then flag as such, 844*1676Sjpk * while remembering that CIPSO routers add headers. 845*1676Sjpk */ 846*1676Sjpk if (label_type == OPT_NONE) 847*1676Sjpk crgetlabel(DB_CRED(mp))->tsl_flags |= TSLF_UNLABELED; 848*1676Sjpk else if (label_type == OPT_CIPSO) { 849*1676Sjpk if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL) 850*1676Sjpk return (B_FALSE); 851*1676Sjpk if (src_rhtp->tpc_tp.host_type == UNLABELED) 852*1676Sjpk crgetlabel(DB_CRED(mp))->tsl_flags |= 853*1676Sjpk TSLF_UNLABELED; 854*1676Sjpk TPC_RELE(src_rhtp); 855*1676Sjpk } 856*1676Sjpk 857*1676Sjpk return (B_TRUE); 858*1676Sjpk } 859*1676Sjpk 860*1676Sjpk /* 861*1676Sjpk * This routine determines whether the given packet should be accepted locally. 862*1676Sjpk * It does a range/set check on the packet's label by looking up the given 863*1676Sjpk * address in the remote host database. 
864*1676Sjpk */ 865*1676Sjpk boolean_t 866*1676Sjpk tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version, 867*1676Sjpk boolean_t shared_addr, const conn_t *connp) 868*1676Sjpk { 869*1676Sjpk const cred_t *credp; 870*1676Sjpk ts_label_t *plabel, *conn_plabel; 871*1676Sjpk tsol_tpc_t *tp; 872*1676Sjpk boolean_t retv; 873*1676Sjpk const bslabel_t *label, *conn_label; 874*1676Sjpk 875*1676Sjpk /* 876*1676Sjpk * The cases in which this can happen are: 877*1676Sjpk * - IPv6 Router Alert, where ip_rput_data_v6 deliberately skips 878*1676Sjpk * over the label attachment process. 879*1676Sjpk * - MLD output looped-back to ourselves. 880*1676Sjpk * - IPv4 Router Discovery, where tsol_get_pkt_label intentionally 881*1676Sjpk * avoids the labeling process. 882*1676Sjpk * We trust that all valid paths in the code set the cred pointer when 883*1676Sjpk * needed. 884*1676Sjpk */ 885*1676Sjpk if ((credp = DB_CRED(mp)) == NULL) 886*1676Sjpk return (B_TRUE); 887*1676Sjpk 888*1676Sjpk /* 889*1676Sjpk * If this packet is from the inside (not a remote host) and has the 890*1676Sjpk * same zoneid as the selected destination, then no checks are 891*1676Sjpk * necessary. Membership in the zone is enough proof. This is 892*1676Sjpk * intended to be a hot path through this function. 893*1676Sjpk */ 894*1676Sjpk if (!crisremote(credp) && 895*1676Sjpk crgetzone(credp) == crgetzone(connp->conn_cred)) 896*1676Sjpk return (B_TRUE); 897*1676Sjpk 898*1676Sjpk plabel = crgetlabel(credp); 899*1676Sjpk conn_plabel = crgetlabel(connp->conn_cred); 900*1676Sjpk ASSERT(plabel != NULL && conn_plabel != NULL); 901*1676Sjpk 902*1676Sjpk label = label2bslabel(plabel); 903*1676Sjpk conn_label = label2bslabel(crgetlabel(connp->conn_cred)); 904*1676Sjpk 905*1676Sjpk /* 906*1676Sjpk * MLPs are always validated using the range and set of the local 907*1676Sjpk * address, even when the remote host is unlabeled. 
908*1676Sjpk */ 909*1676Sjpk if (connp->conn_mlp_type == mlptBoth || 910*1676Sjpk /* LINTED: no consequent */ 911*1676Sjpk connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) { 912*1676Sjpk ; 913*1676Sjpk 914*1676Sjpk /* 915*1676Sjpk * If this is a packet from an unlabeled sender, then we must apply 916*1676Sjpk * different rules. If the label is equal to the zone's label, then 917*1676Sjpk * it's allowed. If it's not equal, but the zone is either the global 918*1676Sjpk * zone or the label is dominated by the zone's label, then allow it 919*1676Sjpk * as long as it's in the range configured for the destination. 920*1676Sjpk */ 921*1676Sjpk } else if (plabel->tsl_flags & TSLF_UNLABELED) { 922*1676Sjpk if (plabel->tsl_doi == conn_plabel->tsl_doi && 923*1676Sjpk blequal(label, conn_label)) 924*1676Sjpk return (B_TRUE); 925*1676Sjpk 926*1676Sjpk if (!connp->conn_mac_exempt || 927*1676Sjpk (connp->conn_zoneid != GLOBAL_ZONEID && 928*1676Sjpk (plabel->tsl_doi != conn_plabel->tsl_doi || 929*1676Sjpk !bldominates(conn_label, label)))) { 930*1676Sjpk DTRACE_PROBE3( 931*1676Sjpk tx__ip__log__drop__receivelocal__mac_unl, 932*1676Sjpk char *, 933*1676Sjpk "unlabeled packet mp(1) fails mac for conn(2)", 934*1676Sjpk mblk_t *, mp, conn_t *, connp); 935*1676Sjpk return (B_FALSE); 936*1676Sjpk } 937*1676Sjpk 938*1676Sjpk /* 939*1676Sjpk * If this is a private address and the connection is SLP for private 940*1676Sjpk * addresses, then the only thing that matters is the label on the 941*1676Sjpk * zone, which is the same as the label on the connection. We don't 942*1676Sjpk * care (and don't have to care) about the tnrhdb. 943*1676Sjpk */ 944*1676Sjpk } else if (!shared_addr) { 945*1676Sjpk /* 946*1676Sjpk * Since this is a zone-specific address, we know that any MLP 947*1676Sjpk * case should have been handled up above. That means this 948*1676Sjpk * connection must not be MLP for zone-specific addresses. We 949*1676Sjpk * assert that to be true. 
950*1676Sjpk */ 951*1676Sjpk ASSERT(connp->conn_mlp_type == mlptSingle || 952*1676Sjpk connp->conn_mlp_type == mlptShared); 953*1676Sjpk if (plabel->tsl_doi == conn_plabel->tsl_doi && 954*1676Sjpk blequal(label, conn_label)) 955*1676Sjpk return (B_TRUE); 956*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp, 957*1676Sjpk char *, "packet mp(1) fails exactly SLP match conn(2)", 958*1676Sjpk mblk_t *, mp, conn_t *, connp); 959*1676Sjpk return (B_FALSE); 960*1676Sjpk } 961*1676Sjpk 962*1676Sjpk tp = find_tpc(addr, version, B_FALSE); 963*1676Sjpk if (tp == NULL) { 964*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr, 965*1676Sjpk char *, "dropping mp(1), host(2) lacks entry", 966*1676Sjpk mblk_t *, mp, void *, addr); 967*1676Sjpk return (B_FALSE); 968*1676Sjpk } 969*1676Sjpk 970*1676Sjpk /* 971*1676Sjpk * The local host address should not be unlabeled at this point. The 972*1676Sjpk * only way this can happen is that the destination isn't unicast. We 973*1676Sjpk * assume that the packet should not have had a label, and thus should 974*1676Sjpk * have been handled by the TSLF_UNLABELED logic above. 
975*1676Sjpk */ 976*1676Sjpk if (tp->tpc_tp.host_type == UNLABELED) { 977*1676Sjpk retv = B_FALSE; 978*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *, 979*1676Sjpk "mp(1) unlabeled source, but tp is not unlabeled.", 980*1676Sjpk mblk_t *, mp, tsol_tpc_t *, tp); 981*1676Sjpk 982*1676Sjpk } else if (tp->tpc_tp.host_type != SUN_CIPSO) { 983*1676Sjpk retv = B_FALSE; 984*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *, 985*1676Sjpk "delivering mp(1), found unrecognized tpc(2) type.", 986*1676Sjpk mblk_t *, mp, tsol_tpc_t *, tp); 987*1676Sjpk 988*1676Sjpk } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 989*1676Sjpk retv = B_FALSE; 990*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 991*1676Sjpk "mp(1) could not be delievered to tp(2), doi mismatch", 992*1676Sjpk mblk_t *, mp, tsol_tpc_t *, tp); 993*1676Sjpk 994*1676Sjpk } else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) && 995*1676Sjpk !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) { 996*1676Sjpk retv = B_FALSE; 997*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 998*1676Sjpk "mp(1) could not be delievered to tp(2), bad mac", 999*1676Sjpk mblk_t *, mp, tsol_tpc_t *, tp); 1000*1676Sjpk } else { 1001*1676Sjpk retv = B_TRUE; 1002*1676Sjpk } 1003*1676Sjpk 1004*1676Sjpk TPC_RELE(tp); 1005*1676Sjpk 1006*1676Sjpk return (retv); 1007*1676Sjpk } 1008*1676Sjpk 1009*1676Sjpk boolean_t 1010*1676Sjpk tsol_can_accept_raw(mblk_t *mp, boolean_t check_host) 1011*1676Sjpk { 1012*1676Sjpk ts_label_t *plabel = NULL; 1013*1676Sjpk tsol_tpc_t *src_rhtp, *dst_rhtp; 1014*1676Sjpk boolean_t retv; 1015*1676Sjpk 1016*1676Sjpk if (DB_CRED(mp) != NULL) 1017*1676Sjpk plabel = crgetlabel(DB_CRED(mp)); 1018*1676Sjpk 1019*1676Sjpk /* We are bootstrapping or the internal template was never deleted */ 1020*1676Sjpk if (plabel == NULL) 1021*1676Sjpk return (B_TRUE); 1022*1676Sjpk 1023*1676Sjpk if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1024*1676Sjpk 
ipha_t *ipha = (ipha_t *)mp->b_rptr; 1025*1676Sjpk 1026*1676Sjpk src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION, 1027*1676Sjpk B_FALSE); 1028*1676Sjpk if (src_rhtp == NULL) 1029*1676Sjpk return (B_FALSE); 1030*1676Sjpk dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, 1031*1676Sjpk B_FALSE); 1032*1676Sjpk } else { 1033*1676Sjpk ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1034*1676Sjpk 1035*1676Sjpk src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION, 1036*1676Sjpk B_FALSE); 1037*1676Sjpk if (src_rhtp == NULL) 1038*1676Sjpk return (B_FALSE); 1039*1676Sjpk dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, 1040*1676Sjpk B_FALSE); 1041*1676Sjpk } 1042*1676Sjpk if (dst_rhtp == NULL) { 1043*1676Sjpk TPC_RELE(src_rhtp); 1044*1676Sjpk return (B_FALSE); 1045*1676Sjpk } 1046*1676Sjpk 1047*1676Sjpk if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) { 1048*1676Sjpk retv = B_FALSE; 1049*1676Sjpk 1050*1676Sjpk /* 1051*1676Sjpk * Check that the packet's label is in the correct range for labeled 1052*1676Sjpk * sender, or is equal to the default label for unlabeled sender. 1053*1676Sjpk */ 1054*1676Sjpk } else if ((src_rhtp->tpc_tp.host_type != UNLABELED && 1055*1676Sjpk !_blinrange(label2bslabel(plabel), 1056*1676Sjpk &src_rhtp->tpc_tp.tp_sl_range_cipso) && 1057*1676Sjpk !blinlset(label2bslabel(plabel), 1058*1676Sjpk src_rhtp->tpc_tp.tp_sl_set_cipso)) || 1059*1676Sjpk (src_rhtp->tpc_tp.host_type == UNLABELED && 1060*1676Sjpk !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) { 1061*1676Sjpk retv = B_FALSE; 1062*1676Sjpk 1063*1676Sjpk } else if (check_host) { 1064*1676Sjpk retv = B_TRUE; 1065*1676Sjpk 1066*1676Sjpk /* 1067*1676Sjpk * Until we have SL range in the Zone structure, pass it 1068*1676Sjpk * when our own address lookup returned an internal entry. 
1069*1676Sjpk */ 1070*1676Sjpk } else switch (dst_rhtp->tpc_tp.host_type) { 1071*1676Sjpk case UNLABELED: 1072*1676Sjpk retv = B_TRUE; 1073*1676Sjpk break; 1074*1676Sjpk 1075*1676Sjpk case SUN_CIPSO: 1076*1676Sjpk retv = _blinrange(label2bslabel(plabel), 1077*1676Sjpk &dst_rhtp->tpc_tp.tp_sl_range_cipso) || 1078*1676Sjpk blinlset(label2bslabel(plabel), 1079*1676Sjpk dst_rhtp->tpc_tp.tp_sl_set_cipso); 1080*1676Sjpk break; 1081*1676Sjpk 1082*1676Sjpk default: 1083*1676Sjpk retv = B_FALSE; 1084*1676Sjpk } 1085*1676Sjpk TPC_RELE(src_rhtp); 1086*1676Sjpk TPC_RELE(dst_rhtp); 1087*1676Sjpk return (retv); 1088*1676Sjpk } 1089*1676Sjpk 1090*1676Sjpk /* 1091*1676Sjpk * This routine determines whether a response to a failed packet delivery or 1092*1676Sjpk * connection should be sent back. By default, the policy is to allow such 1093*1676Sjpk * messages to be sent at all times, as these messages reveal little useful 1094*1676Sjpk * information and are healthy parts of TCP/IP networking. 1095*1676Sjpk * 1096*1676Sjpk * If tsol_strict_error is set, then we do strict tests: if the packet label is 1097*1676Sjpk * within the label range/set of this host/zone, return B_TRUE; otherwise 1098*1676Sjpk * return B_FALSE, which causes the packet to be dropped silently. 1099*1676Sjpk * 1100*1676Sjpk * Note that tsol_get_pkt_label will cause the packet to drop if the sender is 1101*1676Sjpk * marked as labeled in the remote host database, but the packet lacks a label. 1102*1676Sjpk * This means that we don't need to do a lookup on the source; the 1103*1676Sjpk * TSLF_UNLABELED flag is sufficient. 
1104*1676Sjpk */ 1105*1676Sjpk boolean_t 1106*1676Sjpk tsol_can_reply_error(const mblk_t *mp) 1107*1676Sjpk { 1108*1676Sjpk ts_label_t *plabel = NULL; 1109*1676Sjpk tsol_tpc_t *rhtp; 1110*1676Sjpk const ipha_t *ipha; 1111*1676Sjpk const ip6_t *ip6h; 1112*1676Sjpk boolean_t retv; 1113*1676Sjpk bslabel_t *pktbs; 1114*1676Sjpk 1115*1676Sjpk /* Caller must pull up at least the IP header */ 1116*1676Sjpk ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ? 1117*1676Sjpk sizeof (*ipha) : sizeof (*ip6h))); 1118*1676Sjpk 1119*1676Sjpk if (!tsol_strict_error) 1120*1676Sjpk return (B_TRUE); 1121*1676Sjpk 1122*1676Sjpk if (DB_CRED(mp) != NULL) 1123*1676Sjpk plabel = crgetlabel(DB_CRED(mp)); 1124*1676Sjpk 1125*1676Sjpk /* We are bootstrapping or the internal template was never deleted */ 1126*1676Sjpk if (plabel == NULL) 1127*1676Sjpk return (B_TRUE); 1128*1676Sjpk 1129*1676Sjpk if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1130*1676Sjpk ipha = (const ipha_t *)mp->b_rptr; 1131*1676Sjpk rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE); 1132*1676Sjpk } else { 1133*1676Sjpk ip6h = (const ip6_t *)mp->b_rptr; 1134*1676Sjpk rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE); 1135*1676Sjpk } 1136*1676Sjpk 1137*1676Sjpk if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) { 1138*1676Sjpk retv = B_FALSE; 1139*1676Sjpk } else { 1140*1676Sjpk /* 1141*1676Sjpk * If we're in the midst of forwarding, then the destination 1142*1676Sjpk * address might not be labeled. In that case, allow unlabeled 1143*1676Sjpk * packets through only if the default label is the same, and 1144*1676Sjpk * labeled ones if they dominate. 
1145*1676Sjpk */ 1146*1676Sjpk pktbs = label2bslabel(plabel); 1147*1676Sjpk switch (rhtp->tpc_tp.host_type) { 1148*1676Sjpk case UNLABELED: 1149*1676Sjpk if (plabel->tsl_flags & TSLF_UNLABELED) { 1150*1676Sjpk retv = blequal(pktbs, 1151*1676Sjpk &rhtp->tpc_tp.tp_def_label); 1152*1676Sjpk } else { 1153*1676Sjpk retv = bldominates(pktbs, 1154*1676Sjpk &rhtp->tpc_tp.tp_def_label); 1155*1676Sjpk } 1156*1676Sjpk break; 1157*1676Sjpk 1158*1676Sjpk case SUN_CIPSO: 1159*1676Sjpk retv = _blinrange(pktbs, 1160*1676Sjpk &rhtp->tpc_tp.tp_sl_range_cipso) || 1161*1676Sjpk blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso); 1162*1676Sjpk break; 1163*1676Sjpk 1164*1676Sjpk default: 1165*1676Sjpk retv = B_FALSE; 1166*1676Sjpk break; 1167*1676Sjpk } 1168*1676Sjpk } 1169*1676Sjpk 1170*1676Sjpk if (rhtp != NULL) 1171*1676Sjpk TPC_RELE(rhtp); 1172*1676Sjpk 1173*1676Sjpk return (retv); 1174*1676Sjpk } 1175*1676Sjpk 1176*1676Sjpk /* 1177*1676Sjpk * Finds the zone associated with the given packet. Returns GLOBAL_ZONEID if 1178*1676Sjpk * the zone cannot be located. 1179*1676Sjpk * 1180*1676Sjpk * This is used by the classifier when the packet matches an ALL_ZONES IRE, and 1181*1676Sjpk * there's no MLP defined. 
1182*1676Sjpk */ 1183*1676Sjpk zoneid_t 1184*1676Sjpk tsol_packet_to_zoneid(const mblk_t *mp) 1185*1676Sjpk { 1186*1676Sjpk cred_t *cr = DB_CRED(mp); 1187*1676Sjpk zone_t *zone; 1188*1676Sjpk ts_label_t *label; 1189*1676Sjpk 1190*1676Sjpk if (cr != NULL) { 1191*1676Sjpk if ((label = crgetlabel(cr)) != NULL) { 1192*1676Sjpk zone = zone_find_by_label(label); 1193*1676Sjpk if (zone != NULL) { 1194*1676Sjpk zoneid_t zoneid = zone->zone_id; 1195*1676Sjpk 1196*1676Sjpk zone_rele(zone); 1197*1676Sjpk return (zoneid); 1198*1676Sjpk } 1199*1676Sjpk } 1200*1676Sjpk } 1201*1676Sjpk return (GLOBAL_ZONEID); 1202*1676Sjpk } 1203*1676Sjpk 1204*1676Sjpk int 1205*1676Sjpk tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl) 1206*1676Sjpk { 1207*1676Sjpk int error = 0; 1208*1676Sjpk tsol_ire_gw_secattr_t *attrp = NULL; 1209*1676Sjpk tsol_tnrhc_t *gw_rhc = NULL; 1210*1676Sjpk tsol_gcgrp_t *gcgrp = NULL; 1211*1676Sjpk tsol_gc_t *gc = NULL; 1212*1676Sjpk in_addr_t ga_addr4; 1213*1676Sjpk void *paddr = NULL; 1214*1676Sjpk 1215*1676Sjpk /* Not in Trusted mode or IRE is local/loopback/broadcast/interface */ 1216*1676Sjpk if (!is_system_labeled() || 1217*1676Sjpk (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST | 1218*1676Sjpk IRE_INTERFACE))) 1219*1676Sjpk goto done; 1220*1676Sjpk 1221*1676Sjpk /* 1222*1676Sjpk * If we don't have a label to compare with, or the IRE does not 1223*1676Sjpk * contain any gateway security attributes, there's not much that 1224*1676Sjpk * we can do. We let the former case pass, and the latter fail, 1225*1676Sjpk * since the IRE doesn't qualify for a match due to the lack of 1226*1676Sjpk * security attributes. 
1227*1676Sjpk */ 1228*1676Sjpk if (tsl == NULL || ire->ire_gw_secattr == NULL) { 1229*1676Sjpk if (tsl != NULL) { 1230*1676Sjpk DTRACE_PROBE3( 1231*1676Sjpk tx__ip__log__drop__irematch__nogwsec, char *, 1232*1676Sjpk "ire(1) lacks ire_gw_secattr when matching label(2)", 1233*1676Sjpk ire_t *, ire, ts_label_t *, tsl); 1234*1676Sjpk error = EACCES; 1235*1676Sjpk } 1236*1676Sjpk goto done; 1237*1676Sjpk } 1238*1676Sjpk 1239*1676Sjpk attrp = ire->ire_gw_secattr; 1240*1676Sjpk 1241*1676Sjpk /* 1242*1676Sjpk * The possible lock order scenarios related to the tsol gateway 1243*1676Sjpk * attribute locks are documented at the beginning of ip.c in the 1244*1676Sjpk * lock order scenario section. 1245*1676Sjpk */ 1246*1676Sjpk mutex_enter(&attrp->igsa_lock); 1247*1676Sjpk 1248*1676Sjpk /* 1249*1676Sjpk * Depending on the IRE type (prefix vs. cache), we seek the group 1250*1676Sjpk * structure which contains all security credentials of the gateway. 1251*1676Sjpk * A prefix IRE is associated with at most one gateway credential, 1252*1676Sjpk * while a cache IRE is associated with every credentials that the 1253*1676Sjpk * gateway has. 
1254*1676Sjpk */ 1255*1676Sjpk if ((gc = attrp->igsa_gc) != NULL) { /* prefix */ 1256*1676Sjpk gcgrp = gc->gc_grp; 1257*1676Sjpk ASSERT(gcgrp != NULL); 1258*1676Sjpk rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1259*1676Sjpk } else if ((gcgrp = attrp->igsa_gcgrp) != NULL) { /* cache */ 1260*1676Sjpk rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1261*1676Sjpk gc = gcgrp->gcgrp_head; 1262*1676Sjpk if (gc == NULL) { 1263*1676Sjpk /* gc group is empty, so the drop lock now */ 1264*1676Sjpk ASSERT(gcgrp->gcgrp_count == 0); 1265*1676Sjpk rw_exit(&gcgrp->gcgrp_rwlock); 1266*1676Sjpk gcgrp = NULL; 1267*1676Sjpk } 1268*1676Sjpk } 1269*1676Sjpk 1270*1676Sjpk if (gcgrp != NULL) 1271*1676Sjpk GCGRP_REFHOLD(gcgrp); 1272*1676Sjpk 1273*1676Sjpk if ((gw_rhc = attrp->igsa_rhc) != NULL) { 1274*1676Sjpk /* 1275*1676Sjpk * If our cached entry has grown stale, then discard it so we 1276*1676Sjpk * can get a new one. 1277*1676Sjpk */ 1278*1676Sjpk if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) { 1279*1676Sjpk TNRHC_RELE(gw_rhc); 1280*1676Sjpk attrp->igsa_rhc = gw_rhc = NULL; 1281*1676Sjpk } else { 1282*1676Sjpk TNRHC_HOLD(gw_rhc) 1283*1676Sjpk } 1284*1676Sjpk } 1285*1676Sjpk 1286*1676Sjpk /* Last attempt at loading the template had failed; try again */ 1287*1676Sjpk if (gw_rhc == NULL) { 1288*1676Sjpk if (gcgrp != NULL) { 1289*1676Sjpk tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr; 1290*1676Sjpk 1291*1676Sjpk if (ire->ire_ipversion == IPV4_VERSION) { 1292*1676Sjpk ASSERT(ga->ga_af == AF_INET); 1293*1676Sjpk IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4); 1294*1676Sjpk paddr = &ga_addr4; 1295*1676Sjpk } else { 1296*1676Sjpk ASSERT(ga->ga_af == AF_INET6); 1297*1676Sjpk paddr = &ga->ga_addr; 1298*1676Sjpk } 1299*1676Sjpk } else if (ire->ire_ipversion == IPV6_VERSION && 1300*1676Sjpk !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) { 1301*1676Sjpk paddr = &ire->ire_gateway_addr_v6; 1302*1676Sjpk } else if (ire->ire_ipversion == IPV4_VERSION && 1303*1676Sjpk ire->ire_gateway_addr != 
INADDR_ANY) { 1304*1676Sjpk paddr = &ire->ire_gateway_addr; 1305*1676Sjpk } 1306*1676Sjpk 1307*1676Sjpk /* We've found a gateway address to do the template lookup */ 1308*1676Sjpk if (paddr != NULL) { 1309*1676Sjpk ASSERT(gw_rhc == NULL); 1310*1676Sjpk if (ire->ire_ipversion == IPV4_VERSION) 1311*1676Sjpk gw_rhc = find_rhc_v4(paddr); 1312*1676Sjpk else 1313*1676Sjpk gw_rhc = find_rhc_v6(paddr); 1314*1676Sjpk if (gw_rhc != NULL) { 1315*1676Sjpk /* 1316*1676Sjpk * Note that if the lookup above returned an 1317*1676Sjpk * internal template, we'll use it for the 1318*1676Sjpk * time being, and do another lookup next 1319*1676Sjpk * time around. 1320*1676Sjpk */ 1321*1676Sjpk /* Another thread has loaded the template? */ 1322*1676Sjpk if (attrp->igsa_rhc != NULL) { 1323*1676Sjpk TNRHC_RELE(gw_rhc) 1324*1676Sjpk /* reload, it could be different */ 1325*1676Sjpk gw_rhc = attrp->igsa_rhc; 1326*1676Sjpk } else { 1327*1676Sjpk attrp->igsa_rhc = gw_rhc; 1328*1676Sjpk } 1329*1676Sjpk /* 1330*1676Sjpk * Hold an extra reference just like we did 1331*1676Sjpk * above prior to dropping the igsa_lock. 1332*1676Sjpk */ 1333*1676Sjpk TNRHC_HOLD(gw_rhc) 1334*1676Sjpk } 1335*1676Sjpk } 1336*1676Sjpk } 1337*1676Sjpk 1338*1676Sjpk mutex_exit(&attrp->igsa_lock); 1339*1676Sjpk /* Gateway template not found */ 1340*1676Sjpk if (gw_rhc == NULL) { 1341*1676Sjpk /* 1342*1676Sjpk * If destination address is directly reachable through an 1343*1676Sjpk * interface rather than through a learned route, pass it. 
1344*1676Sjpk */ 1345*1676Sjpk if (paddr != NULL) { 1346*1676Sjpk DTRACE_PROBE3( 1347*1676Sjpk tx__ip__log__drop__irematch__nogwtmpl, char *, 1348*1676Sjpk "ire(1), label(2) off-link with no gw_rhc", 1349*1676Sjpk ire_t *, ire, ts_label_t *, tsl); 1350*1676Sjpk error = EINVAL; 1351*1676Sjpk } 1352*1676Sjpk goto done; 1353*1676Sjpk } 1354*1676Sjpk 1355*1676Sjpk if (gc != NULL) { 1356*1676Sjpk tsol_gcdb_t *gcdb; 1357*1676Sjpk /* 1358*1676Sjpk * In the case of IRE_CACHE we've got one or more gateway 1359*1676Sjpk * security credentials to compare against the passed in label. 1360*1676Sjpk * Perform label range comparison against each security 1361*1676Sjpk * credential of the gateway. In the case of a prefix ire 1362*1676Sjpk * we need to match against the security attributes of 1363*1676Sjpk * just the route itself, so the loop is executed only once. 1364*1676Sjpk */ 1365*1676Sjpk ASSERT(gcgrp != NULL); 1366*1676Sjpk do { 1367*1676Sjpk gcdb = gc->gc_db; 1368*1676Sjpk if (tsl->tsl_doi == gcdb->gcdb_doi && 1369*1676Sjpk _blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) 1370*1676Sjpk break; 1371*1676Sjpk if (ire->ire_type == IRE_CACHE) 1372*1676Sjpk gc = gc->gc_next; 1373*1676Sjpk else 1374*1676Sjpk gc = NULL; 1375*1676Sjpk } while (gc != NULL); 1376*1676Sjpk 1377*1676Sjpk if (gc == NULL) { 1378*1676Sjpk DTRACE_PROBE3( 1379*1676Sjpk tx__ip__log__drop__irematch__nogcmatched, 1380*1676Sjpk char *, "ire(1), tsl(2): all gc failed match", 1381*1676Sjpk ire_t *, ire, ts_label_t *, tsl); 1382*1676Sjpk error = EACCES; 1383*1676Sjpk } 1384*1676Sjpk } else { 1385*1676Sjpk /* 1386*1676Sjpk * We didn't find any gateway credentials in the IRE 1387*1676Sjpk * attributes; fall back to the gateway's template for 1388*1676Sjpk * label range checks, if we are required to do so. 
1389*1676Sjpk */ 1390*1676Sjpk ASSERT(gw_rhc != NULL); 1391*1676Sjpk switch (gw_rhc->rhc_tpc->tpc_tp.host_type) { 1392*1676Sjpk case SUN_CIPSO: 1393*1676Sjpk if (tsl->tsl_doi != 1394*1676Sjpk gw_rhc->rhc_tpc->tpc_tp.tp_doi || 1395*1676Sjpk (!_blinrange(&tsl->tsl_label, 1396*1676Sjpk &gw_rhc->rhc_tpc->tpc_tp. 1397*1676Sjpk tp_sl_range_cipso) && 1398*1676Sjpk !blinlset(&tsl->tsl_label, 1399*1676Sjpk gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) { 1400*1676Sjpk error = EACCES; 1401*1676Sjpk DTRACE_PROBE4( 1402*1676Sjpk tx__ip__log__drop__irematch__deftmpl, 1403*1676Sjpk char *, "ire(1), tsl(2), gw_rhc(3) " 1404*1676Sjpk "failed match (cipso gw)", 1405*1676Sjpk ire_t *, ire, ts_label_t *, tsl, 1406*1676Sjpk tsol_tnrhc_t *, gw_rhc); 1407*1676Sjpk } 1408*1676Sjpk break; 1409*1676Sjpk 1410*1676Sjpk case UNLABELED: 1411*1676Sjpk if (tsl->tsl_doi != 1412*1676Sjpk gw_rhc->rhc_tpc->tpc_tp.tp_doi || 1413*1676Sjpk (!_blinrange(&tsl->tsl_label, 1414*1676Sjpk &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) && 1415*1676Sjpk !blinlset(&tsl->tsl_label, 1416*1676Sjpk gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) { 1417*1676Sjpk error = EACCES; 1418*1676Sjpk DTRACE_PROBE4( 1419*1676Sjpk tx__ip__log__drop__irematch__deftmpl, 1420*1676Sjpk char *, "ire(1), tsl(2), gw_rhc(3) " 1421*1676Sjpk "failed match (unlabeled gw)", 1422*1676Sjpk ire_t *, ire, ts_label_t *, tsl, 1423*1676Sjpk tsol_tnrhc_t *, gw_rhc); 1424*1676Sjpk } 1425*1676Sjpk break; 1426*1676Sjpk } 1427*1676Sjpk } 1428*1676Sjpk 1429*1676Sjpk done: 1430*1676Sjpk 1431*1676Sjpk if (gcgrp != NULL) { 1432*1676Sjpk rw_exit(&gcgrp->gcgrp_rwlock); 1433*1676Sjpk GCGRP_REFRELE(gcgrp); 1434*1676Sjpk } 1435*1676Sjpk 1436*1676Sjpk if (gw_rhc != NULL) 1437*1676Sjpk TNRHC_RELE(gw_rhc) 1438*1676Sjpk 1439*1676Sjpk return (error); 1440*1676Sjpk } 1441*1676Sjpk 1442*1676Sjpk /* 1443*1676Sjpk * Performs label accreditation checks for packet forwarding. 
1444*1676Sjpk * 1445*1676Sjpk * Returns a pointer to the modified mblk if allowed for forwarding, 1446*1676Sjpk * or NULL if the packet must be dropped. 1447*1676Sjpk */ 1448*1676Sjpk mblk_t * 1449*1676Sjpk tsol_ip_forward(ire_t *ire, mblk_t *mp) 1450*1676Sjpk { 1451*1676Sjpk tsol_ire_gw_secattr_t *attrp = NULL; 1452*1676Sjpk ipha_t *ipha; 1453*1676Sjpk ip6_t *ip6h; 1454*1676Sjpk const void *pdst; 1455*1676Sjpk const void *psrc; 1456*1676Sjpk boolean_t off_link; 1457*1676Sjpk tsol_tpc_t *dst_rhtp, *gw_rhtp; 1458*1676Sjpk tsol_ip_label_t label_type; 1459*1676Sjpk uchar_t *opt_ptr = NULL; 1460*1676Sjpk ts_label_t *tsl; 1461*1676Sjpk uint8_t proto; 1462*1676Sjpk int af, adjust; 1463*1676Sjpk uint16_t iplen; 1464*1676Sjpk 1465*1676Sjpk ASSERT(ire != NULL && mp != NULL); 1466*1676Sjpk ASSERT(ire->ire_stq != NULL); 1467*1676Sjpk 1468*1676Sjpk af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6; 1469*1676Sjpk 1470*1676Sjpk if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1471*1676Sjpk ASSERT(ire->ire_ipversion == IPV4_VERSION); 1472*1676Sjpk ipha = (ipha_t *)mp->b_rptr; 1473*1676Sjpk psrc = &ipha->ipha_src; 1474*1676Sjpk pdst = &ipha->ipha_dst; 1475*1676Sjpk proto = ipha->ipha_protocol; 1476*1676Sjpk 1477*1676Sjpk /* destination not directly reachable? 
*/ 1478*1676Sjpk off_link = (ire->ire_gateway_addr != INADDR_ANY); 1479*1676Sjpk } else { 1480*1676Sjpk ASSERT(ire->ire_ipversion == IPV6_VERSION); 1481*1676Sjpk ip6h = (ip6_t *)mp->b_rptr; 1482*1676Sjpk psrc = &ip6h->ip6_src; 1483*1676Sjpk pdst = &ip6h->ip6_dst; 1484*1676Sjpk proto = ip6h->ip6_nxt; 1485*1676Sjpk 1486*1676Sjpk if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && 1487*1676Sjpk proto != IPPROTO_ICMPV6) { 1488*1676Sjpk uint8_t *nexthdrp; 1489*1676Sjpk uint16_t hdr_len; 1490*1676Sjpk 1491*1676Sjpk if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, 1492*1676Sjpk &nexthdrp)) { 1493*1676Sjpk /* malformed packet; drop it */ 1494*1676Sjpk return (NULL); 1495*1676Sjpk } 1496*1676Sjpk proto = *nexthdrp; 1497*1676Sjpk } 1498*1676Sjpk 1499*1676Sjpk /* destination not directly reachable? */ 1500*1676Sjpk off_link = !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6); 1501*1676Sjpk } 1502*1676Sjpk 1503*1676Sjpk if ((tsl = MBLK_GETLABEL(mp)) == NULL) 1504*1676Sjpk return (mp); 1505*1676Sjpk 1506*1676Sjpk label_type = tsol_get_option(mp, &opt_ptr); 1507*1676Sjpk 1508*1676Sjpk ASSERT(psrc != NULL && pdst != NULL); 1509*1676Sjpk dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE); 1510*1676Sjpk 1511*1676Sjpk if (dst_rhtp == NULL) { 1512*1676Sjpk /* 1513*1676Sjpk * Without a template we do not know if forwarding 1514*1676Sjpk * violates MAC 1515*1676Sjpk */ 1516*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *, 1517*1676Sjpk "mp(1) dropped, no template for destination ip4|6(2)", 1518*1676Sjpk mblk_t *, mp, void *, pdst); 1519*1676Sjpk return (NULL); 1520*1676Sjpk } 1521*1676Sjpk 1522*1676Sjpk /* 1523*1676Sjpk * Gateway template must have existed for off-link destinations, 1524*1676Sjpk * since tsol_ire_match_gwattr has ensured such condition. 
1525*1676Sjpk */ 1526*1676Sjpk if (((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL || 1527*1676Sjpk (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) { 1528*1676Sjpk DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *, 1529*1676Sjpk "mp(1) dropped, no gateway in ire attributes(2)", 1530*1676Sjpk mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp); 1531*1676Sjpk mp = NULL; 1532*1676Sjpk goto keep_label; 1533*1676Sjpk } 1534*1676Sjpk 1535*1676Sjpk /* 1536*1676Sjpk * Check that the label for the packet is acceptable 1537*1676Sjpk * by destination host; otherwise, drop it. 1538*1676Sjpk */ 1539*1676Sjpk switch (dst_rhtp->tpc_tp.host_type) { 1540*1676Sjpk case SUN_CIPSO: 1541*1676Sjpk if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1542*1676Sjpk (!_blinrange(&tsl->tsl_label, 1543*1676Sjpk &dst_rhtp->tpc_tp.tp_sl_range_cipso) && 1544*1676Sjpk !blinlset(&tsl->tsl_label, 1545*1676Sjpk dst_rhtp->tpc_tp.tp_sl_set_cipso))) { 1546*1676Sjpk DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1547*1676Sjpk "labeled packet mp(1) dropped, label(2) fails " 1548*1676Sjpk "destination(3) accredation check", 1549*1676Sjpk mblk_t *, mp, ts_label_t *, tsl, 1550*1676Sjpk tsol_tpc_t *, dst_rhtp); 1551*1676Sjpk mp = NULL; 1552*1676Sjpk goto keep_label; 1553*1676Sjpk } 1554*1676Sjpk break; 1555*1676Sjpk 1556*1676Sjpk 1557*1676Sjpk case UNLABELED: 1558*1676Sjpk if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1559*1676Sjpk !blequal(&dst_rhtp->tpc_tp.tp_def_label, 1560*1676Sjpk &tsl->tsl_label)) { 1561*1676Sjpk DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1562*1676Sjpk "unlabeled packet mp(1) dropped, label(2) fails " 1563*1676Sjpk "destination(3) accredation check", 1564*1676Sjpk mblk_t *, mp, ts_label_t *, tsl, 1565*1676Sjpk tsol_tpc_t *, dst_rhtp); 1566*1676Sjpk mp = NULL; 1567*1676Sjpk goto keep_label; 1568*1676Sjpk } 1569*1676Sjpk break; 1570*1676Sjpk } 1571*1676Sjpk if (label_type == OPT_CIPSO) { 1572*1676Sjpk /* 1573*1676Sjpk * We keep the label on any 
of the following cases: 1574*1676Sjpk * 1575*1676Sjpk * 1. The destination is labeled (on/off-link). 1576*1676Sjpk * 2. The unlabeled destination is off-link, 1577*1676Sjpk * and the next hop gateway is labeled. 1578*1676Sjpk */ 1579*1676Sjpk if (dst_rhtp->tpc_tp.host_type != UNLABELED || 1580*1676Sjpk (off_link && 1581*1676Sjpk gw_rhtp->tpc_tp.host_type != UNLABELED)) 1582*1676Sjpk goto keep_label; 1583*1676Sjpk 1584*1676Sjpk /* 1585*1676Sjpk * Strip off the CIPSO option from the packet because: the 1586*1676Sjpk * unlabeled destination host is directly reachable through 1587*1676Sjpk * an interface (on-link); or, the unlabeled destination host 1588*1676Sjpk * is not directly reachable (off-link), and the next hop 1589*1676Sjpk * gateway is unlabeled. 1590*1676Sjpk */ 1591*1676Sjpk adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) : 1592*1676Sjpk tsol_remove_secopt_v6(ip6h, MBLKL(mp)); 1593*1676Sjpk 1594*1676Sjpk ASSERT(adjust <= 0); 1595*1676Sjpk if (adjust != 0) { 1596*1676Sjpk 1597*1676Sjpk /* adjust is negative */ 1598*1676Sjpk ASSERT((mp->b_wptr + adjust) >= mp->b_rptr); 1599*1676Sjpk mp->b_wptr += adjust; 1600*1676Sjpk 1601*1676Sjpk if (af == AF_INET) { 1602*1676Sjpk ipha = (ipha_t *)mp->b_rptr; 1603*1676Sjpk iplen = ntohs(ipha->ipha_length) + adjust; 1604*1676Sjpk ipha->ipha_length = htons(iplen); 1605*1676Sjpk ipha->ipha_hdr_checksum = 0; 1606*1676Sjpk ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1607*1676Sjpk } 1608*1676Sjpk DTRACE_PROBE3(tx__ip__log__info__forward__adjust, 1609*1676Sjpk char *, 1610*1676Sjpk "mp(1) adjusted(2) for CIPSO option removal", 1611*1676Sjpk mblk_t *, mp, int, adjust); 1612*1676Sjpk } 1613*1676Sjpk goto keep_label; 1614*1676Sjpk } 1615*1676Sjpk 1616*1676Sjpk ASSERT(label_type == OPT_NONE); 1617*1676Sjpk ASSERT(dst_rhtp != NULL); 1618*1676Sjpk 1619*1676Sjpk /* 1620*1676Sjpk * We need to add CIPSO option if the destination or the next hop 1621*1676Sjpk * gateway is labeled. Otherwise, pass the packet as is. 
1622*1676Sjpk */ 1623*1676Sjpk if (dst_rhtp->tpc_tp.host_type == UNLABELED && 1624*1676Sjpk (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED)) 1625*1676Sjpk goto keep_label; 1626*1676Sjpk 1627*1676Sjpk if ((af == AF_INET && 1628*1676Sjpk tsol_check_label(DB_CRED(mp), &mp, &adjust, B_FALSE) != 0) || 1629*1676Sjpk (af == AF_INET6 && 1630*1676Sjpk tsol_check_label_v6(DB_CRED(mp), &mp, &adjust, B_FALSE) != 0)) { 1631*1676Sjpk mp = NULL; 1632*1676Sjpk goto keep_label; 1633*1676Sjpk } 1634*1676Sjpk 1635*1676Sjpk ASSERT(adjust != -1); 1636*1676Sjpk if (adjust != 0) { 1637*1676Sjpk if (af == AF_INET) { 1638*1676Sjpk ipha = (ipha_t *)mp->b_rptr; 1639*1676Sjpk iplen = ntohs(ipha->ipha_length) + adjust; 1640*1676Sjpk ipha->ipha_length = htons(iplen); 1641*1676Sjpk ipha->ipha_hdr_checksum = 0; 1642*1676Sjpk ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1643*1676Sjpk } 1644*1676Sjpk 1645*1676Sjpk DTRACE_PROBE3(tx__ip__log__info__forward__adjust, char *, 1646*1676Sjpk "mp(1) adjusted(2) for CIPSO option removal", 1647*1676Sjpk mblk_t *, mp, int, adjust); 1648*1676Sjpk } 1649*1676Sjpk 1650*1676Sjpk keep_label: 1651*1676Sjpk TPC_RELE(dst_rhtp); 1652*1676Sjpk return (mp); 1653*1676Sjpk } 1654*1676Sjpk 1655*1676Sjpk /* 1656*1676Sjpk * Name: tsol_rtsa_init() 1657*1676Sjpk * 1658*1676Sjpk * Normal: Sanity checks on the route security attributes provided by 1659*1676Sjpk * user. Convert it into a route security parameter list to 1660*1676Sjpk * be returned to caller. 1661*1676Sjpk * 1662*1676Sjpk * Output: EINVAL if bad security attributes in the routing message 1663*1676Sjpk * ENOMEM if unable to allocate data structures 1664*1676Sjpk * 0 otherwise. 1665*1676Sjpk * 1666*1676Sjpk * Note: On input, cp must point to the end of any addresses in 1667*1676Sjpk * the rt_msghdr_t structure. 
1668*1676Sjpk */ 1669*1676Sjpk int 1670*1676Sjpk tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp) 1671*1676Sjpk { 1672*1676Sjpk uint_t sacnt; 1673*1676Sjpk int err; 1674*1676Sjpk caddr_t lim; 1675*1676Sjpk tsol_rtsecattr_t *tp; 1676*1676Sjpk 1677*1676Sjpk ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL); 1678*1676Sjpk 1679*1676Sjpk /* 1680*1676Sjpk * In theory, we could accept as many security attributes configured 1681*1676Sjpk * per route destination. However, the current design is limited 1682*1676Sjpk * such that at most only one set security attributes is allowed to 1683*1676Sjpk * be associated with a prefix IRE. We therefore assert for now. 1684*1676Sjpk */ 1685*1676Sjpk /* LINTED */ 1686*1676Sjpk ASSERT(TSOL_RTSA_REQUEST_MAX == 1); 1687*1676Sjpk 1688*1676Sjpk sp->rtsa_cnt = 0; 1689*1676Sjpk lim = (caddr_t)rtm + rtm->rtm_msglen; 1690*1676Sjpk ASSERT(cp <= lim); 1691*1676Sjpk 1692*1676Sjpk if ((lim - cp) < sizeof (rtm_ext_t) || 1693*1676Sjpk ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR) 1694*1676Sjpk return (0); 1695*1676Sjpk 1696*1676Sjpk if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t)) 1697*1676Sjpk return (EINVAL); 1698*1676Sjpk 1699*1676Sjpk cp += sizeof (rtm_ext_t); 1700*1676Sjpk 1701*1676Sjpk if ((lim - cp) < sizeof (*tp) || 1702*1676Sjpk (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) || 1703*1676Sjpk (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt)) 1704*1676Sjpk return (EINVAL); 1705*1676Sjpk 1706*1676Sjpk /* 1707*1676Sjpk * Trying to add route security attributes when system 1708*1676Sjpk * labeling service is not available, or when user supllies 1709*1676Sjpk * more than the maximum number of security attributes 1710*1676Sjpk * allowed per request. 
1711*1676Sjpk */ 1712*1676Sjpk if ((sacnt > 0 && !is_system_labeled()) || 1713*1676Sjpk sacnt > TSOL_RTSA_REQUEST_MAX) 1714*1676Sjpk return (EINVAL); 1715*1676Sjpk 1716*1676Sjpk /* Ensure valid credentials */ 1717*1676Sjpk if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)-> 1718*1676Sjpk rtsa_attr[0])) != 0) { 1719*1676Sjpk cp += sizeof (*sp); 1720*1676Sjpk return (err); 1721*1676Sjpk } 1722*1676Sjpk 1723*1676Sjpk bcopy(cp, sp, sizeof (*sp)); 1724*1676Sjpk cp += sizeof (*sp); 1725*1676Sjpk return (0); 1726*1676Sjpk } 1727*1676Sjpk 1728*1676Sjpk int 1729*1676Sjpk tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc, 1730*1676Sjpk tsol_gcgrp_t *gcgrp) 1731*1676Sjpk { 1732*1676Sjpk tsol_ire_gw_secattr_t *attrp; 1733*1676Sjpk boolean_t exists = B_FALSE; 1734*1676Sjpk in_addr_t ga_addr4; 1735*1676Sjpk void *paddr = NULL; 1736*1676Sjpk 1737*1676Sjpk ASSERT(ire != NULL); 1738*1676Sjpk 1739*1676Sjpk /* 1740*1676Sjpk * The only time that attrp can be NULL is when this routine is 1741*1676Sjpk * called for the first time during the creation/initialization 1742*1676Sjpk * of the corresponding IRE. It will only get cleared when the 1743*1676Sjpk * IRE is deleted. 
1744*1676Sjpk */ 1745*1676Sjpk if ((attrp = ire->ire_gw_secattr) == NULL) { 1746*1676Sjpk attrp = ire_gw_secattr_alloc(KM_NOSLEEP); 1747*1676Sjpk if (attrp == NULL) 1748*1676Sjpk return (ENOMEM); 1749*1676Sjpk ire->ire_gw_secattr = attrp; 1750*1676Sjpk } else { 1751*1676Sjpk exists = B_TRUE; 1752*1676Sjpk mutex_enter(&attrp->igsa_lock); 1753*1676Sjpk 1754*1676Sjpk if (attrp->igsa_rhc != NULL) { 1755*1676Sjpk TNRHC_RELE(attrp->igsa_rhc); 1756*1676Sjpk attrp->igsa_rhc = NULL; 1757*1676Sjpk } 1758*1676Sjpk 1759*1676Sjpk if (attrp->igsa_gc != NULL) 1760*1676Sjpk GC_REFRELE(attrp->igsa_gc); 1761*1676Sjpk if (attrp->igsa_gcgrp != NULL) 1762*1676Sjpk GCGRP_REFRELE(attrp->igsa_gcgrp); 1763*1676Sjpk } 1764*1676Sjpk ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock)); 1765*1676Sjpk 1766*1676Sjpk /* 1767*1676Sjpk * References already held by caller and we keep them; 1768*1676Sjpk * note that both gc and gcgrp may be set to NULL to 1769*1676Sjpk * clear out igsa_gc and igsa_gcgrp, respectively. 1770*1676Sjpk */ 1771*1676Sjpk attrp->igsa_gc = gc; 1772*1676Sjpk attrp->igsa_gcgrp = gcgrp; 1773*1676Sjpk 1774*1676Sjpk if (gcgrp == NULL && gc != NULL) { 1775*1676Sjpk gcgrp = gc->gc_grp; 1776*1676Sjpk ASSERT(gcgrp != NULL); 1777*1676Sjpk } 1778*1676Sjpk 1779*1676Sjpk /* 1780*1676Sjpk * Intialize the template for gateway; we use the gateway's 1781*1676Sjpk * address found in either the passed in gateway credential 1782*1676Sjpk * or group pointer, or the ire_gateway_addr{_v6} field. 1783*1676Sjpk */ 1784*1676Sjpk if (gcgrp != NULL) { 1785*1676Sjpk tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr; 1786*1676Sjpk 1787*1676Sjpk /* 1788*1676Sjpk * Caller is holding a reference, and that we don't 1789*1676Sjpk * need to hold any lock to access the address. 
1790*1676Sjpk */ 1791*1676Sjpk if (ipversion == IPV4_VERSION) { 1792*1676Sjpk ASSERT(ga->ga_af == AF_INET); 1793*1676Sjpk IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4); 1794*1676Sjpk paddr = &ga_addr4; 1795*1676Sjpk } else { 1796*1676Sjpk ASSERT(ga->ga_af == AF_INET6); 1797*1676Sjpk paddr = &ga->ga_addr; 1798*1676Sjpk } 1799*1676Sjpk } else if (ipversion == IPV6_VERSION && 1800*1676Sjpk !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) { 1801*1676Sjpk paddr = &ire->ire_gateway_addr_v6; 1802*1676Sjpk } else if (ipversion == IPV4_VERSION && 1803*1676Sjpk ire->ire_gateway_addr != INADDR_ANY) { 1804*1676Sjpk paddr = &ire->ire_gateway_addr; 1805*1676Sjpk } 1806*1676Sjpk 1807*1676Sjpk /* 1808*1676Sjpk * Lookup the gateway template; note that we could get an internal 1809*1676Sjpk * template here, which we cache anyway. During IRE matching, we'll 1810*1676Sjpk * try to update this gateway template cache and hopefully get a 1811*1676Sjpk * real one. 1812*1676Sjpk */ 1813*1676Sjpk if (paddr != NULL) { 1814*1676Sjpk attrp->igsa_rhc = (ipversion == IPV4_VERSION) ? 1815*1676Sjpk find_rhc_v4(paddr) : find_rhc_v6(paddr); 1816*1676Sjpk } 1817*1676Sjpk 1818*1676Sjpk if (exists) 1819*1676Sjpk mutex_exit(&attrp->igsa_lock); 1820*1676Sjpk 1821*1676Sjpk return (0); 1822*1676Sjpk } 1823*1676Sjpk 1824*1676Sjpk /* 1825*1676Sjpk * This function figures the type of MLP that we'll be using based on the 1826*1676Sjpk * address that the user is binding and the zone. If the address is 1827*1676Sjpk * unspecified, then we're looking at both private and shared. If it's one 1828*1676Sjpk * of the zone's private addresses, then it's private only. If it's one 1829*1676Sjpk * of the global addresses, then it's shared only. 1830*1676Sjpk * 1831*1676Sjpk * If we can't figure out what it is, then return mlptSingle. That's actually 1832*1676Sjpk * an error case. 
1833*1676Sjpk */ 1834*1676Sjpk mlp_type_t 1835*1676Sjpk tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr) 1836*1676Sjpk { 1837*1676Sjpk in_addr_t in4; 1838*1676Sjpk ire_t *ire; 1839*1676Sjpk ipif_t *ipif; 1840*1676Sjpk zoneid_t addrzone; 1841*1676Sjpk 1842*1676Sjpk ASSERT(addr != NULL); 1843*1676Sjpk 1844*1676Sjpk if (version == IPV6_VERSION && 1845*1676Sjpk IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) { 1846*1676Sjpk IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4); 1847*1676Sjpk addr = &in4; 1848*1676Sjpk version = IPV4_VERSION; 1849*1676Sjpk } 1850*1676Sjpk 1851*1676Sjpk if (version == IPV4_VERSION) { 1852*1676Sjpk in4 = *(const in_addr_t *)addr; 1853*1676Sjpk if (in4 == INADDR_ANY) 1854*1676Sjpk return (mlptBoth); 1855*1676Sjpk ire = ire_cache_lookup(in4, zoneid, NULL); 1856*1676Sjpk } else { 1857*1676Sjpk if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr)) 1858*1676Sjpk return (mlptBoth); 1859*1676Sjpk ire = ire_cache_lookup_v6(addr, zoneid, NULL); 1860*1676Sjpk } 1861*1676Sjpk /* 1862*1676Sjpk * If we can't find the IRE, then we have to behave exactly like 1863*1676Sjpk * ip_bind_laddr{,_v6}. That means looking up the IPIF so that users 1864*1676Sjpk * can bind to addresses on "down" interfaces. 1865*1676Sjpk * 1866*1676Sjpk * If we can't find that either, then the bind is going to fail, so 1867*1676Sjpk * just give up. Note that there's a miniscule chance that the address 1868*1676Sjpk * is in transition, but we don't bother handling that. 
1869*1676Sjpk */ 1870*1676Sjpk if (ire == NULL) { 1871*1676Sjpk if (version == IPV4_VERSION) 1872*1676Sjpk ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL, 1873*1676Sjpk zoneid, NULL, NULL, NULL, NULL); 1874*1676Sjpk else 1875*1676Sjpk ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr, 1876*1676Sjpk NULL, zoneid, NULL, NULL, NULL, NULL); 1877*1676Sjpk if (ipif == NULL) 1878*1676Sjpk return (mlptSingle); 1879*1676Sjpk addrzone = ipif->ipif_zoneid; 1880*1676Sjpk ipif_refrele(ipif); 1881*1676Sjpk } else { 1882*1676Sjpk addrzone = ire->ire_zoneid; 1883*1676Sjpk ire_refrele(ire); 1884*1676Sjpk } 1885*1676Sjpk return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate); 1886*1676Sjpk } 1887*1676Sjpk 1888*1676Sjpk /* 1889*1676Sjpk * Since we are configuring local interfaces, and we know trusted 1890*1676Sjpk * extension CDE requires local interfaces to be cipso host type in 1891*1676Sjpk * order to function correctly, we'll associate a cipso template 1892*1676Sjpk * to each local interface and let the interface come up. Configuring 1893*1676Sjpk * a local interface to be "unlabeled" host type is a configuration error. 1894*1676Sjpk * We'll override that error and make the interface host type to be cipso 1895*1676Sjpk * here. 1896*1676Sjpk * 1897*1676Sjpk * The code is optimized for the usual "success" case and unwinds things on 1898*1676Sjpk * error. We don't want to go to the trouble and expense of formatting the 1899*1676Sjpk * interface name for the usual case where everything is configured correctly. 
1900*1676Sjpk */ 1901*1676Sjpk boolean_t 1902*1676Sjpk tsol_check_interface_address(const ipif_t *ipif) 1903*1676Sjpk { 1904*1676Sjpk tsol_tpc_t *tp; 1905*1676Sjpk char addrbuf[INET6_ADDRSTRLEN]; 1906*1676Sjpk int af; 1907*1676Sjpk const void *addr; 1908*1676Sjpk zone_t *zone; 1909*1676Sjpk ts_label_t *plabel; 1910*1676Sjpk const bslabel_t *label; 1911*1676Sjpk char ifbuf[LIFNAMSIZ + 10]; 1912*1676Sjpk const char *ifname; 1913*1676Sjpk boolean_t retval; 1914*1676Sjpk tsol_rhent_t rhent; 1915*1676Sjpk 1916*1676Sjpk if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) { 1917*1676Sjpk af = AF_INET; 1918*1676Sjpk addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr); 1919*1676Sjpk } else { 1920*1676Sjpk af = AF_INET6; 1921*1676Sjpk addr = &ipif->ipif_v6lcl_addr; 1922*1676Sjpk } 1923*1676Sjpk 1924*1676Sjpk tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE); 1925*1676Sjpk zone = ipif->ipif_zoneid == ALL_ZONES ? NULL : 1926*1676Sjpk zone_find_by_id(ipif->ipif_zoneid); 1927*1676Sjpk if (zone != NULL) { 1928*1676Sjpk plabel = zone->zone_slabel; 1929*1676Sjpk ASSERT(plabel != NULL); 1930*1676Sjpk label = label2bslabel(plabel); 1931*1676Sjpk } 1932*1676Sjpk 1933*1676Sjpk /* 1934*1676Sjpk * If it's CIPSO and an all-zones address, then we're done. 1935*1676Sjpk * If it's a CIPSO zone specific address, the zone's label 1936*1676Sjpk * must be in the range or set specified in the template. 1937*1676Sjpk * When the remote host entry is missing or the template 1938*1676Sjpk * type is incorrect for this interface, we create a 1939*1676Sjpk * CIPSO host entry in kernel and allow the interface to be 1940*1676Sjpk * brought up as CIPSO type. 
1941*1676Sjpk */ 1942*1676Sjpk if (tp != NULL && ( 1943*1676Sjpk /* The all-zones case */ 1944*1676Sjpk (tp->tpc_tp.host_type == SUN_CIPSO && 1945*1676Sjpk tp->tpc_tp.tp_doi == default_doi && 1946*1676Sjpk ipif->ipif_zoneid == ALL_ZONES) || 1947*1676Sjpk /* The local-zone case */ 1948*1676Sjpk (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi && 1949*1676Sjpk ((tp->tpc_tp.host_type == SUN_CIPSO && 1950*1676Sjpk (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) || 1951*1676Sjpk blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) { 1952*1676Sjpk if (zone != NULL) 1953*1676Sjpk zone_rele(zone); 1954*1676Sjpk TPC_RELE(tp); 1955*1676Sjpk return (B_TRUE); 1956*1676Sjpk } 1957*1676Sjpk 1958*1676Sjpk ifname = ipif->ipif_ill->ill_name; 1959*1676Sjpk if (ipif->ipif_id != 0) { 1960*1676Sjpk (void) snprintf(ifbuf, sizeof (ifbuf), "%s:%u", ifname, 1961*1676Sjpk ipif->ipif_id); 1962*1676Sjpk ifname = ifbuf; 1963*1676Sjpk } 1964*1676Sjpk (void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf)); 1965*1676Sjpk 1966*1676Sjpk if (tp == NULL) { 1967*1676Sjpk cmn_err(CE_NOTE, "template entry for %s missing. Default to " 1968*1676Sjpk "CIPSO type for %s", ifname, addrbuf); 1969*1676Sjpk retval = B_TRUE; 1970*1676Sjpk } else if (tp->tpc_tp.host_type == UNLABELED) { 1971*1676Sjpk cmn_err(CE_NOTE, "template type for %s incorrectly configured. " 1972*1676Sjpk "Change to CIPSO type for %s", ifname, addrbuf); 1973*1676Sjpk retval = B_TRUE; 1974*1676Sjpk } else if (ipif->ipif_zoneid == ALL_ZONES) { 1975*1676Sjpk if (tp->tpc_tp.host_type != SUN_CIPSO) { 1976*1676Sjpk cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for " 1977*1676Sjpk "all-zones. 
Converted to CIPSO.", ifname, addrbuf); 1978*1676Sjpk retval = B_TRUE; 1979*1676Sjpk } else { 1980*1676Sjpk cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d " 1981*1676Sjpk "instead of %d", ifname, addrbuf, 1982*1676Sjpk tp->tpc_tp.tp_doi, default_doi); 1983*1676Sjpk retval = B_FALSE; 1984*1676Sjpk } 1985*1676Sjpk } else if (zone == NULL) { 1986*1676Sjpk cmn_err(CE_NOTE, "%s failed: zoneid %d unknown", 1987*1676Sjpk ifname, ipif->ipif_zoneid); 1988*1676Sjpk retval = B_FALSE; 1989*1676Sjpk } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 1990*1676Sjpk cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has " 1991*1676Sjpk "DOI %d", ifname, zone->zone_name, plabel->tsl_doi, 1992*1676Sjpk addrbuf, tp->tpc_tp.tp_doi); 1993*1676Sjpk retval = B_FALSE; 1994*1676Sjpk } else { 1995*1676Sjpk cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with " 1996*1676Sjpk "%s", ifname, zone->zone_name, addrbuf); 1997*1676Sjpk tsol_print_label(label, "zone label"); 1998*1676Sjpk retval = B_FALSE; 1999*1676Sjpk } 2000*1676Sjpk 2001*1676Sjpk if (zone != NULL) 2002*1676Sjpk zone_rele(zone); 2003*1676Sjpk if (tp != NULL) 2004*1676Sjpk TPC_RELE(tp); 2005*1676Sjpk if (retval) { 2006*1676Sjpk /* 2007*1676Sjpk * we've corrected a config error and let the interface 2008*1676Sjpk * come up as cipso. Need to insert an rhent. 2009*1676Sjpk */ 2010*1676Sjpk if ((rhent.rh_address.ta_family = af) == AF_INET) { 2011*1676Sjpk rhent.rh_prefix = 32; 2012*1676Sjpk rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr; 2013*1676Sjpk } else { 2014*1676Sjpk rhent.rh_prefix = 128; 2015*1676Sjpk rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr; 2016*1676Sjpk } 2017*1676Sjpk (void) strcpy(rhent.rh_template, "cipso"); 2018*1676Sjpk if (tnrh_load(&rhent) != 0) { 2019*1676Sjpk cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO " 2020*1676Sjpk "template for local addr %s", ifname, addrbuf); 2021*1676Sjpk retval = B_FALSE; 2022*1676Sjpk } 2023*1676Sjpk } 2024*1676Sjpk return (retval); 2025*1676Sjpk } 2026