/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * IP interface to squeues.
 *
 * IP creates an squeue instance for each CPU. The squeue pointer is saved in
 * the cpu_squeue_set field of the cpu structure. Each connection instance
 * (conn_t) is associated with an squeue.
 *
 * For CPUs available at system startup time, squeue creation and association
 * with the CPU happen at MP initialization time. For CPUs added during
 * dynamic reconfiguration, the initialization happens when the new CPU is
 * configured in the system. The squeue is chosen using the IP_SQUEUE_GET
 * macro, which returns either the per-CPU squeue or a random squeue based on
 * the ip_squeue_fanout variable.
 *
 * There are two modes of associating a connection with squeues. The first
 * mode associates each connection with the CPU that creates the connection
 * (either at open time or at accept time). The second mode associates each
 * connection with a random CPU, effectively distributing load over all CPUs
 * and all squeues in the system. The mode is controlled by the
 * ip_squeue_fanout variable.
 *
 * NOTE: The fact that there is an association between each connection and an
 * squeue, and between each squeue and a CPU, does not mean that each
 * connection is always processed on this CPU and on this CPU only. Any
 * thread calling squeue_enter() may process the connection on whatever CPU
 * it is scheduled on. The squeue to CPU binding is only relevant for the
 * worker thread.
 */
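/*
 * As an illustration, the selection reduces to roughly the following. This
 * is a sketch only; the real IP_SQUEUE_GET macro lives in <inet/ip.h> and
 * its exact form may differ:
 *
 *	squeue_t *sqp;
 *
 *	if (ip_squeue_fanout)
 *		sqp = ip_squeue_random(hint);
 *	else
 *		sqp = CPU->cpu_squeue_set->sqs_list[0];
 */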
/*
 * The list of all created squeues is kept in the squeue_set structure. This
 * list is used when ip_squeue_fanout is set and the load is distributed
 * across all squeues.
 *
 * INTERFACE:
 *
 * squeue_t *ip_squeue_get(hint)
 *
 *	Find an squeue based on the 'hint' value. The hint is used as an
 *	index into the array of available IP squeues. The way the hint is
 *	computed may affect the effectiveness of the squeue distribution.
 *	Currently squeues are assigned in round-robin fashion using lbolt
 *	as the hint.
 *
 *
 * DR Notes
 * ========
 *
 * ip_squeue_init() registers a call-back function with the CPU DR subsystem
 * using register_cpu_setup_func(). The call-back function does two things:
 *
 * o When the CPU is going off-line or is unconfigured, the worker thread is
 *	unbound from the CPU. This allows the CPU unconfig code to move it to
 *	another CPU.
 *
 * o When the CPU is going online, it creates a new squeue for this CPU if
 *	necessary and binds the squeue worker thread to this CPU.
 *
 * TUNABLES:
 *
 * ip_squeue_bind: if set to 1, each squeue worker thread is bound to the CPU
 *	associated with the squeue instance.
 *
 * ip_squeue_profile: if set to 1, squeue profiling is enabled. NOTE: squeue.c
 *	must be compiled with SQUEUE_PROFILE defined for this variable to have
 *	an effect.
 *
 * ip_squeue_fanout: if set to 1, use ip_squeue_get() to find an squeue;
 *	otherwise get it from CPU->cpu_squeue_set.
 *
 * ip_squeue_bind, ip_squeue_profile and ip_squeue_fanout can be accessed and
 * changed using ndd on /dev/tcp or /dev/ip.
 *
 * ip_squeue_worker_wait: global value for the sq_wait field of all squeues
 *	created. This is the time the squeue code waits before waking up the
 *	worker thread after queuing a request.
 */
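/*
 * For example, using the standard ndd syntax (the parameter names are the
 * tuneables listed above):
 *
 *	# ndd -set /dev/ip ip_squeue_fanout 1
 *	# ndd -get /dev/tcp ip_squeue_bind
 */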
#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>

#include <sys/cmn_err.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_if.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/ipclassifier.h>
#include <sys/conf.h>
#include <sys/sunddi.h>
#include <sys/ddi.h>
#include <sys/squeue_impl.h>


/*
 * We allow multiple NICs to bind to the same CPU but want to preserve a
 * 1 <-> 1 mapping between squeue and NIC (or Rx ring) for performance
 * reasons, so that each squeue can uniquely own a NIC or a Rx ring and do
 * polling (PSARC 2004/630). So we allow up to MAX_THREAD_PER_CPU squeues
 * per CPU. We start by creating MIN_THREAD_PER_CPU squeues per CPU, but
 * more squeues can be created dynamically as needed.
 */
#define	MAX_THREAD_PER_CPU	32
#define	MIN_THREAD_PER_CPU	1
uint_t ip_threads_per_cpu = MIN_THREAD_PER_CPU;

/*
 * List of all created squeue sets. The size is protected by cpu_lock.
 */
squeue_set_t	**sqset_global_list;
uint_t		sqset_global_size;

int ip_squeue_bind = B_TRUE;
int ip_squeue_profile = B_TRUE;
static void (*ip_squeue_create_callback)(squeue_t *) = NULL;

/*
 * ip_squeue_worker_wait: global value for the sq_wait field of all squeues
 * created. This is the time the squeue code waits before waking up the
 * worker thread after queuing a request.
 */
uint_t ip_squeue_worker_wait = 10;

static squeue_set_t *ip_squeue_set_create(cpu_t *, boolean_t);
static int ip_squeue_cpu_setup(cpu_setup_t, int, void *);

static void ip_squeue_set_bind(squeue_set_t *);
static void ip_squeue_set_unbind(squeue_set_t *);

#define	CPU_ISON(c) (c != NULL && CPU_ACTIVE(c) && (c->cpu_flags & CPU_EXISTS))
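/*
 * Layout sketch of a single squeue_set_t allocation (see
 * ip_squeue_set_create() below). The pointer array is co-allocated
 * immediately after the structure and sqs_list points at it:
 *
 *	+--------------+------------------------------------------+
 *	| squeue_set_t | squeue_t *sqs_list[MAX_THREAD_PER_CPU]   |
 *	+--------------+------------------------------------------+
 *	sqs_list = (squeue_t **)&sqs[1]
 */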
/*
 * Create an squeue set containing ip_threads_per_cpu squeues for this CPU
 * and bind them all to the CPU.
 */
static squeue_set_t *
ip_squeue_set_create(cpu_t *cp, boolean_t reuse)
{
	int		i;
	squeue_set_t	*sqs;
	squeue_t	*sqp;
	char		sqname[64];
	processorid_t	id = cp->cpu_id;

	if (reuse) {
		/*
		 * We may already have an squeue created for this CPU. Try to
		 * find one and reuse it if possible.
		 */
		for (i = 0; i < sqset_global_size; i++) {
			sqs = sqset_global_list[i];
			if (id == sqs->sqs_bind)
				return (sqs);
		}
	}

	sqs = kmem_zalloc(sizeof (squeue_set_t) +
	    (sizeof (squeue_t *) * MAX_THREAD_PER_CPU), KM_SLEEP);
	mutex_init(&sqs->sqs_lock, NULL, MUTEX_DEFAULT, NULL);
	sqs->sqs_list = (squeue_t **)&sqs[1];
	sqs->sqs_max_size = MAX_THREAD_PER_CPU;
	sqs->sqs_bind = id;

	for (i = 0; i < ip_threads_per_cpu; i++) {
		bzero(sqname, sizeof (sqname));

		(void) snprintf(sqname, sizeof (sqname),
		    "ip_squeue_cpu_%d/%d/%d", cp->cpu_seqid,
		    cp->cpu_id, i);

		sqp = squeue_create(sqname, id, ip_squeue_worker_wait,
		    minclsyspri);

		ASSERT(sqp != NULL);

		squeue_profile_enable(sqp);
		sqs->sqs_list[sqs->sqs_size++] = sqp;

		if (ip_squeue_create_callback != NULL)
			ip_squeue_create_callback(sqp);
	}

	if (ip_squeue_bind && cpu_is_online(cp))
		ip_squeue_set_bind(sqs);

	sqset_global_list[sqset_global_size++] = sqs;
	ASSERT(sqset_global_size <= NCPU);
	return (sqs);
}
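/*
 * The name format above yields, for example, "ip_squeue_cpu_2/6/0" for the
 * first squeue of the CPU with sequence id 2 and processor id 6.
 */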
/*
 * Initialize IP squeues.
 */
void
ip_squeue_init(void (*callback)(squeue_t *))
{
	int i;

	ASSERT(sqset_global_list == NULL);

	if (ip_threads_per_cpu < MIN_THREAD_PER_CPU)
		ip_threads_per_cpu = MIN_THREAD_PER_CPU;
	else if (ip_threads_per_cpu > MAX_THREAD_PER_CPU)
		ip_threads_per_cpu = MAX_THREAD_PER_CPU;

	ip_squeue_create_callback = callback;
	squeue_init();
	sqset_global_list =
	    kmem_zalloc(sizeof (squeue_set_t *) * NCPU, KM_SLEEP);
	sqset_global_size = 0;
	mutex_enter(&cpu_lock);

	/* Create an squeue for each active CPU available */
	for (i = 0; i < NCPU; i++) {
		cpu_t *cp = cpu[i];
		if (CPU_ISON(cp) && cp->cpu_squeue_set == NULL) {
			cp->cpu_squeue_set = ip_squeue_set_create(cp, B_FALSE);
		}
	}

	register_cpu_setup_func(ip_squeue_cpu_setup, NULL);

	mutex_exit(&cpu_lock);

	if (ip_squeue_profile)
		squeue_profile_start();
}

/*
 * Get an squeue_t structure based on index.
 * Since the squeue list can only grow, there is no need to grab any lock.
 */
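/*
 * For example, with sqset_global_size == 4 and one squeue per set,
 * successive hints 0, 1, 2, 3, 4, ... select sets 0, 1, 2, 3, 0, ...;
 * using lbolt as the hint therefore rotates new connections across the
 * sets in round-robin order.
 */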
squeue_t *
ip_squeue_random(uint_t index)
{
	squeue_set_t *sqs;

	sqs = sqset_global_list[index % sqset_global_size];
	return (sqs->sqs_list[index % sqs->sqs_size]);
}

/* ARGSUSED */
void
ip_squeue_clean(void *arg1, mblk_t *mp, void *arg2)
{
	squeue_t	*sqp = arg2;
	ill_rx_ring_t	*ring;
	ill_t		*ill;

	ASSERT(sqp != NULL);

	ring = sqp->sq_rx_ring;
	if (ring == NULL) {
		return;
	}

	/*
	 * Clean up the squeue.
	 */
	mutex_enter(&sqp->sq_lock);
	sqp->sq_state &= ~(SQS_ILL_BOUND|SQS_POLL_CAPAB);
	sqp->sq_rx_ring = NULL;
	mutex_exit(&sqp->sq_lock);

	ill = ring->rr_ill;

	/*
	 * Clean up the ring.
	 */
	ring->rr_blank = NULL;
	ring->rr_handle = NULL;
	ring->rr_sqp = NULL;

	/*
	 * Signal the ill that cleanup is done.
	 */
	mutex_enter(&ill->ill_lock);
	ring->rr_ring_state = ILL_RING_FREE;
	cv_signal(&ill->ill_cv);
	mutex_exit(&ill->ill_lock);
}
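/*
 * The thread tearing down the ring waits for the state transition signalled
 * above. A minimal sketch of that side, assuming the waiter already holds a
 * reference on the ill (the real waiter logic lives in the ill teardown path
 * in ip_if.c):
 *
 *	mutex_enter(&ill->ill_lock);
 *	while (ring->rr_ring_state != ILL_RING_FREE)
 *		cv_wait(&ill->ill_cv, &ill->ill_lock);
 *	mutex_exit(&ill->ill_lock);
 */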
typedef struct ip_taskq_arg {
	ill_t		*ip_taskq_ill;
	ill_rx_ring_t	*ip_taskq_ill_rx_ring;
	cpu_t		*ip_taskq_cpu;
} ip_taskq_arg_t;

/*
 * Do a Rx ring to squeue binding. Find a unique squeue that is not
 * managing a receive ring. If no such squeue exists, dynamically
 * create a new one in the squeue set.
 *
 * The function runs via the system taskq. The ill passed as an
 * argument can't go away since we hold a ref. The lock order is
 * ill_lock -> sqs_lock -> sq_lock.
 *
 * If we end up binding a Rx ring to an squeue attached to an offline CPU,
 * there is no need to check for that case: squeues are never destroyed
 * once created.
 */
/* ARGSUSED */
static void
ip_squeue_extend(void *arg)
{
	ip_taskq_arg_t	*sq_arg = (ip_taskq_arg_t *)arg;
	ill_t		*ill = sq_arg->ip_taskq_ill;
	ill_rx_ring_t	*ill_rx_ring = sq_arg->ip_taskq_ill_rx_ring;
	cpu_t		*intr_cpu = sq_arg->ip_taskq_cpu;
	squeue_set_t	*sqs;
	squeue_t	*sqp = NULL;
	char		sqname[64];
	int		i;

	ASSERT(ill != NULL);
	ASSERT(ill_rx_ring != NULL);
	kmem_free(arg, sizeof (ip_taskq_arg_t));

	sqs = intr_cpu->cpu_squeue_set;

	/*
	 * If this ill represents link aggregation, then there might be
	 * multiple NICs trying to register themselves at the same time,
	 * and in order to ensure that test and assignment of free rings
	 * is sequential, we need to hold the ill_lock.
	 */
	mutex_enter(&ill->ill_lock);
	mutex_enter(&sqs->sqs_lock);
	for (i = 0; i < sqs->sqs_size; i++) {
		mutex_enter(&sqs->sqs_list[i]->sq_lock);
		if ((sqs->sqs_list[i]->sq_state & SQS_ILL_BOUND) == 0) {
			sqp = sqs->sqs_list[i];
			break;
		}
		mutex_exit(&sqs->sqs_list[i]->sq_lock);
	}

	if (sqp == NULL) {
		/* Need to create a new squeue */
		if (sqs->sqs_size == sqs->sqs_max_size) {
			/*
			 * Reached the maximum number of squeues we can
			 * allocate on this CPU. Leave ill_ring_state set
			 * to ILL_RING_INPROC so that ip_squeue_direct will
			 * just assign the default squeue for this ring for
			 * future connections.
			 */
#ifdef DEBUG
			cmn_err(CE_NOTE, "ip_squeue_extend: Reached max "
			    "threads per CPU for sqp = %p\n", (void *)sqp);
#endif
			mutex_exit(&sqs->sqs_lock);
			mutex_exit(&ill->ill_lock);
			ill_waiter_dcr(ill);
			return;
		}

		bzero(sqname, sizeof (sqname));
		(void) snprintf(sqname, sizeof (sqname),
		    "ip_squeue_cpu_%d/%d/%d", CPU->cpu_seqid,
		    CPU->cpu_id, sqs->sqs_size);

		sqp = squeue_create(sqname, CPU->cpu_id, ip_squeue_worker_wait,
		    minclsyspri);

		ASSERT(sqp != NULL);

		squeue_profile_enable(sqp);
		sqs->sqs_list[sqs->sqs_size++] = sqp;

		if (ip_squeue_create_callback != NULL)
			ip_squeue_create_callback(sqp);

		if (ip_squeue_bind) {
			squeue_bind(sqp, -1);
		}
		mutex_enter(&sqp->sq_lock);
	}

	ASSERT(sqp != NULL);

	sqp->sq_rx_ring = ill_rx_ring;
	ill_rx_ring->rr_sqp = sqp;
	ill_rx_ring->rr_ring_state = ILL_RING_INUSE;

	sqp->sq_state |= (SQS_ILL_BOUND|SQS_POLL_CAPAB);
	mutex_exit(&sqp->sq_lock);
	mutex_exit(&sqs->sqs_lock);

	mutex_exit(&ill->ill_lock);

	/* ill_waiter_dcr will also signal any waiters on ill_ring_state */
	ill_waiter_dcr(ill);
}

/*
 * Find the squeue assigned to manage this Rx ring. If the Rx ring is not
 * owned by an squeue yet, do the assignment. When the NIC registers its
 * Rx rings with IP, we don't know where the interrupts will land, so we
 * need to wait until this point to do the assignment.
 */
squeue_t *
ip_squeue_get(ill_rx_ring_t *ill_rx_ring)
{
	squeue_t	*sqp;
	ill_t		*ill;
	int		interrupt;
	ip_taskq_arg_t	*taskq_arg;
	boolean_t	refheld;

	if (ill_rx_ring == NULL)
		return (IP_SQUEUE_GET(lbolt));

	sqp = ill_rx_ring->rr_sqp;
	/*
	 * Do a quick check. If it's not NULL, we are done.
	 * Squeues are never destroyed, so at worst we will bind
	 * this connection to a suboptimal squeue.
	 *
	 * This is the fast path case.
	 */
	if (sqp != NULL)
		return (sqp);

	ill = ill_rx_ring->rr_ill;
	ASSERT(ill != NULL);

	interrupt = servicing_interrupt();
	taskq_arg = (ip_taskq_arg_t *)kmem_zalloc(sizeof (ip_taskq_arg_t),
	    KM_NOSLEEP);

	mutex_enter(&ill->ill_lock);
	if (!interrupt || ill_rx_ring->rr_ring_state != ILL_RING_INUSE ||
	    taskq_arg == NULL) {
		/*
		 * Do the ring to squeue binding only if we are in interrupt
		 * context and no one else is already trying the bind.
		 */
		mutex_exit(&ill->ill_lock);
		if (taskq_arg != NULL)
			kmem_free(taskq_arg, sizeof (ip_taskq_arg_t));
		return (IP_SQUEUE_GET(lbolt));
	}

	/*
	 * No sqp assigned yet. We can't really do the assignment in
	 * interrupt context. Assign the default sqp to this connection
	 * and trigger creation of a new sqp and its binding to this ring
	 * via taskq. Need to make sure the ill stays around.
	 */
	taskq_arg->ip_taskq_ill = ill;
	taskq_arg->ip_taskq_ill_rx_ring = ill_rx_ring;
	taskq_arg->ip_taskq_cpu = CPU;
	ill_rx_ring->rr_ring_state = ILL_RING_INPROC;
	mutex_exit(&ill->ill_lock);
	refheld = ill_waiter_inc(ill);
	if (refheld) {
		if (taskq_dispatch(system_taskq, ip_squeue_extend,
		    taskq_arg, TQ_NOSLEEP) != NULL) {
			return (IP_SQUEUE_GET(lbolt));
		}
	}
	/*
	 * The ill is closing and we could not get a reference on the ill, OR
	 * taskq_dispatch failed, probably due to memory allocation failure.
	 * We will try again next time.
	 */
	mutex_enter(&ill->ill_lock);
	ill_rx_ring->rr_ring_state = ILL_RING_INUSE;
	mutex_exit(&ill->ill_lock);
	kmem_free(taskq_arg, sizeof (ip_taskq_arg_t));
	if (refheld)
		ill_waiter_dcr(ill);

	return (IP_SQUEUE_GET(lbolt));
}
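/*
 * Typical use of ip_squeue_get() (an illustrative sketch only; the real
 * call site is the IP receive path in ip.c): the interrupt thread resolves
 * its Rx ring to an squeue once, then drains inbound packets through it:
 *
 *	squeue_t *sqp = ip_squeue_get(ill_rx_ring);
 *
 *	(classify mp to a conn_t, then hand it to sqp via squeue_enter())
 */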
/*
 * NDD hooks for setting the ip_squeue_xxx tuneables.
 */

/* ARGSUSED */
int
ip_squeue_bind_set(queue_t *q, mblk_t *mp, char *value,
    caddr_t addr, cred_t *cr)
{
	int *bind_enabled = (int *)addr;
	long new_value;
	int i;

	if (ddi_strtol(value, NULL, 10, &new_value) != 0)
		return (EINVAL);

	if (ip_squeue_bind == new_value)
		return (0);

	*bind_enabled = new_value;
	mutex_enter(&cpu_lock);
	if (new_value == 0) {
		for (i = 0; i < sqset_global_size; i++)
			ip_squeue_set_unbind(sqset_global_list[i]);
	} else {
		for (i = 0; i < sqset_global_size; i++)
			ip_squeue_set_bind(sqset_global_list[i]);
	}

	mutex_exit(&cpu_lock);
	return (0);
}

/*
 * Set squeue profiling.
 * 0 means "disable"
 * 1 means "enable"
 * 2 means "enable and reset"
 */
/* ARGSUSED */
int
ip_squeue_profile_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	int *profile_enabled = (int *)cp;
	long new_value;
	squeue_set_t *sqs;

	if (ddi_strtol(value, NULL, 10, &new_value) != 0)
		return (EINVAL);

	if (new_value == 0)
		squeue_profile_stop();
	else if (new_value == 1)
		squeue_profile_start();
	else if (new_value == 2) {
		int i, j;

		squeue_profile_stop();
		mutex_enter(&cpu_lock);
		for (i = 0; i < sqset_global_size; i++) {
			sqs = sqset_global_list[i];
			for (j = 0; j < sqs->sqs_size; j++) {
				squeue_profile_reset(sqs->sqs_list[j]);
			}
		}
		mutex_exit(&cpu_lock);

		new_value = 1;
		squeue_profile_start();
	}
	*profile_enabled = new_value;

	return (0);
}

/*
 * Reconfiguration callback
 */

/* ARGSUSED */
static int
ip_squeue_cpu_setup(cpu_setup_t what, int id, void *arg)
{
	cpu_t *cp = cpu[id];

	ASSERT(MUTEX_HELD(&cpu_lock));
	switch (what) {
	case CPU_CONFIG:
		/*
		 * A new CPU is added. Create an squeue for it but do not
		 * bind it yet.
		 */
		if (cp->cpu_squeue_set == NULL)
			cp->cpu_squeue_set = ip_squeue_set_create(cp, B_TRUE);
		break;
	case CPU_ON:
	case CPU_INIT:
	case CPU_CPUPART_IN:
		if (cp->cpu_squeue_set == NULL) {
			cp->cpu_squeue_set = ip_squeue_set_create(cp, B_TRUE);
		}
		if (ip_squeue_bind)
			ip_squeue_set_bind(cp->cpu_squeue_set);
		break;
	case CPU_UNCONFIG:
	case CPU_OFF:
	case CPU_CPUPART_OUT:
		ASSERT((cp->cpu_squeue_set != NULL) ||
		    (cp->cpu_flags & CPU_OFFLINE));

		if (cp->cpu_squeue_set != NULL) {
			ip_squeue_set_unbind(cp->cpu_squeue_set);
		}
		break;
	default:
		break;
	}
	return (0);
}

/* ARGSUSED */
static void
ip_squeue_set_bind(squeue_set_t *sqs)
{
	int i;
	squeue_t *sqp;

	if (!ip_squeue_bind)
		return;

	mutex_enter(&sqs->sqs_lock);
	for (i = 0; i < sqs->sqs_size; i++) {
		sqp = sqs->sqs_list[i];
		if (sqp->sq_state & SQS_BOUND)
			continue;
		squeue_bind(sqp, -1);
	}
	mutex_exit(&sqs->sqs_lock);
}

static void
ip_squeue_set_unbind(squeue_set_t *sqs)
{
	int i;
	squeue_t *sqp;

	mutex_enter(&sqs->sqs_lock);
	for (i = 0; i < sqs->sqs_size; i++) {
		sqp = sqs->sqs_list[i];
		if (!(sqp->sq_state & SQS_BOUND))
			continue;
		squeue_unbind(sqp);
	}
	mutex_exit(&sqs->sqs_lock);
}