10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 58138SVallish.Vaidyeshwara@Sun.COM * Common Development and Distribution License (the "License"). 68138SVallish.Vaidyeshwara@Sun.COM * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 228138SVallish.Vaidyeshwara@Sun.COM * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #include <thread.h> 270Sstevel@tonic-gate #include <stdlib.h> 280Sstevel@tonic-gate #include <errno.h> 290Sstevel@tonic-gate #include <strings.h> 300Sstevel@tonic-gate #include <tiuser.h> 310Sstevel@tonic-gate #include <syslog.h> 320Sstevel@tonic-gate #include <zone.h> 330Sstevel@tonic-gate #include <sys/priocntl.h> 340Sstevel@tonic-gate #include <sys/fxpriocntl.h> 350Sstevel@tonic-gate #include <nfs/nfs.h> 360Sstevel@tonic-gate #include <nfs/nfssys.h> 370Sstevel@tonic-gate #include "thrpool.h" 380Sstevel@tonic-gate 390Sstevel@tonic-gate extern int _nfssys(int, void *); 400Sstevel@tonic-gate 410Sstevel@tonic-gate /* 420Sstevel@tonic-gate * Thread to call into the kernel and do work on behalf of NFS. 430Sstevel@tonic-gate */ 440Sstevel@tonic-gate static void * 450Sstevel@tonic-gate svcstart(void *arg) 460Sstevel@tonic-gate { 470Sstevel@tonic-gate int id = (int)arg; 480Sstevel@tonic-gate 49*8139SVallish.Vaidyeshwara@Sun.COM /* 50*8139SVallish.Vaidyeshwara@Sun.COM * Create a kernel worker thread to service 51*8139SVallish.Vaidyeshwara@Sun.COM * new incoming requests on a pool. 52*8139SVallish.Vaidyeshwara@Sun.COM */ 53*8139SVallish.Vaidyeshwara@Sun.COM _nfssys(SVCPOOL_RUN, &id); 540Sstevel@tonic-gate 550Sstevel@tonic-gate /* 56*8139SVallish.Vaidyeshwara@Sun.COM * Returned from the kernel, this thread's work is done, 57*8139SVallish.Vaidyeshwara@Sun.COM * and it should exit. For new incoming requests, 58*8139SVallish.Vaidyeshwara@Sun.COM * svcblock() will spawn another worker thread by 59*8139SVallish.Vaidyeshwara@Sun.COM * calling svcstart() again. 600Sstevel@tonic-gate */ 610Sstevel@tonic-gate thr_exit(NULL); 620Sstevel@tonic-gate return (NULL); 630Sstevel@tonic-gate } 640Sstevel@tonic-gate 650Sstevel@tonic-gate static void * 660Sstevel@tonic-gate svc_rdma_creator(void *arg) 670Sstevel@tonic-gate { 680Sstevel@tonic-gate struct rdma_svc_args *rsap = (struct rdma_svc_args *)arg; 690Sstevel@tonic-gate 708138SVallish.Vaidyeshwara@Sun.COM if (_nfssys(RDMA_SVC_INIT, rsap) < 0) { 718138SVallish.Vaidyeshwara@Sun.COM if (errno != ENODEV) { 720Sstevel@tonic-gate (void) syslog(LOG_INFO, "RDMA transport startup " 730Sstevel@tonic-gate "failed with %m"); 740Sstevel@tonic-gate } 750Sstevel@tonic-gate } 760Sstevel@tonic-gate free(rsap); 770Sstevel@tonic-gate thr_exit(NULL); 780Sstevel@tonic-gate return (NULL); 790Sstevel@tonic-gate } 800Sstevel@tonic-gate 810Sstevel@tonic-gate /* 820Sstevel@tonic-gate * User-space "creator" thread. This thread blocks in the kernel 830Sstevel@tonic-gate * until new worker threads need to be created for the service 840Sstevel@tonic-gate * pool. On return to userspace, if there is no error, create a 850Sstevel@tonic-gate * new thread for the service pool. 860Sstevel@tonic-gate */ 870Sstevel@tonic-gate static void * 880Sstevel@tonic-gate svcblock(void *arg) 890Sstevel@tonic-gate { 900Sstevel@tonic-gate int id = (int)arg; 910Sstevel@tonic-gate 920Sstevel@tonic-gate /* CONSTCOND */ 930Sstevel@tonic-gate while (1) { 940Sstevel@tonic-gate thread_t tid; 950Sstevel@tonic-gate 960Sstevel@tonic-gate /* 970Sstevel@tonic-gate * Call into the kernel, and hang out there 980Sstevel@tonic-gate * until a thread needs to be created. 990Sstevel@tonic-gate */ 1008138SVallish.Vaidyeshwara@Sun.COM if (_nfssys(SVCPOOL_WAIT, &id) < 0) { 101*8139SVallish.Vaidyeshwara@Sun.COM if (errno == ECANCELED || errno == EINTR || 102*8139SVallish.Vaidyeshwara@Sun.COM errno == EBUSY) 1030Sstevel@tonic-gate /* 104*8139SVallish.Vaidyeshwara@Sun.COM * If we get back ECANCELED or EINTR, 105*8139SVallish.Vaidyeshwara@Sun.COM * the service pool is exiting, and we 106*8139SVallish.Vaidyeshwara@Sun.COM * may as well clean up this thread. If 107*8139SVallish.Vaidyeshwara@Sun.COM * EBUSY is returned, there's already a 108*8139SVallish.Vaidyeshwara@Sun.COM * thread looping on this pool, so we 109*8139SVallish.Vaidyeshwara@Sun.COM * should give up. 1100Sstevel@tonic-gate */ 1110Sstevel@tonic-gate break; 1120Sstevel@tonic-gate else 1130Sstevel@tonic-gate continue; 1140Sstevel@tonic-gate } 1150Sstevel@tonic-gate 1160Sstevel@tonic-gate /* 1170Sstevel@tonic-gate * User portion of the thread does no real work since 1180Sstevel@tonic-gate * the svcpool threads actually spend their entire 1190Sstevel@tonic-gate * lives in the kernel. So, user portion of the thread 1200Sstevel@tonic-gate * should have the smallest stack possible. 1210Sstevel@tonic-gate */ 1220Sstevel@tonic-gate (void) thr_create(NULL, THR_MIN_STACK, svcstart, (void *)id, 1230Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid); 1240Sstevel@tonic-gate } 1250Sstevel@tonic-gate 1260Sstevel@tonic-gate thr_exit(NULL); 1270Sstevel@tonic-gate return (NULL); 1280Sstevel@tonic-gate } 1290Sstevel@tonic-gate 1300Sstevel@tonic-gate void 1310Sstevel@tonic-gate svcsetprio(void) 1320Sstevel@tonic-gate { 1330Sstevel@tonic-gate pcinfo_t pcinfo; 1340Sstevel@tonic-gate pri_t maxupri; 1350Sstevel@tonic-gate 1360Sstevel@tonic-gate /* 1370Sstevel@tonic-gate * By default, all threads should be part of the FX scheduler 1380Sstevel@tonic-gate * class. As nfsd/lockd server threads used to be part of the 1390Sstevel@tonic-gate * kernel, they're used to being scheduled in the SYS class. 1400Sstevel@tonic-gate * Userland threads shouldn't be in SYS, but they can be given a 1410Sstevel@tonic-gate * higher priority by default. This change still renders nfsd/lockd 1420Sstevel@tonic-gate * managable by an admin by utilizing commands to change scheduling 1430Sstevel@tonic-gate * manually, or by using resource management tools such as pools 1440Sstevel@tonic-gate * to associate them with a different scheduling class and segregate 1450Sstevel@tonic-gate * the workload. 1460Sstevel@tonic-gate * 1470Sstevel@tonic-gate * We set the threads' priority to the upper bound for priorities 1480Sstevel@tonic-gate * in FX. This should be 60, but since the desired action is to 1490Sstevel@tonic-gate * make nfsd/lockd more important than TS threads, we bow to the 1500Sstevel@tonic-gate * system's knowledge rather than setting it manually. Furthermore, 1510Sstevel@tonic-gate * since the SYS class doesn't timeslice, use an "infinite" quantum. 1520Sstevel@tonic-gate * If anything fails, just log the failure and let the daemon 1530Sstevel@tonic-gate * default to TS. 1540Sstevel@tonic-gate * 1550Sstevel@tonic-gate * The change of scheduling class is expected to fail in a non-global 1560Sstevel@tonic-gate * zone, so we avoid worrying the zone administrator unnecessarily. 1570Sstevel@tonic-gate */ 1580Sstevel@tonic-gate (void) strcpy(pcinfo.pc_clname, "FX"); 1590Sstevel@tonic-gate if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) != -1) { 1600Sstevel@tonic-gate maxupri = ((fxinfo_t *)pcinfo.pc_clinfo)->fx_maxupri; 1610Sstevel@tonic-gate if (priocntl(P_LWPID, P_MYID, PC_SETXPARMS, "FX", 1620Sstevel@tonic-gate FX_KY_UPRILIM, maxupri, FX_KY_UPRI, maxupri, 1630Sstevel@tonic-gate FX_KY_TQNSECS, FX_TQINF, NULL) != 0 && 1640Sstevel@tonic-gate getzoneid() == GLOBAL_ZONEID) 1650Sstevel@tonic-gate (void) syslog(LOG_ERR, "Unable to use FX scheduler: " 1660Sstevel@tonic-gate "%m. Using system default scheduler."); 1670Sstevel@tonic-gate } else 1680Sstevel@tonic-gate (void) syslog(LOG_ERR, "Unable to determine parameters " 1690Sstevel@tonic-gate "for FX scheduler. Using system default scheduler."); 1700Sstevel@tonic-gate } 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate int 1730Sstevel@tonic-gate svcrdma(int id, int versmin, int versmax, int delegation) 1740Sstevel@tonic-gate { 1750Sstevel@tonic-gate thread_t tid; 1760Sstevel@tonic-gate struct rdma_svc_args *rsa; 1770Sstevel@tonic-gate 1780Sstevel@tonic-gate rsa = (struct rdma_svc_args *)malloc(sizeof (struct rdma_svc_args)); 1790Sstevel@tonic-gate rsa->poolid = (uint32_t)id; 1800Sstevel@tonic-gate rsa->netid = NULL; 1810Sstevel@tonic-gate rsa->nfs_versmin = versmin; 1820Sstevel@tonic-gate rsa->nfs_versmax = versmax; 1830Sstevel@tonic-gate rsa->delegation = delegation; 1840Sstevel@tonic-gate 1850Sstevel@tonic-gate /* 1860Sstevel@tonic-gate * Create a thread to handle RDMA start and stop. 1870Sstevel@tonic-gate */ 1880Sstevel@tonic-gate if (thr_create(NULL, THR_MIN_STACK * 2, svc_rdma_creator, (void *)rsa, 1890Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid)) 1900Sstevel@tonic-gate return (1); 1910Sstevel@tonic-gate 1920Sstevel@tonic-gate return (0); 1930Sstevel@tonic-gate } 1940Sstevel@tonic-gate 1950Sstevel@tonic-gate int 1960Sstevel@tonic-gate svcwait(int id) 1970Sstevel@tonic-gate { 1980Sstevel@tonic-gate thread_t tid; 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate /* 2010Sstevel@tonic-gate * Create a bound thread to wait for kernel LWPs that 2020Sstevel@tonic-gate * need to be created. This thread also has little need 2030Sstevel@tonic-gate * of stackspace, so should be created with that in mind. 2040Sstevel@tonic-gate */ 2050Sstevel@tonic-gate if (thr_create(NULL, THR_MIN_STACK * 2, svcblock, (void *)id, 2060Sstevel@tonic-gate THR_BOUND | THR_DETACHED, &tid)) 2070Sstevel@tonic-gate return (1); 2080Sstevel@tonic-gate 2090Sstevel@tonic-gate return (0); 2100Sstevel@tonic-gate } 211