xref: /onnv-gate/usr/src/cmd/fs.d/nfs/lib/thrpool.c (revision 8138)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*8138SVallish.Vaidyeshwara@Sun.COM  * Common Development and Distribution License (the "License").
6*8138SVallish.Vaidyeshwara@Sun.COM  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*8138SVallish.Vaidyeshwara@Sun.COM  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #include <thread.h>
270Sstevel@tonic-gate #include <stdlib.h>
280Sstevel@tonic-gate #include <errno.h>
290Sstevel@tonic-gate #include <strings.h>
300Sstevel@tonic-gate #include <tiuser.h>
310Sstevel@tonic-gate #include <syslog.h>
320Sstevel@tonic-gate #include <zone.h>
330Sstevel@tonic-gate #include <sys/priocntl.h>
340Sstevel@tonic-gate #include <sys/fxpriocntl.h>
350Sstevel@tonic-gate #include <nfs/nfs.h>
360Sstevel@tonic-gate #include <nfs/nfssys.h>
370Sstevel@tonic-gate #include "thrpool.h"
380Sstevel@tonic-gate 
390Sstevel@tonic-gate extern	int	_nfssys(int, void *);
400Sstevel@tonic-gate 
410Sstevel@tonic-gate /*
420Sstevel@tonic-gate  * Thread to call into the kernel and do work on behalf of NFS.
430Sstevel@tonic-gate  */
440Sstevel@tonic-gate static void *
450Sstevel@tonic-gate svcstart(void *arg)
460Sstevel@tonic-gate {
470Sstevel@tonic-gate 	int id = (int)arg;
480Sstevel@tonic-gate 
49*8138SVallish.Vaidyeshwara@Sun.COM 	while (_nfssys(SVCPOOL_RUN, &id) < 0) {
500Sstevel@tonic-gate 		/*
510Sstevel@tonic-gate 		 * Interrupted by a signal while in the kernel.
520Sstevel@tonic-gate 		 * this process is still alive, try again.
530Sstevel@tonic-gate 		 */
54*8138SVallish.Vaidyeshwara@Sun.COM 		if (errno == EINTR)
550Sstevel@tonic-gate 			continue;
560Sstevel@tonic-gate 		else
570Sstevel@tonic-gate 			break;
580Sstevel@tonic-gate 	}
590Sstevel@tonic-gate 
600Sstevel@tonic-gate 	/*
610Sstevel@tonic-gate 	 * If we weren't interrupted by a signal, but did
620Sstevel@tonic-gate 	 * return from the kernel, this thread's work is done,
630Sstevel@tonic-gate 	 * and it should exit.
640Sstevel@tonic-gate 	 */
650Sstevel@tonic-gate 	thr_exit(NULL);
660Sstevel@tonic-gate 	return (NULL);
670Sstevel@tonic-gate }
680Sstevel@tonic-gate 
690Sstevel@tonic-gate static void *
700Sstevel@tonic-gate svc_rdma_creator(void *arg)
710Sstevel@tonic-gate {
720Sstevel@tonic-gate 	struct rdma_svc_args *rsap = (struct rdma_svc_args *)arg;
730Sstevel@tonic-gate 
74*8138SVallish.Vaidyeshwara@Sun.COM 	if (_nfssys(RDMA_SVC_INIT, rsap) < 0) {
75*8138SVallish.Vaidyeshwara@Sun.COM 		if (errno != ENODEV) {
760Sstevel@tonic-gate 			(void) syslog(LOG_INFO, "RDMA transport startup "
770Sstevel@tonic-gate 			    "failed with %m");
780Sstevel@tonic-gate 		}
790Sstevel@tonic-gate 	}
800Sstevel@tonic-gate 	free(rsap);
810Sstevel@tonic-gate 	thr_exit(NULL);
820Sstevel@tonic-gate 	return (NULL);
830Sstevel@tonic-gate }
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * User-space "creator" thread. This thread blocks in the kernel
870Sstevel@tonic-gate  * until new worker threads need to be created for the service
880Sstevel@tonic-gate  * pool. On return to userspace, if there is no error, create a
890Sstevel@tonic-gate  * new thread for the service pool.
900Sstevel@tonic-gate  */
910Sstevel@tonic-gate static void *
920Sstevel@tonic-gate svcblock(void *arg)
930Sstevel@tonic-gate {
940Sstevel@tonic-gate 	int id = (int)arg;
950Sstevel@tonic-gate 
960Sstevel@tonic-gate 	/* CONSTCOND */
970Sstevel@tonic-gate 	while (1) {
980Sstevel@tonic-gate 		thread_t tid;
990Sstevel@tonic-gate 
1000Sstevel@tonic-gate 		/*
1010Sstevel@tonic-gate 		 * Call into the kernel, and hang out there
1020Sstevel@tonic-gate 		 * until a thread needs to be created.
1030Sstevel@tonic-gate 		 */
104*8138SVallish.Vaidyeshwara@Sun.COM 		if (_nfssys(SVCPOOL_WAIT, &id) < 0) {
105*8138SVallish.Vaidyeshwara@Sun.COM 			if (errno == ECANCELED || errno == EBUSY)
1060Sstevel@tonic-gate 				/*
1070Sstevel@tonic-gate 				 * If we get back ECANCELED, the service
1080Sstevel@tonic-gate 				 * pool is exiting, and we may as well
1090Sstevel@tonic-gate 				 * clean up this thread. If EBUSY is
1100Sstevel@tonic-gate 				 * returned, there's already a thread
1110Sstevel@tonic-gate 				 * looping on this pool, so we should
1120Sstevel@tonic-gate 				 * give up.
1130Sstevel@tonic-gate 				 */
1140Sstevel@tonic-gate 				break;
1150Sstevel@tonic-gate 			else
1160Sstevel@tonic-gate 				continue;
1170Sstevel@tonic-gate 		}
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate 		/*
1200Sstevel@tonic-gate 		 * User portion of the thread does no real work since
1210Sstevel@tonic-gate 		 * the svcpool threads actually spend their entire
1220Sstevel@tonic-gate 		 * lives in the kernel. So, user portion of the thread
1230Sstevel@tonic-gate 		 * should have the smallest stack possible.
1240Sstevel@tonic-gate 		 */
1250Sstevel@tonic-gate 		(void) thr_create(NULL, THR_MIN_STACK, svcstart, (void *)id,
1260Sstevel@tonic-gate 		    THR_BOUND | THR_DETACHED, &tid);
1270Sstevel@tonic-gate 	}
1280Sstevel@tonic-gate 
1290Sstevel@tonic-gate 	thr_exit(NULL);
1300Sstevel@tonic-gate 	return (NULL);
1310Sstevel@tonic-gate }
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate void
1340Sstevel@tonic-gate svcsetprio(void)
1350Sstevel@tonic-gate {
1360Sstevel@tonic-gate 	pcinfo_t pcinfo;
1370Sstevel@tonic-gate 	pri_t maxupri;
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	/*
1400Sstevel@tonic-gate 	 * By default, all threads should be part of the FX scheduler
1410Sstevel@tonic-gate 	 * class. As nfsd/lockd server threads used to be part of the
1420Sstevel@tonic-gate 	 * kernel, they're used to being scheduled in the SYS class.
1430Sstevel@tonic-gate 	 * Userland threads shouldn't be in SYS, but they can be given a
1440Sstevel@tonic-gate 	 * higher priority by default. This change still renders nfsd/lockd
1450Sstevel@tonic-gate 	 * managable by an admin by utilizing commands to change scheduling
1460Sstevel@tonic-gate 	 * manually, or by using resource management tools such as pools
1470Sstevel@tonic-gate 	 * to associate them with a different scheduling class and segregate
1480Sstevel@tonic-gate 	 * the workload.
1490Sstevel@tonic-gate 	 *
1500Sstevel@tonic-gate 	 * We set the threads' priority to the upper bound for priorities
1510Sstevel@tonic-gate 	 * in FX. This should be 60, but since the desired action is to
1520Sstevel@tonic-gate 	 * make nfsd/lockd more important than TS threads, we bow to the
1530Sstevel@tonic-gate 	 * system's knowledge rather than setting it manually. Furthermore,
1540Sstevel@tonic-gate 	 * since the SYS class doesn't timeslice, use an "infinite" quantum.
1550Sstevel@tonic-gate 	 * If anything fails, just log the failure and let the daemon
1560Sstevel@tonic-gate 	 * default to TS.
1570Sstevel@tonic-gate 	 *
1580Sstevel@tonic-gate 	 * The change of scheduling class is expected to fail in a non-global
1590Sstevel@tonic-gate 	 * zone, so we avoid worrying the zone administrator unnecessarily.
1600Sstevel@tonic-gate 	 */
1610Sstevel@tonic-gate 	(void) strcpy(pcinfo.pc_clname, "FX");
1620Sstevel@tonic-gate 	if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) != -1) {
1630Sstevel@tonic-gate 		maxupri = ((fxinfo_t *)pcinfo.pc_clinfo)->fx_maxupri;
1640Sstevel@tonic-gate 		if (priocntl(P_LWPID, P_MYID, PC_SETXPARMS, "FX",
1650Sstevel@tonic-gate 		    FX_KY_UPRILIM, maxupri, FX_KY_UPRI, maxupri,
1660Sstevel@tonic-gate 		    FX_KY_TQNSECS, FX_TQINF, NULL) != 0 &&
1670Sstevel@tonic-gate 		    getzoneid() == GLOBAL_ZONEID)
1680Sstevel@tonic-gate 			(void) syslog(LOG_ERR, "Unable to use FX scheduler: "
1690Sstevel@tonic-gate 			    "%m. Using system default scheduler.");
1700Sstevel@tonic-gate 	} else
1710Sstevel@tonic-gate 		(void) syslog(LOG_ERR, "Unable to determine parameters "
1720Sstevel@tonic-gate 		    "for FX scheduler. Using system default scheduler.");
1730Sstevel@tonic-gate }
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate int
1760Sstevel@tonic-gate svcrdma(int id, int versmin, int versmax, int delegation)
1770Sstevel@tonic-gate {
1780Sstevel@tonic-gate 	thread_t tid;
1790Sstevel@tonic-gate 	struct rdma_svc_args *rsa;
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate 	rsa = (struct rdma_svc_args *)malloc(sizeof (struct rdma_svc_args));
1820Sstevel@tonic-gate 	rsa->poolid = (uint32_t)id;
1830Sstevel@tonic-gate 	rsa->netid = NULL;
1840Sstevel@tonic-gate 	rsa->nfs_versmin = versmin;
1850Sstevel@tonic-gate 	rsa->nfs_versmax = versmax;
1860Sstevel@tonic-gate 	rsa->delegation = delegation;
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate 	/*
1890Sstevel@tonic-gate 	 * Create a thread to handle RDMA start and stop.
1900Sstevel@tonic-gate 	 */
1910Sstevel@tonic-gate 	if (thr_create(NULL, THR_MIN_STACK * 2, svc_rdma_creator, (void *)rsa,
1920Sstevel@tonic-gate 	    THR_BOUND | THR_DETACHED, &tid))
1930Sstevel@tonic-gate 		return (1);
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate 	return (0);
1960Sstevel@tonic-gate }
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate int
1990Sstevel@tonic-gate svcwait(int id)
2000Sstevel@tonic-gate {
2010Sstevel@tonic-gate 	thread_t tid;
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 	/*
2040Sstevel@tonic-gate 	 * Create a bound thread to wait for kernel LWPs that
2050Sstevel@tonic-gate 	 * need to be created. This thread also has little need
2060Sstevel@tonic-gate 	 * of stackspace, so should be created with that in mind.
2070Sstevel@tonic-gate 	 */
2080Sstevel@tonic-gate 	if (thr_create(NULL, THR_MIN_STACK * 2, svcblock, (void *)id,
2090Sstevel@tonic-gate 	    THR_BOUND | THR_DETACHED, &tid))
2100Sstevel@tonic-gate 		return (1);
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate 	return (0);
2130Sstevel@tonic-gate }
214