1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <thread.h>
30*0Sstevel@tonic-gate #include <stdlib.h>
31*0Sstevel@tonic-gate #include <errno.h>
32*0Sstevel@tonic-gate #include <strings.h>
33*0Sstevel@tonic-gate #include <tiuser.h>
34*0Sstevel@tonic-gate #include <syslog.h>
35*0Sstevel@tonic-gate #include <zone.h>
36*0Sstevel@tonic-gate #include <sys/priocntl.h>
37*0Sstevel@tonic-gate #include <sys/fxpriocntl.h>
38*0Sstevel@tonic-gate #include <nfs/nfs.h>
39*0Sstevel@tonic-gate #include <nfs/nfssys.h>
40*0Sstevel@tonic-gate #include "thrpool.h"
41*0Sstevel@tonic-gate 
42*0Sstevel@tonic-gate extern	int	_nfssys(int, void *);
43*0Sstevel@tonic-gate 
44*0Sstevel@tonic-gate /*
45*0Sstevel@tonic-gate  * Thread to call into the kernel and do work on behalf of NFS.
46*0Sstevel@tonic-gate  */
47*0Sstevel@tonic-gate static void *
48*0Sstevel@tonic-gate svcstart(void *arg)
49*0Sstevel@tonic-gate {
50*0Sstevel@tonic-gate 	int id = (int)arg;
51*0Sstevel@tonic-gate 	int err;
52*0Sstevel@tonic-gate 
53*0Sstevel@tonic-gate 	while ((err = _nfssys(SVCPOOL_RUN, &id)) != 0) {
54*0Sstevel@tonic-gate 		/*
55*0Sstevel@tonic-gate 		 * Interrupted by a signal while in the kernel.
56*0Sstevel@tonic-gate 		 * this process is still alive, try again.
57*0Sstevel@tonic-gate 		 */
58*0Sstevel@tonic-gate 		if (err == EINTR)
59*0Sstevel@tonic-gate 			continue;
60*0Sstevel@tonic-gate 		else
61*0Sstevel@tonic-gate 			break;
62*0Sstevel@tonic-gate 	}
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate 	/*
65*0Sstevel@tonic-gate 	 * If we weren't interrupted by a signal, but did
66*0Sstevel@tonic-gate 	 * return from the kernel, this thread's work is done,
67*0Sstevel@tonic-gate 	 * and it should exit.
68*0Sstevel@tonic-gate 	 */
69*0Sstevel@tonic-gate 	thr_exit(NULL);
70*0Sstevel@tonic-gate 	return (NULL);
71*0Sstevel@tonic-gate }
72*0Sstevel@tonic-gate 
73*0Sstevel@tonic-gate static void *
74*0Sstevel@tonic-gate svc_rdma_creator(void *arg)
75*0Sstevel@tonic-gate {
76*0Sstevel@tonic-gate 	int error = 0;
77*0Sstevel@tonic-gate 	struct rdma_svc_args *rsap = (struct rdma_svc_args *)arg;
78*0Sstevel@tonic-gate 
79*0Sstevel@tonic-gate 	if (error = _nfssys(RDMA_SVC_INIT, rsap)) {
80*0Sstevel@tonic-gate 		if (error != ENODEV) {
81*0Sstevel@tonic-gate 			(void) syslog(LOG_INFO, "RDMA transport startup "
82*0Sstevel@tonic-gate 			    "failed with %m");
83*0Sstevel@tonic-gate 		}
84*0Sstevel@tonic-gate 	}
85*0Sstevel@tonic-gate 	free(rsap);
86*0Sstevel@tonic-gate 	thr_exit(NULL);
87*0Sstevel@tonic-gate 	return (NULL);
88*0Sstevel@tonic-gate }
89*0Sstevel@tonic-gate 
90*0Sstevel@tonic-gate /*
91*0Sstevel@tonic-gate  * User-space "creator" thread. This thread blocks in the kernel
92*0Sstevel@tonic-gate  * until new worker threads need to be created for the service
93*0Sstevel@tonic-gate  * pool. On return to userspace, if there is no error, create a
94*0Sstevel@tonic-gate  * new thread for the service pool.
95*0Sstevel@tonic-gate  */
96*0Sstevel@tonic-gate static void *
97*0Sstevel@tonic-gate svcblock(void *arg)
98*0Sstevel@tonic-gate {
99*0Sstevel@tonic-gate 	int id = (int)arg;
100*0Sstevel@tonic-gate 
101*0Sstevel@tonic-gate 	/* CONSTCOND */
102*0Sstevel@tonic-gate 	while (1) {
103*0Sstevel@tonic-gate 		thread_t tid;
104*0Sstevel@tonic-gate 		int err;
105*0Sstevel@tonic-gate 
106*0Sstevel@tonic-gate 		/*
107*0Sstevel@tonic-gate 		 * Call into the kernel, and hang out there
108*0Sstevel@tonic-gate 		 * until a thread needs to be created.
109*0Sstevel@tonic-gate 		 */
110*0Sstevel@tonic-gate 		if (err = _nfssys(SVCPOOL_WAIT, &id)) {
111*0Sstevel@tonic-gate 			if (err == ECANCELED || err == EBUSY)
112*0Sstevel@tonic-gate 				/*
113*0Sstevel@tonic-gate 				 * If we get back ECANCELED, the service
114*0Sstevel@tonic-gate 				 * pool is exiting, and we may as well
115*0Sstevel@tonic-gate 				 * clean up this thread. If EBUSY is
116*0Sstevel@tonic-gate 				 * returned, there's already a thread
117*0Sstevel@tonic-gate 				 * looping on this pool, so we should
118*0Sstevel@tonic-gate 				 * give up.
119*0Sstevel@tonic-gate 				 */
120*0Sstevel@tonic-gate 				break;
121*0Sstevel@tonic-gate 			else
122*0Sstevel@tonic-gate 				continue;
123*0Sstevel@tonic-gate 		}
124*0Sstevel@tonic-gate 
125*0Sstevel@tonic-gate 		/*
126*0Sstevel@tonic-gate 		 * User portion of the thread does no real work since
127*0Sstevel@tonic-gate 		 * the svcpool threads actually spend their entire
128*0Sstevel@tonic-gate 		 * lives in the kernel. So, user portion of the thread
129*0Sstevel@tonic-gate 		 * should have the smallest stack possible.
130*0Sstevel@tonic-gate 		 */
131*0Sstevel@tonic-gate 		(void) thr_create(NULL, THR_MIN_STACK, svcstart, (void *)id,
132*0Sstevel@tonic-gate 		    THR_BOUND | THR_DETACHED, &tid);
133*0Sstevel@tonic-gate 	}
134*0Sstevel@tonic-gate 
135*0Sstevel@tonic-gate 	thr_exit(NULL);
136*0Sstevel@tonic-gate 	return (NULL);
137*0Sstevel@tonic-gate }
138*0Sstevel@tonic-gate 
139*0Sstevel@tonic-gate void
140*0Sstevel@tonic-gate svcsetprio(void)
141*0Sstevel@tonic-gate {
142*0Sstevel@tonic-gate 	pcinfo_t pcinfo;
143*0Sstevel@tonic-gate 	pri_t maxupri;
144*0Sstevel@tonic-gate 
145*0Sstevel@tonic-gate 	/*
146*0Sstevel@tonic-gate 	 * By default, all threads should be part of the FX scheduler
147*0Sstevel@tonic-gate 	 * class. As nfsd/lockd server threads used to be part of the
148*0Sstevel@tonic-gate 	 * kernel, they're used to being scheduled in the SYS class.
149*0Sstevel@tonic-gate 	 * Userland threads shouldn't be in SYS, but they can be given a
150*0Sstevel@tonic-gate 	 * higher priority by default. This change still renders nfsd/lockd
151*0Sstevel@tonic-gate 	 * managable by an admin by utilizing commands to change scheduling
152*0Sstevel@tonic-gate 	 * manually, or by using resource management tools such as pools
153*0Sstevel@tonic-gate 	 * to associate them with a different scheduling class and segregate
154*0Sstevel@tonic-gate 	 * the workload.
155*0Sstevel@tonic-gate 	 *
156*0Sstevel@tonic-gate 	 * We set the threads' priority to the upper bound for priorities
157*0Sstevel@tonic-gate 	 * in FX. This should be 60, but since the desired action is to
158*0Sstevel@tonic-gate 	 * make nfsd/lockd more important than TS threads, we bow to the
159*0Sstevel@tonic-gate 	 * system's knowledge rather than setting it manually. Furthermore,
160*0Sstevel@tonic-gate 	 * since the SYS class doesn't timeslice, use an "infinite" quantum.
161*0Sstevel@tonic-gate 	 * If anything fails, just log the failure and let the daemon
162*0Sstevel@tonic-gate 	 * default to TS.
163*0Sstevel@tonic-gate 	 *
164*0Sstevel@tonic-gate 	 * The change of scheduling class is expected to fail in a non-global
165*0Sstevel@tonic-gate 	 * zone, so we avoid worrying the zone administrator unnecessarily.
166*0Sstevel@tonic-gate 	 */
167*0Sstevel@tonic-gate 	(void) strcpy(pcinfo.pc_clname, "FX");
168*0Sstevel@tonic-gate 	if (priocntl(0, 0, PC_GETCID, (caddr_t)&pcinfo) != -1) {
169*0Sstevel@tonic-gate 		maxupri = ((fxinfo_t *)pcinfo.pc_clinfo)->fx_maxupri;
170*0Sstevel@tonic-gate 		if (priocntl(P_LWPID, P_MYID, PC_SETXPARMS, "FX",
171*0Sstevel@tonic-gate 		    FX_KY_UPRILIM, maxupri, FX_KY_UPRI, maxupri,
172*0Sstevel@tonic-gate 		    FX_KY_TQNSECS, FX_TQINF, NULL) != 0 &&
173*0Sstevel@tonic-gate 		    getzoneid() == GLOBAL_ZONEID)
174*0Sstevel@tonic-gate 			(void) syslog(LOG_ERR, "Unable to use FX scheduler: "
175*0Sstevel@tonic-gate 			    "%m. Using system default scheduler.");
176*0Sstevel@tonic-gate 	} else
177*0Sstevel@tonic-gate 		(void) syslog(LOG_ERR, "Unable to determine parameters "
178*0Sstevel@tonic-gate 		    "for FX scheduler. Using system default scheduler.");
179*0Sstevel@tonic-gate }
180*0Sstevel@tonic-gate 
181*0Sstevel@tonic-gate int
182*0Sstevel@tonic-gate svcrdma(int id, int versmin, int versmax, int delegation)
183*0Sstevel@tonic-gate {
184*0Sstevel@tonic-gate 	thread_t tid;
185*0Sstevel@tonic-gate 	struct rdma_svc_args *rsa;
186*0Sstevel@tonic-gate 
187*0Sstevel@tonic-gate 	rsa = (struct rdma_svc_args *)malloc(sizeof (struct rdma_svc_args));
188*0Sstevel@tonic-gate 	rsa->poolid = (uint32_t)id;
189*0Sstevel@tonic-gate 	rsa->netid = NULL;
190*0Sstevel@tonic-gate 	rsa->nfs_versmin = versmin;
191*0Sstevel@tonic-gate 	rsa->nfs_versmax = versmax;
192*0Sstevel@tonic-gate 	rsa->delegation = delegation;
193*0Sstevel@tonic-gate 
194*0Sstevel@tonic-gate 	/*
195*0Sstevel@tonic-gate 	 * Create a thread to handle RDMA start and stop.
196*0Sstevel@tonic-gate 	 */
197*0Sstevel@tonic-gate 	if (thr_create(NULL, THR_MIN_STACK * 2, svc_rdma_creator, (void *)rsa,
198*0Sstevel@tonic-gate 	    THR_BOUND | THR_DETACHED, &tid))
199*0Sstevel@tonic-gate 		return (1);
200*0Sstevel@tonic-gate 
201*0Sstevel@tonic-gate 	return (0);
202*0Sstevel@tonic-gate }
203*0Sstevel@tonic-gate 
204*0Sstevel@tonic-gate int
205*0Sstevel@tonic-gate svcwait(int id)
206*0Sstevel@tonic-gate {
207*0Sstevel@tonic-gate 	thread_t tid;
208*0Sstevel@tonic-gate 
209*0Sstevel@tonic-gate 	/*
210*0Sstevel@tonic-gate 	 * Create a bound thread to wait for kernel LWPs that
211*0Sstevel@tonic-gate 	 * need to be created. This thread also has little need
212*0Sstevel@tonic-gate 	 * of stackspace, so should be created with that in mind.
213*0Sstevel@tonic-gate 	 */
214*0Sstevel@tonic-gate 	if (thr_create(NULL, THR_MIN_STACK * 2, svcblock, (void *)id,
215*0Sstevel@tonic-gate 	    THR_BOUND | THR_DETACHED, &tid))
216*0Sstevel@tonic-gate 		return (1);
217*0Sstevel@tonic-gate 
218*0Sstevel@tonic-gate 	return (0);
219*0Sstevel@tonic-gate }
220