xref: /onnv-gate/usr/src/cmd/avs/nsctl/nskernd.c (revision 11576:b23c42c0c9d6)
17836SJohn.Forte@Sun.COM /*
27836SJohn.Forte@Sun.COM  * CDDL HEADER START
37836SJohn.Forte@Sun.COM  *
47836SJohn.Forte@Sun.COM  * The contents of this file are subject to the terms of the
57836SJohn.Forte@Sun.COM  * Common Development and Distribution License (the "License").
67836SJohn.Forte@Sun.COM  * You may not use this file except in compliance with the License.
77836SJohn.Forte@Sun.COM  *
87836SJohn.Forte@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97836SJohn.Forte@Sun.COM  * or http://www.opensolaris.org/os/licensing.
107836SJohn.Forte@Sun.COM  * See the License for the specific language governing permissions
117836SJohn.Forte@Sun.COM  * and limitations under the License.
127836SJohn.Forte@Sun.COM  *
137836SJohn.Forte@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
147836SJohn.Forte@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157836SJohn.Forte@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
167836SJohn.Forte@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
177836SJohn.Forte@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
187836SJohn.Forte@Sun.COM  *
197836SJohn.Forte@Sun.COM  * CDDL HEADER END
207836SJohn.Forte@Sun.COM  */
21*11576SSurya.Prakki@Sun.COM 
227836SJohn.Forte@Sun.COM /*
23*11576SSurya.Prakki@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
247836SJohn.Forte@Sun.COM  * Use is subject to license terms.
257836SJohn.Forte@Sun.COM  */
267836SJohn.Forte@Sun.COM 
277836SJohn.Forte@Sun.COM #include <sys/types.h>
287836SJohn.Forte@Sun.COM #include <sys/resource.h>
297836SJohn.Forte@Sun.COM #include <sys/priocntl.h>
307836SJohn.Forte@Sun.COM #include <sys/rtpriocntl.h>
317836SJohn.Forte@Sun.COM #include <sys/tspriocntl.h>
327836SJohn.Forte@Sun.COM #include <sys/wait.h>
337836SJohn.Forte@Sun.COM #include <sys/stat.h>
347836SJohn.Forte@Sun.COM 
357836SJohn.Forte@Sun.COM #include <strings.h>
367836SJohn.Forte@Sun.COM #include <thread.h>
377836SJohn.Forte@Sun.COM #include <stdlib.h>
387836SJohn.Forte@Sun.COM #include <signal.h>
397836SJohn.Forte@Sun.COM #include <errno.h>
407836SJohn.Forte@Sun.COM #include <stdio.h>
417836SJohn.Forte@Sun.COM #include <fcntl.h>
427836SJohn.Forte@Sun.COM #include <locale.h>
437836SJohn.Forte@Sun.COM #include <unistd.h>
447836SJohn.Forte@Sun.COM #include <syslog.h>
457836SJohn.Forte@Sun.COM 
467836SJohn.Forte@Sun.COM #include <sys/nsctl/cfg.h>
477836SJohn.Forte@Sun.COM #include <sys/nsctl/nsctl.h>
487836SJohn.Forte@Sun.COM #include <sys/nsctl/nsc_ioctl.h>
497836SJohn.Forte@Sun.COM #include <sys/nskernd.h>
507836SJohn.Forte@Sun.COM #include <nsctl.h>
517836SJohn.Forte@Sun.COM 
527836SJohn.Forte@Sun.COM #include <sys/mkdev.h>
537836SJohn.Forte@Sun.COM #include <sys/nsctl/sv_efi.h>
547836SJohn.Forte@Sun.COM 
557836SJohn.Forte@Sun.COM static const char *rdev = "/dev/nsctl";
567836SJohn.Forte@Sun.COM 
/*
 * Define a minimal user stack size in bytes over and above the
 * libthread THR_MIN_STACK minimum value.
 *
 * This stack size needs to be sufficient to run _newlwp() and then
 * ioctl() down into the kernel.
 */
647836SJohn.Forte@Sun.COM #define	NSK_STACK_SIZE	512
657836SJohn.Forte@Sun.COM 
667836SJohn.Forte@Sun.COM /*
677836SJohn.Forte@Sun.COM  * LWP scheduling control switches.
687836SJohn.Forte@Sun.COM  *
697836SJohn.Forte@Sun.COM  * allow_pri	- set to non-zero to enable priocntl() manipulations of
707836SJohn.Forte@Sun.COM  *		created LWPs.
717836SJohn.Forte@Sun.COM  * allow_rt	- set to non-zero to use the RT rather than the TS
727836SJohn.Forte@Sun.COM  *		scheduling class when manipulating the schduling
737836SJohn.Forte@Sun.COM  *		parameters for an LWP.  Only used if allow_pri is
747836SJohn.Forte@Sun.COM  *		non-zero.
757836SJohn.Forte@Sun.COM  */
767836SJohn.Forte@Sun.COM static int allow_pri = 1;
777836SJohn.Forte@Sun.COM static int allow_rt = 0;	/* disallow - bad interactions with timeout() */
787836SJohn.Forte@Sun.COM 
797836SJohn.Forte@Sun.COM static int nsctl_fd = -1;
807836SJohn.Forte@Sun.COM static int sigterm;
817836SJohn.Forte@Sun.COM 
827836SJohn.Forte@Sun.COM static int nthreads;		/* number of threads in the kernel */
837836SJohn.Forte@Sun.COM static int exiting;		/* shutdown in progress flag */
847836SJohn.Forte@Sun.COM static mutex_t thr_mutex = DEFAULTMUTEX;
857836SJohn.Forte@Sun.COM static mutex_t cfg_mutex = DEFAULTMUTEX;
867836SJohn.Forte@Sun.COM 
877836SJohn.Forte@Sun.COM static int cl_nodeid = -1;
887836SJohn.Forte@Sun.COM 
897836SJohn.Forte@Sun.COM static int display_msg = 0;
907836SJohn.Forte@Sun.COM static int delay_time = 30;
917836SJohn.Forte@Sun.COM 
/*
 * Print the command synopsis on stderr and terminate the process with
 * exit status 255.
 */
static void
usage(void)
{
	const char *synopsis = gettext("usage: nskernd\n");

	(void) fprintf(stderr, synopsis);
	exit(255);
}
987836SJohn.Forte@Sun.COM 
997836SJohn.Forte@Sun.COM 
1007836SJohn.Forte@Sun.COM static void
sighand(int sig)1017836SJohn.Forte@Sun.COM sighand(int sig)
1027836SJohn.Forte@Sun.COM {
1037836SJohn.Forte@Sun.COM 	if (sig == SIGTERM) {
1047836SJohn.Forte@Sun.COM 		sigterm++;
1057836SJohn.Forte@Sun.COM 	}
1067836SJohn.Forte@Sun.COM }
1077836SJohn.Forte@Sun.COM 
1087836SJohn.Forte@Sun.COM 
1097836SJohn.Forte@Sun.COM /*
1107836SJohn.Forte@Sun.COM  * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel
1117836SJohn.Forte@Sun.COM  */
1127836SJohn.Forte@Sun.COM int
nthread_inc(void)1137836SJohn.Forte@Sun.COM nthread_inc(void)
1147836SJohn.Forte@Sun.COM {
115*11576SSurya.Prakki@Sun.COM 	(void) mutex_lock(&thr_mutex);
1167836SJohn.Forte@Sun.COM 	if (exiting) {
1177836SJohn.Forte@Sun.COM 		/* cannot enter kernel as nskernd is being shutdown - exit */
118*11576SSurya.Prakki@Sun.COM 		(void) mutex_unlock(&thr_mutex);
1197836SJohn.Forte@Sun.COM 		return (0);
1207836SJohn.Forte@Sun.COM 	}
1217836SJohn.Forte@Sun.COM 	nthreads++;
122*11576SSurya.Prakki@Sun.COM 	(void) mutex_unlock(&thr_mutex);
1237836SJohn.Forte@Sun.COM 	return (1);
1247836SJohn.Forte@Sun.COM }
1257836SJohn.Forte@Sun.COM 
1267836SJohn.Forte@Sun.COM 
1277836SJohn.Forte@Sun.COM void
nthread_dec(void)1287836SJohn.Forte@Sun.COM nthread_dec(void)
1297836SJohn.Forte@Sun.COM {
130*11576SSurya.Prakki@Sun.COM 	(void) mutex_lock(&thr_mutex);
1317836SJohn.Forte@Sun.COM 	nthreads--;
132*11576SSurya.Prakki@Sun.COM 	(void) mutex_unlock(&thr_mutex);
1337836SJohn.Forte@Sun.COM }
1347836SJohn.Forte@Sun.COM 
1357836SJohn.Forte@Sun.COM 
1367836SJohn.Forte@Sun.COM /*
1377836SJohn.Forte@Sun.COM  * returns: 1 - can shutdown; 0 - unable to shutdown
1387836SJohn.Forte@Sun.COM  */
1397836SJohn.Forte@Sun.COM int
canshutdown(void)1407836SJohn.Forte@Sun.COM canshutdown(void)
1417836SJohn.Forte@Sun.COM {
1427836SJohn.Forte@Sun.COM 	int rc = 1;
1437836SJohn.Forte@Sun.COM 	time_t	start_delay;
1447836SJohn.Forte@Sun.COM 
145*11576SSurya.Prakki@Sun.COM 	(void) mutex_lock(&thr_mutex);
1467836SJohn.Forte@Sun.COM 	if (nthreads > 0) {
1477836SJohn.Forte@Sun.COM 		if (display_msg) {
148*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
1497836SJohn.Forte@Sun.COM 			    gettext("nskernd: unable to shutdown: "
1507836SJohn.Forte@Sun.COM 			    "%d kernel threads in use\n"), nthreads);
1517836SJohn.Forte@Sun.COM 		}
1527836SJohn.Forte@Sun.COM 		start_delay = time(0);
1537836SJohn.Forte@Sun.COM 		while (nthreads > 0 && (time(0) - start_delay) < delay_time) {
154*11576SSurya.Prakki@Sun.COM 			(void) mutex_unlock(&thr_mutex);
155*11576SSurya.Prakki@Sun.COM 			(void) sleep(1);
156*11576SSurya.Prakki@Sun.COM 			(void) mutex_lock(&thr_mutex);
157*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
1587836SJohn.Forte@Sun.COM 			    gettext("nskernd:   delay shutdown: "
1597836SJohn.Forte@Sun.COM 			    "%d kernel threads in use\n"), nthreads);
1607836SJohn.Forte@Sun.COM 		}
1617836SJohn.Forte@Sun.COM 		if (nthreads > 0) {
1627836SJohn.Forte@Sun.COM 			rc = 0;
1637836SJohn.Forte@Sun.COM 		} else {
1647836SJohn.Forte@Sun.COM 			exiting = 1;
1657836SJohn.Forte@Sun.COM 		}
1667836SJohn.Forte@Sun.COM 	} else {
1677836SJohn.Forte@Sun.COM 		/* flag shutdown in progress */
1687836SJohn.Forte@Sun.COM 		exiting = 1;
1697836SJohn.Forte@Sun.COM 	}
170*11576SSurya.Prakki@Sun.COM 	(void) mutex_unlock(&thr_mutex);
1717836SJohn.Forte@Sun.COM 
1727836SJohn.Forte@Sun.COM 	return (rc);
1737836SJohn.Forte@Sun.COM }
1747836SJohn.Forte@Sun.COM 
1757836SJohn.Forte@Sun.COM 
1767836SJohn.Forte@Sun.COM /*
1777836SJohn.Forte@Sun.COM  * returns: 1 - shutdown successful; 0 - unable to shutdown
1787836SJohn.Forte@Sun.COM  */
1797836SJohn.Forte@Sun.COM int
shutdown(void)1807836SJohn.Forte@Sun.COM shutdown(void)
1817836SJohn.Forte@Sun.COM {
1827836SJohn.Forte@Sun.COM 	struct nskernd data;
1837836SJohn.Forte@Sun.COM 	int rc;
1847836SJohn.Forte@Sun.COM 
1857836SJohn.Forte@Sun.COM 	if (nsctl_fd < 0)
1867836SJohn.Forte@Sun.COM 		return (1);
1877836SJohn.Forte@Sun.COM 
1887836SJohn.Forte@Sun.COM 	bzero(&data, sizeof (data));
1897836SJohn.Forte@Sun.COM 	data.command = NSKERND_STOP;
1907836SJohn.Forte@Sun.COM 
1917836SJohn.Forte@Sun.COM 	if (!canshutdown()) {
1927836SJohn.Forte@Sun.COM 		return (0);
1937836SJohn.Forte@Sun.COM 	}
1947836SJohn.Forte@Sun.COM 
1957836SJohn.Forte@Sun.COM 	rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data);
1967836SJohn.Forte@Sun.COM 	if (rc < 0) {
1977836SJohn.Forte@Sun.COM 		if (errno != EINTR || !sigterm) {
198*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
1997836SJohn.Forte@Sun.COM 			    gettext("nskernd: NSKERND_STOP failed\n"));
2007836SJohn.Forte@Sun.COM 		}
2017836SJohn.Forte@Sun.COM 	}
2027836SJohn.Forte@Sun.COM 
2037836SJohn.Forte@Sun.COM 	return (1);
2047836SJohn.Forte@Sun.COM }
2057836SJohn.Forte@Sun.COM 
2067836SJohn.Forte@Sun.COM 
2077836SJohn.Forte@Sun.COM /*
2087836SJohn.Forte@Sun.COM  * First function run by a NSKERND_NEWLWP thread.
2097836SJohn.Forte@Sun.COM  *
2107836SJohn.Forte@Sun.COM  * Determines if it needs to change the scheduling priority of the LWP,
2117836SJohn.Forte@Sun.COM  * and then calls back into the kernel.
2127836SJohn.Forte@Sun.COM  */
2137836SJohn.Forte@Sun.COM static void *
_newlwp(void * arg)2147836SJohn.Forte@Sun.COM _newlwp(void *arg)
2157836SJohn.Forte@Sun.COM {
2167836SJohn.Forte@Sun.COM 	struct nskernd nsk;
2177836SJohn.Forte@Sun.COM 	pcparms_t pcparms;
2187836SJohn.Forte@Sun.COM 	pcinfo_t pcinfo;
2197836SJohn.Forte@Sun.COM 
2207836SJohn.Forte@Sun.COM 	/* copy arguments onto stack and free heap memory */
2217836SJohn.Forte@Sun.COM 	bcopy(arg, &nsk, sizeof (nsk));
2227836SJohn.Forte@Sun.COM 	free(arg);
2237836SJohn.Forte@Sun.COM 
2247836SJohn.Forte@Sun.COM 	if (nsk.data2 && allow_pri) {
2257836SJohn.Forte@Sun.COM 		/* increase the scheduling priority of this LWP */
2267836SJohn.Forte@Sun.COM 
2277836SJohn.Forte@Sun.COM 		bzero(&pcinfo, sizeof (pcinfo));
228*11576SSurya.Prakki@Sun.COM 		(void) strcpy(pcinfo.pc_clname, allow_rt ? "RT" : "TS");
2297836SJohn.Forte@Sun.COM 
2307836SJohn.Forte@Sun.COM 		if (priocntl(0, 0, PC_GETCID, (char *)&pcinfo) < 0) {
231*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
232*11576SSurya.Prakki@Sun.COM 			    gettext(
233*11576SSurya.Prakki@Sun.COM 			    "nskernd: priocntl(PC_GETCID) failed: %s\n"),
234*11576SSurya.Prakki@Sun.COM 			    strerror(errno));
2357836SJohn.Forte@Sun.COM 			goto pri_done;
2367836SJohn.Forte@Sun.COM 		}
2377836SJohn.Forte@Sun.COM 
2387836SJohn.Forte@Sun.COM 		bzero(&pcparms, sizeof (pcparms));
2397836SJohn.Forte@Sun.COM 		pcparms.pc_cid = pcinfo.pc_cid;
2407836SJohn.Forte@Sun.COM 
2417836SJohn.Forte@Sun.COM 		if (allow_rt) {
2427836SJohn.Forte@Sun.COM 			((rtparms_t *)pcparms.pc_clparms)->rt_pri =
2437836SJohn.Forte@Sun.COM 				(pri_t)0; /* minimum RT priority */
2447836SJohn.Forte@Sun.COM 			((rtparms_t *)pcparms.pc_clparms)->rt_tqsecs =
2457836SJohn.Forte@Sun.COM 				(uint_t)RT_TQDEF;
2467836SJohn.Forte@Sun.COM 			((rtparms_t *)pcparms.pc_clparms)->rt_tqnsecs =
2477836SJohn.Forte@Sun.COM 				RT_TQDEF;
2487836SJohn.Forte@Sun.COM 		} else {
2497836SJohn.Forte@Sun.COM 			((tsparms_t *)pcparms.pc_clparms)->ts_uprilim =
2507836SJohn.Forte@Sun.COM 				((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri;
2517836SJohn.Forte@Sun.COM 			((tsparms_t *)pcparms.pc_clparms)->ts_upri =
2527836SJohn.Forte@Sun.COM 				((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri;
2537836SJohn.Forte@Sun.COM 		}
2547836SJohn.Forte@Sun.COM 
2557836SJohn.Forte@Sun.COM 		if (priocntl(P_LWPID, P_MYID,
2567836SJohn.Forte@Sun.COM 		    PC_SETPARMS, (char *)&pcparms) < 0) {
257*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
258*11576SSurya.Prakki@Sun.COM 			    gettext(
259*11576SSurya.Prakki@Sun.COM 			    "nskernd: priocntl(PC_SETPARMS) failed: %s\n"),
260*11576SSurya.Prakki@Sun.COM 			    strerror(errno));
2617836SJohn.Forte@Sun.COM 		}
2627836SJohn.Forte@Sun.COM 	}
2637836SJohn.Forte@Sun.COM 
2647836SJohn.Forte@Sun.COM pri_done:
2657836SJohn.Forte@Sun.COM 	if (nthread_inc()) {
2667836SJohn.Forte@Sun.COM 		(void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk);
2677836SJohn.Forte@Sun.COM 		nthread_dec();
2687836SJohn.Forte@Sun.COM 	}
2697836SJohn.Forte@Sun.COM 	return (NULL);
2707836SJohn.Forte@Sun.COM }
2717836SJohn.Forte@Sun.COM 
2727836SJohn.Forte@Sun.COM 
2737836SJohn.Forte@Sun.COM /*
2747836SJohn.Forte@Sun.COM  * Start a new thread bound to an LWP.
2757836SJohn.Forte@Sun.COM  *
2767836SJohn.Forte@Sun.COM  * This is the user level side of nsc_create_process().
2777836SJohn.Forte@Sun.COM  */
2787836SJohn.Forte@Sun.COM static void
newlwp(struct nskernd * req)2797836SJohn.Forte@Sun.COM newlwp(struct nskernd *req)
2807836SJohn.Forte@Sun.COM {
2817836SJohn.Forte@Sun.COM 	struct nskernd *nskp;
2827836SJohn.Forte@Sun.COM 	thread_t tid;
2837836SJohn.Forte@Sun.COM 	int rc;
2847836SJohn.Forte@Sun.COM 
2857836SJohn.Forte@Sun.COM 	nskp = malloc(sizeof (*nskp));
2867836SJohn.Forte@Sun.COM 	if (!nskp) {
2877836SJohn.Forte@Sun.COM #ifdef DEBUG
288*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr, gettext("nskernd: malloc(%d) failed\n"),
289*11576SSurya.Prakki@Sun.COM 		    sizeof (*nskp));
2907836SJohn.Forte@Sun.COM #endif
2917836SJohn.Forte@Sun.COM 		req->data1 = (uint64_t)ENOMEM;
2927836SJohn.Forte@Sun.COM 		return;
2937836SJohn.Forte@Sun.COM 	}
2947836SJohn.Forte@Sun.COM 
2957836SJohn.Forte@Sun.COM 	/* copy args for child */
2967836SJohn.Forte@Sun.COM 	bcopy(req, nskp, sizeof (*nskp));
2977836SJohn.Forte@Sun.COM 
2987836SJohn.Forte@Sun.COM 	rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE),
2997836SJohn.Forte@Sun.COM 		_newlwp, nskp, THR_BOUND|THR_DETACHED, &tid);
3007836SJohn.Forte@Sun.COM 
3017836SJohn.Forte@Sun.COM 	if (rc != 0) {
3027836SJohn.Forte@Sun.COM 		/* thr_create failed */
3037836SJohn.Forte@Sun.COM #ifdef DEBUG
304*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
305*11576SSurya.Prakki@Sun.COM 		    gettext("nskernd: thr_create failed: %s\n"),
306*11576SSurya.Prakki@Sun.COM 		    strerror(errno));
3077836SJohn.Forte@Sun.COM #endif
3087836SJohn.Forte@Sun.COM 		req->data1 = (uint64_t)errno;
3097836SJohn.Forte@Sun.COM 		free(nskp);
3107836SJohn.Forte@Sun.COM 	} else {
3117836SJohn.Forte@Sun.COM 		/* success - _newlwp() will free nskp */
3127836SJohn.Forte@Sun.COM 		req->data1 = (uint64_t)0;
3137836SJohn.Forte@Sun.COM 	}
3147836SJohn.Forte@Sun.COM }
3157836SJohn.Forte@Sun.COM 
3167836SJohn.Forte@Sun.COM static int
log_iibmp_err(char * set,int flags)3177836SJohn.Forte@Sun.COM log_iibmp_err(char *set, int flags)
3187836SJohn.Forte@Sun.COM {
3197836SJohn.Forte@Sun.COM 	CFGFILE *cfg;
3207836SJohn.Forte@Sun.COM 	char key[CFG_MAX_KEY];
3217836SJohn.Forte@Sun.COM 	char buf[CFG_MAX_BUF];
3227836SJohn.Forte@Sun.COM 	char newflags[CFG_MAX_BUF];
3237836SJohn.Forte@Sun.COM 	char outbuf[CFG_MAX_BUF];
3247836SJohn.Forte@Sun.COM 	char *mst, *shd, *bmp, *mode, *ovr, *cnode, *opt, *grp;
3257836SJohn.Forte@Sun.COM 	int setno, found = 0;
3267836SJohn.Forte@Sun.COM 	int setlen;
3277836SJohn.Forte@Sun.COM 	int rc = 0;
3287836SJohn.Forte@Sun.COM 	pid_t pid = -1;
3297836SJohn.Forte@Sun.COM 
3307836SJohn.Forte@Sun.COM 	if (set && *set) {
3317836SJohn.Forte@Sun.COM 		setlen = strlen(set);
3327836SJohn.Forte@Sun.COM 	} else {
3337836SJohn.Forte@Sun.COM 		return (EINVAL);
3347836SJohn.Forte@Sun.COM 	}
3357836SJohn.Forte@Sun.COM 
336*11576SSurya.Prakki@Sun.COM 	(void) mutex_lock(&cfg_mutex);
3377836SJohn.Forte@Sun.COM 	cfg = cfg_open("");
3387836SJohn.Forte@Sun.COM 	if (!cfg) {
339*11576SSurya.Prakki@Sun.COM 		(void) mutex_unlock(&cfg_mutex);
3407836SJohn.Forte@Sun.COM 		return (ENXIO);
3417836SJohn.Forte@Sun.COM 	}
3427836SJohn.Forte@Sun.COM 
3437836SJohn.Forte@Sun.COM 	if (!cfg_lock(cfg, CFG_WRLOCK)) {
3447836SJohn.Forte@Sun.COM 
345*11576SSurya.Prakki@Sun.COM 		(void) mutex_unlock(&cfg_mutex);
3467836SJohn.Forte@Sun.COM 		cfg_close(cfg);
3477836SJohn.Forte@Sun.COM 
3487836SJohn.Forte@Sun.COM 		pid = fork();
3497836SJohn.Forte@Sun.COM 
3507836SJohn.Forte@Sun.COM 		if (pid == -1) {
351*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr, gettext(
3527836SJohn.Forte@Sun.COM 			    "nskernd: Error forking\n"));
3537836SJohn.Forte@Sun.COM 			return (errno);
3547836SJohn.Forte@Sun.COM 		} else if (pid > 0) {
355*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stdout, gettext(
3567836SJohn.Forte@Sun.COM 			    "nskernd: Attempting deferred bitmap error\n"));
3577836SJohn.Forte@Sun.COM 			return (0);
3587836SJohn.Forte@Sun.COM 		}
3597836SJohn.Forte@Sun.COM 
360*11576SSurya.Prakki@Sun.COM 		(void) mutex_lock(&cfg_mutex);
3617836SJohn.Forte@Sun.COM 		cfg = cfg_open("");
3627836SJohn.Forte@Sun.COM 		if (!cfg) {
363*11576SSurya.Prakki@Sun.COM 			(void) mutex_unlock(&cfg_mutex);
364*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr, gettext(
3657836SJohn.Forte@Sun.COM 			    "nskernd: Failed cfg_open, deferred bitmap\n"));
3667836SJohn.Forte@Sun.COM 			return (ENXIO);
3677836SJohn.Forte@Sun.COM 		}
3687836SJohn.Forte@Sun.COM 
3697836SJohn.Forte@Sun.COM 		/* Sooner or later, this lock will be free */
3707836SJohn.Forte@Sun.COM 		while (!cfg_lock(cfg, CFG_WRLOCK))
371*11576SSurya.Prakki@Sun.COM 			(void) sleep(2);
3727836SJohn.Forte@Sun.COM 	}
3737836SJohn.Forte@Sun.COM 
3747836SJohn.Forte@Sun.COM 	/* find the proper set number */
3757836SJohn.Forte@Sun.COM 	for (setno = 1; !found; setno++) {
376*11576SSurya.Prakki@Sun.COM 		(void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno);
3777836SJohn.Forte@Sun.COM 		if (cfg_get_cstring(cfg, key, buf, CFG_MAX_BUF) < 0) {
3787836SJohn.Forte@Sun.COM 			break;
3797836SJohn.Forte@Sun.COM 		}
3807836SJohn.Forte@Sun.COM 
3817836SJohn.Forte@Sun.COM 		mst = strtok(buf, " ");
3827836SJohn.Forte@Sun.COM 		shd = strtok(NULL, " ");
3837836SJohn.Forte@Sun.COM 		if (strncmp(shd, set, setlen) == 0) {
3847836SJohn.Forte@Sun.COM 			found = 1;
3857836SJohn.Forte@Sun.COM 
3867836SJohn.Forte@Sun.COM 			bmp = strtok(NULL, " ");
3877836SJohn.Forte@Sun.COM 			mode = strtok(NULL, " ");
3887836SJohn.Forte@Sun.COM 			ovr = strtok(NULL, " ");
3897836SJohn.Forte@Sun.COM 			cnode = strtok(NULL, " ");
3907836SJohn.Forte@Sun.COM 			opt = strtok(NULL, " ");
3917836SJohn.Forte@Sun.COM 			grp = strtok(NULL, " ");
3927836SJohn.Forte@Sun.COM 			break;
3937836SJohn.Forte@Sun.COM 		}
3947836SJohn.Forte@Sun.COM 	}
3957836SJohn.Forte@Sun.COM 
3967836SJohn.Forte@Sun.COM 	if (found) {
3977836SJohn.Forte@Sun.COM 		/* were there flags in the options field already? */
398*11576SSurya.Prakki@Sun.COM 		(void) snprintf(newflags, CFG_MAX_BUF, "%s=0x%x",
3997836SJohn.Forte@Sun.COM 		    NSKERN_II_BMP_OPTION, flags);
4007836SJohn.Forte@Sun.COM 		if (opt && strcmp(opt, "-") != 0) {
4017836SJohn.Forte@Sun.COM 			bzero(newflags, CFG_MAX_BUF);
4027836SJohn.Forte@Sun.COM 			opt = strtok(opt, ";");
4037836SJohn.Forte@Sun.COM 			while (opt) {
4047836SJohn.Forte@Sun.COM 				if (strncmp(opt, NSKERN_II_BMP_OPTION,
4057836SJohn.Forte@Sun.COM 				    strlen(NSKERN_II_BMP_OPTION)) != 0) {
406*11576SSurya.Prakki@Sun.COM 					(void) strcat(newflags, ";");
407*11576SSurya.Prakki@Sun.COM 					(void) strcat(newflags, opt);
4087836SJohn.Forte@Sun.COM 				}
4097836SJohn.Forte@Sun.COM 			}
4107836SJohn.Forte@Sun.COM 		}
411*11576SSurya.Prakki@Sun.COM 		(void) snprintf(key, CFG_MAX_KEY, "ii.set%d", setno);
412*11576SSurya.Prakki@Sun.COM 		(void) snprintf(outbuf, CFG_MAX_BUF, "%s %s %s %s %s %s %s %s",
413*11576SSurya.Prakki@Sun.COM 		    mst, shd, bmp, mode, ovr, cnode, newflags, grp);
4147836SJohn.Forte@Sun.COM 		if (cfg_put_cstring(cfg, key, outbuf, CFG_MAX_BUF) < 0) {
415*11576SSurya.Prakki@Sun.COM 			(void) printf("Failed to put [%s]\n", outbuf);
4167836SJohn.Forte@Sun.COM 			rc = ENXIO;
4177836SJohn.Forte@Sun.COM 		} else {
418*11576SSurya.Prakki@Sun.COM 			(void) cfg_commit(cfg);
4197836SJohn.Forte@Sun.COM 			rc = 0;
4207836SJohn.Forte@Sun.COM 		}
4217836SJohn.Forte@Sun.COM 	} else {
422*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr, gettext(
423*11576SSurya.Prakki@Sun.COM 		    "nskernd: Failed deferred bitmap [%s]\n"), set);
4247836SJohn.Forte@Sun.COM 		rc = EINVAL;
4257836SJohn.Forte@Sun.COM 	}
4267836SJohn.Forte@Sun.COM 	cfg_unlock(cfg);
4277836SJohn.Forte@Sun.COM 	cfg_close(cfg);
428*11576SSurya.Prakki@Sun.COM 	(void) mutex_unlock(&cfg_mutex);
4297836SJohn.Forte@Sun.COM 
4307836SJohn.Forte@Sun.COM 	/*
4317836SJohn.Forte@Sun.COM 	 * if we are the fork'ed client, just exit, if parent just return
4327836SJohn.Forte@Sun.COM 	 */
4337836SJohn.Forte@Sun.COM 	if (pid == 0) {
4347836SJohn.Forte@Sun.COM 		exit(rc);
4357836SJohn.Forte@Sun.COM 		/*NOTREACHED*/
4367836SJohn.Forte@Sun.COM 	} else {
4377836SJohn.Forte@Sun.COM 		return (rc);
4387836SJohn.Forte@Sun.COM 	}
4397836SJohn.Forte@Sun.COM }
4407836SJohn.Forte@Sun.COM 
4417836SJohn.Forte@Sun.COM /*
4427836SJohn.Forte@Sun.COM  * First function run by a NSKERND_LOCK thread.
4437836SJohn.Forte@Sun.COM  *
4447836SJohn.Forte@Sun.COM  * Opens dscfg and locks it,
4457836SJohn.Forte@Sun.COM  * and then calls back into the kernel.
4467836SJohn.Forte@Sun.COM  *
4477836SJohn.Forte@Sun.COM  * Incoming:
4487836SJohn.Forte@Sun.COM  *	data1 is the kernel address of the sync structure.
4497836SJohn.Forte@Sun.COM  *	data2 is read(0)/write(1) lock mode.
4507836SJohn.Forte@Sun.COM  *
4517836SJohn.Forte@Sun.COM  * Returns:
4527836SJohn.Forte@Sun.COM  *	data1 as incoming.
4537836SJohn.Forte@Sun.COM  *	data2 errno.
4547836SJohn.Forte@Sun.COM  */
4557836SJohn.Forte@Sun.COM static void *
_dolock(void * arg)4567836SJohn.Forte@Sun.COM _dolock(void *arg)
4577836SJohn.Forte@Sun.COM {
4587836SJohn.Forte@Sun.COM 	struct nskernd nsk;
4597836SJohn.Forte@Sun.COM 	CFGFILE *cfg;
4607836SJohn.Forte@Sun.COM 	int locked;
4617836SJohn.Forte@Sun.COM 	int mode;
4627836SJohn.Forte@Sun.COM 	int rc = 0;
4637836SJohn.Forte@Sun.COM 
4647836SJohn.Forte@Sun.COM 	/* copy arguments onto stack and free heap memory */
4657836SJohn.Forte@Sun.COM 	bcopy(arg, &nsk, sizeof (nsk));
4667836SJohn.Forte@Sun.COM 	free(arg);
4677836SJohn.Forte@Sun.COM 
468*11576SSurya.Prakki@Sun.COM 	(void) mutex_lock(&cfg_mutex);
4697836SJohn.Forte@Sun.COM 	cfg = cfg_open("");
4707836SJohn.Forte@Sun.COM 	if (cfg == NULL) {
4717836SJohn.Forte@Sun.COM #ifdef DEBUG
472*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
473*11576SSurya.Prakki@Sun.COM 		    gettext("nskernd: cfg_open failed: %s\n"),
4747836SJohn.Forte@Sun.COM 		    strerror(errno));
4757836SJohn.Forte@Sun.COM #endif
4767836SJohn.Forte@Sun.COM 		rc = ENXIO;
4777836SJohn.Forte@Sun.COM 	}
4787836SJohn.Forte@Sun.COM 
4797836SJohn.Forte@Sun.COM 	if (nsk.data2 == 0) {
4807836SJohn.Forte@Sun.COM 		mode = CFG_RDLOCK;
4817836SJohn.Forte@Sun.COM 	} else {
4827836SJohn.Forte@Sun.COM 		mode = CFG_WRLOCK;
4837836SJohn.Forte@Sun.COM 	}
4847836SJohn.Forte@Sun.COM 
4857836SJohn.Forte@Sun.COM 	locked = 0;
4867836SJohn.Forte@Sun.COM 	if (rc == 0) {
4877836SJohn.Forte@Sun.COM 		if (cfg_lock(cfg, mode)) {
4887836SJohn.Forte@Sun.COM 			locked = 1;
4897836SJohn.Forte@Sun.COM 		} else {
4907836SJohn.Forte@Sun.COM #ifdef DEBUG
491*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
4927836SJohn.Forte@Sun.COM 			    gettext("nskernd: cfg_lock failed: %s\n"),
4937836SJohn.Forte@Sun.COM 			    strerror(errno));
4947836SJohn.Forte@Sun.COM #endif
4957836SJohn.Forte@Sun.COM 			rc = EINVAL;
4967836SJohn.Forte@Sun.COM 		}
4977836SJohn.Forte@Sun.COM 	}
4987836SJohn.Forte@Sun.COM 
4997836SJohn.Forte@Sun.COM 	/* return to kernel */
5007836SJohn.Forte@Sun.COM 
5017836SJohn.Forte@Sun.COM 	nsk.data2 = (uint64_t)rc;
5027836SJohn.Forte@Sun.COM 	if (nthread_inc()) {
5037836SJohn.Forte@Sun.COM 		(void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk);
5047836SJohn.Forte@Sun.COM 		nthread_dec();
5057836SJohn.Forte@Sun.COM 	}
5067836SJohn.Forte@Sun.COM 
5077836SJohn.Forte@Sun.COM 	/* cleanup */
5087836SJohn.Forte@Sun.COM 
5097836SJohn.Forte@Sun.COM 	if (locked) {
5107836SJohn.Forte@Sun.COM 		cfg_unlock(cfg);
5117836SJohn.Forte@Sun.COM 		locked = 0;
5127836SJohn.Forte@Sun.COM 	}
5137836SJohn.Forte@Sun.COM 
5147836SJohn.Forte@Sun.COM 	if (cfg != NULL) {
5157836SJohn.Forte@Sun.COM 		cfg_close(cfg);
5167836SJohn.Forte@Sun.COM 		cfg = NULL;
5177836SJohn.Forte@Sun.COM 	}
518*11576SSurya.Prakki@Sun.COM 	(void) mutex_unlock(&cfg_mutex);
5197836SJohn.Forte@Sun.COM 
5207836SJohn.Forte@Sun.COM 	return (NULL);
5217836SJohn.Forte@Sun.COM }
5227836SJohn.Forte@Sun.COM 
5237836SJohn.Forte@Sun.COM 
5247836SJohn.Forte@Sun.COM /*
5257836SJohn.Forte@Sun.COM  * Inter-node lock thread.
5267836SJohn.Forte@Sun.COM  *
5277836SJohn.Forte@Sun.COM  * This is the user level side of nsc_rmlock().
5287836SJohn.Forte@Sun.COM  */
5297836SJohn.Forte@Sun.COM static void
dolock(struct nskernd * req)5307836SJohn.Forte@Sun.COM dolock(struct nskernd *req)
5317836SJohn.Forte@Sun.COM {
5327836SJohn.Forte@Sun.COM 	struct nskernd *nskp;
5337836SJohn.Forte@Sun.COM 	thread_t tid;
5347836SJohn.Forte@Sun.COM 	int rc;
5357836SJohn.Forte@Sun.COM 
5367836SJohn.Forte@Sun.COM 	/* create a new thread to do the lock and return to kernel */
5377836SJohn.Forte@Sun.COM 
5387836SJohn.Forte@Sun.COM 	nskp = malloc(sizeof (*nskp));
5397836SJohn.Forte@Sun.COM 	if (!nskp) {
5407836SJohn.Forte@Sun.COM #ifdef DEBUG
541*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
542*11576SSurya.Prakki@Sun.COM 		    gettext("nskernd:dolock: malloc(%d) failed\n"),
5437836SJohn.Forte@Sun.COM 		    sizeof (*nskp));
5447836SJohn.Forte@Sun.COM #endif
5457836SJohn.Forte@Sun.COM 		req->data1 = (uint64_t)ENOMEM;
5467836SJohn.Forte@Sun.COM 		return;
5477836SJohn.Forte@Sun.COM 	}
5487836SJohn.Forte@Sun.COM 
5497836SJohn.Forte@Sun.COM 	/* copy args for child */
5507836SJohn.Forte@Sun.COM 	bcopy(req, nskp, sizeof (*nskp));
5517836SJohn.Forte@Sun.COM 
5527836SJohn.Forte@Sun.COM 	rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE),
5537836SJohn.Forte@Sun.COM 	    _dolock, nskp, THR_BOUND|THR_DETACHED, &tid);
5547836SJohn.Forte@Sun.COM 
5557836SJohn.Forte@Sun.COM 	if (rc != 0) {
5567836SJohn.Forte@Sun.COM 		/* thr_create failed */
5577836SJohn.Forte@Sun.COM #ifdef DEBUG
558*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
559*11576SSurya.Prakki@Sun.COM 		    gettext("nskernd: thr_create failed: %s\n"),
5607836SJohn.Forte@Sun.COM 		    strerror(errno));
5617836SJohn.Forte@Sun.COM #endif
5627836SJohn.Forte@Sun.COM 		req->data1 = (uint64_t)errno;
5637836SJohn.Forte@Sun.COM 		free(nskp);
5647836SJohn.Forte@Sun.COM 	} else {
5657836SJohn.Forte@Sun.COM 		/* success - _dolock() will free nskp */
5667836SJohn.Forte@Sun.COM 		req->data1 = (uint64_t)0;
5677836SJohn.Forte@Sun.COM 	}
5687836SJohn.Forte@Sun.COM }
5697836SJohn.Forte@Sun.COM 
5707836SJohn.Forte@Sun.COM 
5717836SJohn.Forte@Sun.COM /*
5727836SJohn.Forte@Sun.COM  * Convenience code for engineering test of multi-terabyte volumes.
5737836SJohn.Forte@Sun.COM  *
5747836SJohn.Forte@Sun.COM  * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI
5757836SJohn.Forte@Sun.COM  * labels.  This code allocates a simple efi label structure and ioctls
5767836SJohn.Forte@Sun.COM  * to extract the size of a zvol.  It only handles the minimal EFI ioctl
5777836SJohn.Forte@Sun.COM  * implementation in zvol.
5787836SJohn.Forte@Sun.COM  */
5797836SJohn.Forte@Sun.COM 
5807836SJohn.Forte@Sun.COM static void
zvol_bsize(char * path,uint64_t * size,const int pnum)5817836SJohn.Forte@Sun.COM zvol_bsize(char *path, uint64_t *size, const int pnum)
5827836SJohn.Forte@Sun.COM {
5837836SJohn.Forte@Sun.COM 	struct stat64 stb1, stb2;
5847836SJohn.Forte@Sun.COM 	struct dk_minfo dkm;
5857836SJohn.Forte@Sun.COM 	int fd = -1;
5867836SJohn.Forte@Sun.COM 	int rc;
5877836SJohn.Forte@Sun.COM 
5887836SJohn.Forte@Sun.COM 	if (cl_nodeid || pnum != 0)
5897836SJohn.Forte@Sun.COM 		return;
5907836SJohn.Forte@Sun.COM 
5917836SJohn.Forte@Sun.COM 	if ((fd = open(path, O_RDONLY)) < 0) {
5927836SJohn.Forte@Sun.COM 		return;
5937836SJohn.Forte@Sun.COM 	}
5947836SJohn.Forte@Sun.COM 
5957836SJohn.Forte@Sun.COM 	if (stat64("/devices/pseudo/zfs@0:zfs", &stb1) != 0 ||
5967836SJohn.Forte@Sun.COM 	    fstat64(fd, &stb2) != 0 ||
5977836SJohn.Forte@Sun.COM 	    !S_ISCHR(stb1.st_mode) ||
5987836SJohn.Forte@Sun.COM 	    !S_ISCHR(stb2.st_mode) ||
5997836SJohn.Forte@Sun.COM 	    major(stb1.st_rdev) != major(stb2.st_rdev)) {
6007836SJohn.Forte@Sun.COM 		(void) close(fd);
6017836SJohn.Forte@Sun.COM 		return;
6027836SJohn.Forte@Sun.COM 	}
6037836SJohn.Forte@Sun.COM 
6047836SJohn.Forte@Sun.COM 	rc = ioctl(fd, DKIOCGMEDIAINFO, (void *)&dkm);
6057836SJohn.Forte@Sun.COM 	if (rc >= 0) {
6067836SJohn.Forte@Sun.COM 		*size = LE_64(dkm.dki_capacity) *
6077836SJohn.Forte@Sun.COM 			(dkm.dki_lbsize) / 512;
6087836SJohn.Forte@Sun.COM 	}
6097836SJohn.Forte@Sun.COM 
6107836SJohn.Forte@Sun.COM 	(void) close(fd);
6117836SJohn.Forte@Sun.COM }
6127836SJohn.Forte@Sun.COM 
6137836SJohn.Forte@Sun.COM /* ARGSUSED */
6147836SJohn.Forte@Sun.COM static void
get_bsize(uint64_t raw_fd,uint64_t * size,int * partitionp,char * path)6157836SJohn.Forte@Sun.COM get_bsize(uint64_t raw_fd, uint64_t *size, int *partitionp, char *path)
6167836SJohn.Forte@Sun.COM {
6177836SJohn.Forte@Sun.COM 	struct nscioc_bsize bsize;
6187836SJohn.Forte@Sun.COM #ifdef DKIOCPARTITION
6197836SJohn.Forte@Sun.COM 	struct partition64 p64;
6207836SJohn.Forte@Sun.COM #endif
6217836SJohn.Forte@Sun.COM 	struct dk_cinfo dki_info;
6227836SJohn.Forte@Sun.COM 	struct vtoc vtoc;
6237836SJohn.Forte@Sun.COM 	int fd;
6247836SJohn.Forte@Sun.COM 
6257836SJohn.Forte@Sun.COM 	*partitionp = -1;
6267836SJohn.Forte@Sun.COM 	*size = (uint64_t)0;
6277836SJohn.Forte@Sun.COM 
6287836SJohn.Forte@Sun.COM 	dki_info.dki_partition = (ushort_t)-1;
6297836SJohn.Forte@Sun.COM 	bsize.dki_info = (uint64_t)(unsigned long)&dki_info;
6307836SJohn.Forte@Sun.COM 	bsize.vtoc = (uint64_t)(unsigned long)&vtoc;
6317836SJohn.Forte@Sun.COM 	bsize.raw_fd = raw_fd;
6327836SJohn.Forte@Sun.COM 	bsize.efi = 0;
6337836SJohn.Forte@Sun.COM 
6347836SJohn.Forte@Sun.COM 	fd = open(rdev, O_RDONLY);
6357836SJohn.Forte@Sun.COM 	if (fd < 0)
6367836SJohn.Forte@Sun.COM 		return;
6377836SJohn.Forte@Sun.COM 
6387836SJohn.Forte@Sun.COM 	if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) {
6397836SJohn.Forte@Sun.COM 		if (dki_info.dki_partition != (ushort_t)-1) {
6407836SJohn.Forte@Sun.COM 			/* assume part# is ok and just the size failed */
6417836SJohn.Forte@Sun.COM 			*partitionp = (int)dki_info.dki_partition;
6427836SJohn.Forte@Sun.COM 
6437836SJohn.Forte@Sun.COM #ifdef DKIOCPARTITION
6447836SJohn.Forte@Sun.COM 			/* see if this is an EFI label */
6457836SJohn.Forte@Sun.COM 			bzero(&p64, sizeof (p64));
6467836SJohn.Forte@Sun.COM 			p64.p_partno = (uint_t)*partitionp;
6477836SJohn.Forte@Sun.COM 			if ((ioctl(fd, DKIOCPARTITION, &p64)) > 0) {
6487836SJohn.Forte@Sun.COM 				*size = (uint64_t)p64.p_size;
6497836SJohn.Forte@Sun.COM 			} else {
6507836SJohn.Forte@Sun.COM 				bsize.p64 = (uint64_t)(unsigned long)&p64;
6517836SJohn.Forte@Sun.COM 				bsize.efi = 1;
6527836SJohn.Forte@Sun.COM 
6537836SJohn.Forte@Sun.COM 				if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) {
6547836SJohn.Forte@Sun.COM 					/* see if this is a zvol */
6557836SJohn.Forte@Sun.COM 					zvol_bsize(path, size, *partitionp);
6567836SJohn.Forte@Sun.COM 				} else {
6577836SJohn.Forte@Sun.COM 					*size = (uint64_t)p64.p_size;
6587836SJohn.Forte@Sun.COM 				}
6597836SJohn.Forte@Sun.COM 			}
6607836SJohn.Forte@Sun.COM #endif	/* DKIOCPARTITION */
6617836SJohn.Forte@Sun.COM 		}
6627836SJohn.Forte@Sun.COM 
663*11576SSurya.Prakki@Sun.COM 		(void) close(fd);
6647836SJohn.Forte@Sun.COM 		return;
6657836SJohn.Forte@Sun.COM 	}
6667836SJohn.Forte@Sun.COM 
667*11576SSurya.Prakki@Sun.COM 	(void) close(fd);
6687836SJohn.Forte@Sun.COM 
6697836SJohn.Forte@Sun.COM 	*partitionp = (int)dki_info.dki_partition;
6707836SJohn.Forte@Sun.COM 
6717836SJohn.Forte@Sun.COM 	if (vtoc.v_sanity != VTOC_SANE)
6727836SJohn.Forte@Sun.COM 		return;
6737836SJohn.Forte@Sun.COM 
6747836SJohn.Forte@Sun.COM 	if (vtoc.v_version != V_VERSION && vtoc.v_version != 0)
6757836SJohn.Forte@Sun.COM 		return;
6767836SJohn.Forte@Sun.COM 
6777836SJohn.Forte@Sun.COM 	if (dki_info.dki_partition > V_NUMPAR)
6787836SJohn.Forte@Sun.COM 		return;
6797836SJohn.Forte@Sun.COM 
6807836SJohn.Forte@Sun.COM 	*size = (uint64_t)vtoc.v_part[(int)dki_info.dki_partition].p_size;
6817836SJohn.Forte@Sun.COM }
6827836SJohn.Forte@Sun.COM 
6837836SJohn.Forte@Sun.COM 
6847836SJohn.Forte@Sun.COM static int
iscluster(void)6857836SJohn.Forte@Sun.COM iscluster(void)
6867836SJohn.Forte@Sun.COM {
6877836SJohn.Forte@Sun.COM 	/*
6887836SJohn.Forte@Sun.COM 	 * Find out if we are running in a cluster
6897836SJohn.Forte@Sun.COM 	 */
6907836SJohn.Forte@Sun.COM 	cl_nodeid = cfg_iscluster();
6917836SJohn.Forte@Sun.COM 	if (cl_nodeid > 0) {
6927836SJohn.Forte@Sun.COM 		return (TRUE);
6937836SJohn.Forte@Sun.COM 	} else if (cl_nodeid == 0) {
6947836SJohn.Forte@Sun.COM 		return (FALSE);
6957836SJohn.Forte@Sun.COM 	}
6967836SJohn.Forte@Sun.COM 
697*11576SSurya.Prakki@Sun.COM 	(void) fprintf(stderr, "%s\n",
6987836SJohn.Forte@Sun.COM 	    gettext("nskernd: unable to ascertain environment"));
6997836SJohn.Forte@Sun.COM 	exit(1);
7007836SJohn.Forte@Sun.COM 	/* NOTREACHED */
7017836SJohn.Forte@Sun.COM }
7027836SJohn.Forte@Sun.COM 
7037836SJohn.Forte@Sun.COM /*
7047836SJohn.Forte@Sun.COM  * Runtime Solaris release checking - build release == runtime release
7057836SJohn.Forte@Sun.COM  * is always considered success, so only keep entries in the map for
7067836SJohn.Forte@Sun.COM  * the special cases.
7077836SJohn.Forte@Sun.COM  */
7087836SJohn.Forte@Sun.COM static nsc_release_t nskernd_rel_map[] = {
7097836SJohn.Forte@Sun.COM /*	{ "5.10", "5.10" },			*/
7107836SJohn.Forte@Sun.COM 	{ "5.11", "5.10" },
7117836SJohn.Forte@Sun.COM 	{ NULL, NULL }
7127836SJohn.Forte@Sun.COM };
7137836SJohn.Forte@Sun.COM 
7147836SJohn.Forte@Sun.COM 
7157836SJohn.Forte@Sun.COM #ifdef lint
7167836SJohn.Forte@Sun.COM #define	main	nskernd_main
7177836SJohn.Forte@Sun.COM #endif
7187836SJohn.Forte@Sun.COM /* ARGSUSED1 */
7197836SJohn.Forte@Sun.COM int
main(int argc,char * argv[])7207836SJohn.Forte@Sun.COM main(int argc, char *argv[])
7217836SJohn.Forte@Sun.COM {
7227836SJohn.Forte@Sun.COM 	const char *dir = "/";
7237836SJohn.Forte@Sun.COM 	struct nskernd data;
7247836SJohn.Forte@Sun.COM 	struct rlimit rl;
7257836SJohn.Forte@Sun.COM 	int i, run, rc;
7267836SJohn.Forte@Sun.COM 	int partition;
7277836SJohn.Forte@Sun.COM 	char *reqd;
7287836SJohn.Forte@Sun.COM 	int syncpipe[2];
7297836SJohn.Forte@Sun.COM 	int startup;
7307836SJohn.Forte@Sun.COM 
7317836SJohn.Forte@Sun.COM 	(void) setlocale(LC_ALL, "");
7327836SJohn.Forte@Sun.COM 	(void) textdomain("nskernd");
7337836SJohn.Forte@Sun.COM 
7347836SJohn.Forte@Sun.COM 	rc = nsc_check_release(BUILD_REV_STR, nskernd_rel_map, &reqd);
7357836SJohn.Forte@Sun.COM 	if (rc < 0) {
736*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
7377836SJohn.Forte@Sun.COM 		    gettext("nskernd: unable to determine the current "
7387836SJohn.Forte@Sun.COM 		    "Solaris release: %s\n"), strerror(errno));
7397836SJohn.Forte@Sun.COM 		exit(1);
7407836SJohn.Forte@Sun.COM 	} else if (rc == FALSE) {
741*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
7427836SJohn.Forte@Sun.COM 		    gettext("nskernd: incorrect Solaris release "
7437836SJohn.Forte@Sun.COM 		    "(requires %s)\n"), reqd);
7447836SJohn.Forte@Sun.COM 		exit(1);
7457836SJohn.Forte@Sun.COM 	}
7467836SJohn.Forte@Sun.COM 
7477836SJohn.Forte@Sun.COM 	rc = 0;
7487836SJohn.Forte@Sun.COM 
7497836SJohn.Forte@Sun.COM 	if (argc != 1)
7507836SJohn.Forte@Sun.COM 		usage();
7517836SJohn.Forte@Sun.COM 
7527836SJohn.Forte@Sun.COM 	/*
7537836SJohn.Forte@Sun.COM 	 * Usage: <progname> [-g] [-d <seconds to delay>]
7547836SJohn.Forte@Sun.COM 	 */
7557836SJohn.Forte@Sun.COM 	while ((i = getopt(argc, argv, "gd:")) != EOF) {
7567836SJohn.Forte@Sun.COM 		switch (i) {
7577836SJohn.Forte@Sun.COM 			case 'g':
7587836SJohn.Forte@Sun.COM 				display_msg = 1;
7597836SJohn.Forte@Sun.COM 				break;
7607836SJohn.Forte@Sun.COM 			case 'd':
7617836SJohn.Forte@Sun.COM 				delay_time = atoi(optarg);
7627836SJohn.Forte@Sun.COM 				if (delay_time <= 0) {
7637836SJohn.Forte@Sun.COM 					delay_time = 30;
7647836SJohn.Forte@Sun.COM 				}
7657836SJohn.Forte@Sun.COM 				break;
7667836SJohn.Forte@Sun.COM 			default:
7677836SJohn.Forte@Sun.COM 				syslog(LOG_ERR,
7687836SJohn.Forte@Sun.COM 				"Usage: nskernd [-g] [-d <seconds to delay>]");
7697836SJohn.Forte@Sun.COM 				exit(1);
7707836SJohn.Forte@Sun.COM 				break;
7717836SJohn.Forte@Sun.COM 		}
7727836SJohn.Forte@Sun.COM 	}
7737836SJohn.Forte@Sun.COM 
7747836SJohn.Forte@Sun.COM 	if (chroot(dir) < 0) {
775*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr, gettext("nskernd: chroot failed: %s\n"),
776*11576SSurya.Prakki@Sun.COM 		    strerror(errno));
7777836SJohn.Forte@Sun.COM 		exit(1);
7787836SJohn.Forte@Sun.COM 	}
7797836SJohn.Forte@Sun.COM 
7807836SJohn.Forte@Sun.COM 	if (chdir(dir) < 0) {
781*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr, gettext("nskernd: chdir failed: %s\n"),
782*11576SSurya.Prakki@Sun.COM 		    strerror(errno));
7837836SJohn.Forte@Sun.COM 		exit(1);
7847836SJohn.Forte@Sun.COM 	}
7857836SJohn.Forte@Sun.COM 
7867836SJohn.Forte@Sun.COM 	/*
7877836SJohn.Forte@Sun.COM 	 * Determine if we are in a Sun Cluster or not, before fork'ing
7887836SJohn.Forte@Sun.COM 	 */
7897836SJohn.Forte@Sun.COM 	(void) iscluster();
7907836SJohn.Forte@Sun.COM 
7917836SJohn.Forte@Sun.COM 	/*
7927836SJohn.Forte@Sun.COM 	 * create a pipe to synchronise the parent with the
7937836SJohn.Forte@Sun.COM 	 * child just before it enters its service loop.
7947836SJohn.Forte@Sun.COM 	 */
7957836SJohn.Forte@Sun.COM 	if (pipe(syncpipe) < 0) {
796*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
797*11576SSurya.Prakki@Sun.COM 		    gettext("nskernd: cannot create pipe: %s\n"),
7987836SJohn.Forte@Sun.COM 		    strerror(errno));
7997836SJohn.Forte@Sun.COM 		exit(1);
8007836SJohn.Forte@Sun.COM 	}
8017836SJohn.Forte@Sun.COM 	/*
8027836SJohn.Forte@Sun.COM 	 * Fork off a child that becomes the daemon.
8037836SJohn.Forte@Sun.COM 	 */
8047836SJohn.Forte@Sun.COM 
8057836SJohn.Forte@Sun.COM 	if ((rc = fork()) > 0) {
8067836SJohn.Forte@Sun.COM 		char c;
8077836SJohn.Forte@Sun.COM 		int n;
8087836SJohn.Forte@Sun.COM 		(void) close(syncpipe[1]);
8097836SJohn.Forte@Sun.COM 		/*
8107836SJohn.Forte@Sun.COM 		 * wait for the close of the pipe.
8117836SJohn.Forte@Sun.COM 		 * If we get a char back, indicates good
8127836SJohn.Forte@Sun.COM 		 * status from child, so exit 0.
8137836SJohn.Forte@Sun.COM 		 * If we get a zero length read, then the
8147836SJohn.Forte@Sun.COM 		 * child has failed, so we do too.
8157836SJohn.Forte@Sun.COM 		 */
8167836SJohn.Forte@Sun.COM 		n = read(syncpipe[0], &c, 1);
8177836SJohn.Forte@Sun.COM 		exit((n <= 0) ? 1 : 0);
8187836SJohn.Forte@Sun.COM 	} else if (rc < 0) {
819*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr, gettext("nskernd: cannot fork: %s\n"),
820*11576SSurya.Prakki@Sun.COM 		    strerror(errno));
8217836SJohn.Forte@Sun.COM 		exit(1);
8227836SJohn.Forte@Sun.COM 	}
8237836SJohn.Forte@Sun.COM 
8247836SJohn.Forte@Sun.COM 	/*
8257836SJohn.Forte@Sun.COM 	 * In child - become daemon.
8267836SJohn.Forte@Sun.COM 	 */
8277836SJohn.Forte@Sun.COM 
8287836SJohn.Forte@Sun.COM 	/* use closefrom(3C) from PSARC/2000/193 when possible */
8297836SJohn.Forte@Sun.COM 	for (i = 0; i < syncpipe[1]; i++) {
8307836SJohn.Forte@Sun.COM 		(void) close(i);
8317836SJohn.Forte@Sun.COM 	}
8327836SJohn.Forte@Sun.COM 	closefrom(syncpipe[1] + 1);
8337836SJohn.Forte@Sun.COM 
8347836SJohn.Forte@Sun.COM 	(void) open("/dev/console", O_WRONLY|O_APPEND);
8357836SJohn.Forte@Sun.COM 	(void) dup(0);
8367836SJohn.Forte@Sun.COM 	(void) dup(0);
8377836SJohn.Forte@Sun.COM 	(void) close(0);
8387836SJohn.Forte@Sun.COM 
839*11576SSurya.Prakki@Sun.COM 	(void) setpgrp();
8407836SJohn.Forte@Sun.COM 
8417836SJohn.Forte@Sun.COM 	/*
8427836SJohn.Forte@Sun.COM 	 * Ignore all signals apart from SIGTERM.
8437836SJohn.Forte@Sun.COM 	 */
8447836SJohn.Forte@Sun.COM 
8457836SJohn.Forte@Sun.COM 	for (i = 1; i < _sys_nsig; i++)
8467836SJohn.Forte@Sun.COM 		(void) sigset(i, SIG_IGN);
8477836SJohn.Forte@Sun.COM 
8487836SJohn.Forte@Sun.COM 	(void) sigset(SIGTERM, sighand);
8497836SJohn.Forte@Sun.COM 
8507836SJohn.Forte@Sun.COM 	/*
8517836SJohn.Forte@Sun.COM 	 * Increase the number of fd's that can be open.
8527836SJohn.Forte@Sun.COM 	 */
8537836SJohn.Forte@Sun.COM 
8547836SJohn.Forte@Sun.COM 	rl.rlim_cur = RLIM_INFINITY;
8557836SJohn.Forte@Sun.COM 	rl.rlim_max = RLIM_INFINITY;
8567836SJohn.Forte@Sun.COM 	if (setrlimit(RLIMIT_NOFILE, &rl) < 0) {
857*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
8587836SJohn.Forte@Sun.COM 		    gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"),
8597836SJohn.Forte@Sun.COM 		    strerror(errno));
860*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr,
8617836SJohn.Forte@Sun.COM 		    gettext("nskernd: the maximum number of nsctl open "
8627836SJohn.Forte@Sun.COM 		    "devices may be reduced\n"));
8637836SJohn.Forte@Sun.COM 	}
8647836SJohn.Forte@Sun.COM 
8657836SJohn.Forte@Sun.COM 	/*
8667836SJohn.Forte@Sun.COM 	 * Open /dev/nsctl and startup.
8677836SJohn.Forte@Sun.COM 	 */
8687836SJohn.Forte@Sun.COM 
8697836SJohn.Forte@Sun.COM 	nsctl_fd = open(rdev, O_RDONLY);
8707836SJohn.Forte@Sun.COM 	if (nsctl_fd < 0) {
871*11576SSurya.Prakki@Sun.COM 		(void) fprintf(stderr, gettext("nskernd: unable to open %s\n"),
872*11576SSurya.Prakki@Sun.COM 		    rdev);
8737836SJohn.Forte@Sun.COM 		exit(1);
8747836SJohn.Forte@Sun.COM 	}
8757836SJohn.Forte@Sun.COM 
8767836SJohn.Forte@Sun.COM 	bzero(&data, sizeof (data));
8777836SJohn.Forte@Sun.COM 
8787836SJohn.Forte@Sun.COM 	data.command = NSKERND_START;
8797836SJohn.Forte@Sun.COM 	data.data1 = (uint64_t)cl_nodeid;
8807836SJohn.Forte@Sun.COM 	run = 1;
8817836SJohn.Forte@Sun.COM 
8827836SJohn.Forte@Sun.COM 	startup = 1;
8837836SJohn.Forte@Sun.COM 	while (run) {
8847836SJohn.Forte@Sun.COM 		rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data);
8857836SJohn.Forte@Sun.COM 		if (rc < 0) {
8867836SJohn.Forte@Sun.COM 			/* try and do kernel cleanup and exit */
8877836SJohn.Forte@Sun.COM 			if (shutdown()) {
8887836SJohn.Forte@Sun.COM 				run = 0;
8897836SJohn.Forte@Sun.COM 			} else {
8907836SJohn.Forte@Sun.COM 				sigterm = 0;
8917836SJohn.Forte@Sun.COM 			}
8927836SJohn.Forte@Sun.COM 
893*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
8947836SJohn.Forte@Sun.COM 			    gettext("nskernd: NSCIOC_NSKERND failed: %s\n"),
8957836SJohn.Forte@Sun.COM 			    strerror(errno));
8967836SJohn.Forte@Sun.COM 			continue;
8977836SJohn.Forte@Sun.COM 		} else if (sigterm) {
8987836SJohn.Forte@Sun.COM 			/* SIGTERM received - terminate */
8997836SJohn.Forte@Sun.COM 			if (data.command != NSKERND_START &&
9007836SJohn.Forte@Sun.COM 			    (data.command != NSKERND_STOP ||
9017836SJohn.Forte@Sun.COM 			    data.data1 != (uint64_t)1)) {
9027836SJohn.Forte@Sun.COM 				/* need to do kernel cleanup */
9037836SJohn.Forte@Sun.COM 				if (shutdown()) {
9047836SJohn.Forte@Sun.COM 					run = 0;
9057836SJohn.Forte@Sun.COM 				} else {
9067836SJohn.Forte@Sun.COM 					sigterm = 0;
9077836SJohn.Forte@Sun.COM 					data.command = NSKERND_START;
9087836SJohn.Forte@Sun.COM 					data.data1 = (uint64_t)cl_nodeid;
9097836SJohn.Forte@Sun.COM 				}
9107836SJohn.Forte@Sun.COM 			} else {
9117836SJohn.Forte@Sun.COM 				/* just quit */
9127836SJohn.Forte@Sun.COM 				if (canshutdown()) {
9137836SJohn.Forte@Sun.COM 					run = 0;
9147836SJohn.Forte@Sun.COM 				} else {
9157836SJohn.Forte@Sun.COM 					/* cannot shutdown - threads active */
9167836SJohn.Forte@Sun.COM 					sigterm = 0;
9177836SJohn.Forte@Sun.COM 					data.command = NSKERND_START;
9187836SJohn.Forte@Sun.COM 					data.data1 = (uint64_t)cl_nodeid;
9197836SJohn.Forte@Sun.COM 				}
9207836SJohn.Forte@Sun.COM 			}
9217836SJohn.Forte@Sun.COM 			continue;
9227836SJohn.Forte@Sun.COM 		}
9237836SJohn.Forte@Sun.COM 		if (startup) {
9247836SJohn.Forte@Sun.COM 			char c = 0;
9257836SJohn.Forte@Sun.COM 			(void) write(syncpipe[1], &c, 1);
9267836SJohn.Forte@Sun.COM 			(void) close(syncpipe[1]);
9277836SJohn.Forte@Sun.COM 			startup = 0;
9287836SJohn.Forte@Sun.COM 		}
9297836SJohn.Forte@Sun.COM 		switch (data.command) {
9307836SJohn.Forte@Sun.COM 		case NSKERND_START:	/* (re)start completion */
9317836SJohn.Forte@Sun.COM 			if (rc == 1) {
932*11576SSurya.Prakki@Sun.COM 				(void) fprintf(stderr,
9337836SJohn.Forte@Sun.COM 				    gettext("nskernd: already started\n"));
9347836SJohn.Forte@Sun.COM 				run = 0;
9357836SJohn.Forte@Sun.COM 			} else if (rc == 2) {
936*11576SSurya.Prakki@Sun.COM 				(void) fprintf(stderr,
9377836SJohn.Forte@Sun.COM 				    gettext("nskernd: stopped by kernel\n"));
9387836SJohn.Forte@Sun.COM 				run = 0;
9397836SJohn.Forte@Sun.COM 			}
9407836SJohn.Forte@Sun.COM 			data.command = NSKERND_WAIT;
9417836SJohn.Forte@Sun.COM 			break;
9427836SJohn.Forte@Sun.COM 
9437836SJohn.Forte@Sun.COM 		case NSKERND_STOP:	/* kernel telling daemon to stop */
9447836SJohn.Forte@Sun.COM 			if (data.data1 != (uint64_t)1) {
9457836SJohn.Forte@Sun.COM 				(void) shutdown();
9467836SJohn.Forte@Sun.COM 				run = 0;
9477836SJohn.Forte@Sun.COM 			}
9487836SJohn.Forte@Sun.COM 			break;
9497836SJohn.Forte@Sun.COM 
9507836SJohn.Forte@Sun.COM 		case NSKERND_BSIZE:
9517836SJohn.Forte@Sun.COM 			/*
9527836SJohn.Forte@Sun.COM 			 * kernel requesting partsize
9537836SJohn.Forte@Sun.COM 			 * data1 - size return
9547836SJohn.Forte@Sun.COM 			 * data2 - raw_fd (entry)
9557836SJohn.Forte@Sun.COM 			 *	 - partition number (return)
9567836SJohn.Forte@Sun.COM 			 */
9577836SJohn.Forte@Sun.COM 			partition = -1;
9587836SJohn.Forte@Sun.COM 			get_bsize(data.data2, &data.data1,
9597836SJohn.Forte@Sun.COM 			    &partition, data.char1);
9607836SJohn.Forte@Sun.COM 			data.data2 = (uint64_t)partition;
9617836SJohn.Forte@Sun.COM 			data.command = NSKERND_WAIT;
9627836SJohn.Forte@Sun.COM 			break;
9637836SJohn.Forte@Sun.COM 
9647836SJohn.Forte@Sun.COM 		case NSKERND_NEWLWP:	/* kernel requesting a new LWP */
9657836SJohn.Forte@Sun.COM 			newlwp(&data);
9667836SJohn.Forte@Sun.COM 			data.command = NSKERND_WAIT;
9677836SJohn.Forte@Sun.COM 			break;
9687836SJohn.Forte@Sun.COM 
9697836SJohn.Forte@Sun.COM 		case NSKERND_LOCK:  	/* kernel requesting lock */
9707836SJohn.Forte@Sun.COM 			dolock(&data);
9717836SJohn.Forte@Sun.COM 			data.command = NSKERND_WAIT;
9727836SJohn.Forte@Sun.COM 			break;
9737836SJohn.Forte@Sun.COM 
9747836SJohn.Forte@Sun.COM 		case NSKERND_WAIT:	/* kernel retrying wait */
9757836SJohn.Forte@Sun.COM 			/*
9767836SJohn.Forte@Sun.COM 			 * the kernel thread can be woken by the dr config
9777836SJohn.Forte@Sun.COM 			 * utilities (ie cfgadm) therefore we just reissue
9787836SJohn.Forte@Sun.COM 			 * the wait.
9797836SJohn.Forte@Sun.COM 			 */
9807836SJohn.Forte@Sun.COM 			break;
9817836SJohn.Forte@Sun.COM 
9827836SJohn.Forte@Sun.COM 		case NSKERND_IIBITMAP:
9837836SJohn.Forte@Sun.COM 			rc = log_iibmp_err(data.char1, (int)data.data1);
9847836SJohn.Forte@Sun.COM 			data.data1 = (uint64_t)rc;
9857836SJohn.Forte@Sun.COM 			data.command = NSKERND_WAIT;
9867836SJohn.Forte@Sun.COM 			break;
9877836SJohn.Forte@Sun.COM 
9887836SJohn.Forte@Sun.COM 		default:
989*11576SSurya.Prakki@Sun.COM 			(void) fprintf(stderr,
9907836SJohn.Forte@Sun.COM 				gettext("nskernd: unknown command %d"),
9917836SJohn.Forte@Sun.COM 				data.command);
9927836SJohn.Forte@Sun.COM 			data.command = NSKERND_WAIT;
9937836SJohn.Forte@Sun.COM 			break;
9947836SJohn.Forte@Sun.COM 		}
9957836SJohn.Forte@Sun.COM 	}
9967836SJohn.Forte@Sun.COM 
9977836SJohn.Forte@Sun.COM 	(void) close(nsctl_fd);
9987836SJohn.Forte@Sun.COM 
9997836SJohn.Forte@Sun.COM 	return (rc);
10007836SJohn.Forte@Sun.COM }
1001