1*7836SJohn.Forte@Sun.COM /* 2*7836SJohn.Forte@Sun.COM * CDDL HEADER START 3*7836SJohn.Forte@Sun.COM * 4*7836SJohn.Forte@Sun.COM * The contents of this file are subject to the terms of the 5*7836SJohn.Forte@Sun.COM * Common Development and Distribution License (the "License"). 6*7836SJohn.Forte@Sun.COM * You may not use this file except in compliance with the License. 7*7836SJohn.Forte@Sun.COM * 8*7836SJohn.Forte@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*7836SJohn.Forte@Sun.COM * or http://www.opensolaris.org/os/licensing. 10*7836SJohn.Forte@Sun.COM * See the License for the specific language governing permissions 11*7836SJohn.Forte@Sun.COM * and limitations under the License. 12*7836SJohn.Forte@Sun.COM * 13*7836SJohn.Forte@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 14*7836SJohn.Forte@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*7836SJohn.Forte@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 16*7836SJohn.Forte@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 17*7836SJohn.Forte@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 18*7836SJohn.Forte@Sun.COM * 19*7836SJohn.Forte@Sun.COM * CDDL HEADER END 20*7836SJohn.Forte@Sun.COM */ 21*7836SJohn.Forte@Sun.COM /* 22*7836SJohn.Forte@Sun.COM * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23*7836SJohn.Forte@Sun.COM * Use is subject to license terms. 24*7836SJohn.Forte@Sun.COM */ 25*7836SJohn.Forte@Sun.COM 26*7836SJohn.Forte@Sun.COM #include <sys/types.h> 27*7836SJohn.Forte@Sun.COM #include <sys/resource.h> 28*7836SJohn.Forte@Sun.COM #include <sys/priocntl.h> 29*7836SJohn.Forte@Sun.COM #include <sys/rtpriocntl.h> 30*7836SJohn.Forte@Sun.COM #include <sys/tspriocntl.h> 31*7836SJohn.Forte@Sun.COM #include <sys/wait.h> 32*7836SJohn.Forte@Sun.COM #include <sys/stat.h> 33*7836SJohn.Forte@Sun.COM 34*7836SJohn.Forte@Sun.COM #include <strings.h> 35*7836SJohn.Forte@Sun.COM #include <thread.h> 36*7836SJohn.Forte@Sun.COM #include <stdlib.h> 37*7836SJohn.Forte@Sun.COM #include <signal.h> 38*7836SJohn.Forte@Sun.COM #include <errno.h> 39*7836SJohn.Forte@Sun.COM #include <stdio.h> 40*7836SJohn.Forte@Sun.COM #include <fcntl.h> 41*7836SJohn.Forte@Sun.COM #include <locale.h> 42*7836SJohn.Forte@Sun.COM #include <unistd.h> 43*7836SJohn.Forte@Sun.COM #include <syslog.h> 44*7836SJohn.Forte@Sun.COM 45*7836SJohn.Forte@Sun.COM #include <sys/nsctl/cfg.h> 46*7836SJohn.Forte@Sun.COM #include <sys/nsctl/nsctl.h> 47*7836SJohn.Forte@Sun.COM #include <sys/nsctl/nsc_ioctl.h> 48*7836SJohn.Forte@Sun.COM #include <sys/nskernd.h> 49*7836SJohn.Forte@Sun.COM #include <nsctl.h> 50*7836SJohn.Forte@Sun.COM 51*7836SJohn.Forte@Sun.COM #include <sys/mkdev.h> 52*7836SJohn.Forte@Sun.COM #include <sys/nsctl/sv_efi.h> 53*7836SJohn.Forte@Sun.COM 54*7836SJohn.Forte@Sun.COM static const char *rdev = "/dev/nsctl"; 55*7836SJohn.Forte@Sun.COM 56*7836SJohn.Forte@Sun.COM /* 57*7836SJohn.Forte@Sun.COM * Define a minimal user stack size in bytes over and above the 58*7836SJohn.Forte@Sun.COM * libthread THR_STACK_MIN minimum value. 59*7836SJohn.Forte@Sun.COM * 60*7836SJohn.Forte@Sun.COM * This stack size needs to be sufficient to run _newlwp() and then 61*7836SJohn.Forte@Sun.COM * ioctl() down into the kernel. 62*7836SJohn.Forte@Sun.COM */ 63*7836SJohn.Forte@Sun.COM #define NSK_STACK_SIZE 512 64*7836SJohn.Forte@Sun.COM 65*7836SJohn.Forte@Sun.COM /* 66*7836SJohn.Forte@Sun.COM * LWP scheduling control switches. 67*7836SJohn.Forte@Sun.COM * 68*7836SJohn.Forte@Sun.COM * allow_pri - set to non-zero to enable priocntl() manipulations of 69*7836SJohn.Forte@Sun.COM * created LWPs. 70*7836SJohn.Forte@Sun.COM * allow_rt - set to non-zero to use the RT rather than the TS 71*7836SJohn.Forte@Sun.COM * scheduling class when manipulating the schduling 72*7836SJohn.Forte@Sun.COM * parameters for an LWP. Only used if allow_pri is 73*7836SJohn.Forte@Sun.COM * non-zero. 74*7836SJohn.Forte@Sun.COM */ 75*7836SJohn.Forte@Sun.COM static int allow_pri = 1; 76*7836SJohn.Forte@Sun.COM static int allow_rt = 0; /* disallow - bad interactions with timeout() */ 77*7836SJohn.Forte@Sun.COM 78*7836SJohn.Forte@Sun.COM static int nsctl_fd = -1; 79*7836SJohn.Forte@Sun.COM static int sigterm; 80*7836SJohn.Forte@Sun.COM 81*7836SJohn.Forte@Sun.COM static int nthreads; /* number of threads in the kernel */ 82*7836SJohn.Forte@Sun.COM static int exiting; /* shutdown in progress flag */ 83*7836SJohn.Forte@Sun.COM static mutex_t thr_mutex = DEFAULTMUTEX; 84*7836SJohn.Forte@Sun.COM static mutex_t cfg_mutex = DEFAULTMUTEX; 85*7836SJohn.Forte@Sun.COM 86*7836SJohn.Forte@Sun.COM static int cl_nodeid = -1; 87*7836SJohn.Forte@Sun.COM 88*7836SJohn.Forte@Sun.COM static int display_msg = 0; 89*7836SJohn.Forte@Sun.COM static int delay_time = 30; 90*7836SJohn.Forte@Sun.COM 91*7836SJohn.Forte@Sun.COM static void 92*7836SJohn.Forte@Sun.COM usage(void) 93*7836SJohn.Forte@Sun.COM { 94*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("usage: nskernd\n")); 95*7836SJohn.Forte@Sun.COM exit(255); 96*7836SJohn.Forte@Sun.COM } 97*7836SJohn.Forte@Sun.COM 98*7836SJohn.Forte@Sun.COM 99*7836SJohn.Forte@Sun.COM static void 100*7836SJohn.Forte@Sun.COM sighand(int sig) 101*7836SJohn.Forte@Sun.COM { 102*7836SJohn.Forte@Sun.COM if (sig == SIGTERM) { 103*7836SJohn.Forte@Sun.COM sigterm++; 104*7836SJohn.Forte@Sun.COM } 105*7836SJohn.Forte@Sun.COM } 106*7836SJohn.Forte@Sun.COM 107*7836SJohn.Forte@Sun.COM 108*7836SJohn.Forte@Sun.COM /* 109*7836SJohn.Forte@Sun.COM * Returns: 1 - can enter kernel; 0 - shutdown in progress, do not enter kernel 110*7836SJohn.Forte@Sun.COM */ 111*7836SJohn.Forte@Sun.COM int 112*7836SJohn.Forte@Sun.COM nthread_inc(void) 113*7836SJohn.Forte@Sun.COM { 114*7836SJohn.Forte@Sun.COM mutex_lock(&thr_mutex); 115*7836SJohn.Forte@Sun.COM if (exiting) { 116*7836SJohn.Forte@Sun.COM /* cannot enter kernel as nskernd is being shutdown - exit */ 117*7836SJohn.Forte@Sun.COM mutex_unlock(&thr_mutex); 118*7836SJohn.Forte@Sun.COM return (0); 119*7836SJohn.Forte@Sun.COM } 120*7836SJohn.Forte@Sun.COM nthreads++; 121*7836SJohn.Forte@Sun.COM mutex_unlock(&thr_mutex); 122*7836SJohn.Forte@Sun.COM return (1); 123*7836SJohn.Forte@Sun.COM } 124*7836SJohn.Forte@Sun.COM 125*7836SJohn.Forte@Sun.COM 126*7836SJohn.Forte@Sun.COM void 127*7836SJohn.Forte@Sun.COM nthread_dec(void) 128*7836SJohn.Forte@Sun.COM { 129*7836SJohn.Forte@Sun.COM mutex_lock(&thr_mutex); 130*7836SJohn.Forte@Sun.COM nthreads--; 131*7836SJohn.Forte@Sun.COM mutex_unlock(&thr_mutex); 132*7836SJohn.Forte@Sun.COM } 133*7836SJohn.Forte@Sun.COM 134*7836SJohn.Forte@Sun.COM 135*7836SJohn.Forte@Sun.COM /* 136*7836SJohn.Forte@Sun.COM * returns: 1 - can shutdown; 0 - unable to shutdown 137*7836SJohn.Forte@Sun.COM */ 138*7836SJohn.Forte@Sun.COM int 139*7836SJohn.Forte@Sun.COM canshutdown(void) 140*7836SJohn.Forte@Sun.COM { 141*7836SJohn.Forte@Sun.COM int rc = 1; 142*7836SJohn.Forte@Sun.COM time_t start_delay; 143*7836SJohn.Forte@Sun.COM 144*7836SJohn.Forte@Sun.COM mutex_lock(&thr_mutex); 145*7836SJohn.Forte@Sun.COM if (nthreads > 0) { 146*7836SJohn.Forte@Sun.COM if (display_msg) { 147*7836SJohn.Forte@Sun.COM fprintf(stderr, 148*7836SJohn.Forte@Sun.COM gettext("nskernd: unable to shutdown: " 149*7836SJohn.Forte@Sun.COM "%d kernel threads in use\n"), nthreads); 150*7836SJohn.Forte@Sun.COM } 151*7836SJohn.Forte@Sun.COM start_delay = time(0); 152*7836SJohn.Forte@Sun.COM while (nthreads > 0 && (time(0) - start_delay) < delay_time) { 153*7836SJohn.Forte@Sun.COM mutex_unlock(&thr_mutex); 154*7836SJohn.Forte@Sun.COM sleep(1); 155*7836SJohn.Forte@Sun.COM mutex_lock(&thr_mutex); 156*7836SJohn.Forte@Sun.COM fprintf(stderr, 157*7836SJohn.Forte@Sun.COM gettext("nskernd: delay shutdown: " 158*7836SJohn.Forte@Sun.COM "%d kernel threads in use\n"), nthreads); 159*7836SJohn.Forte@Sun.COM } 160*7836SJohn.Forte@Sun.COM if (nthreads > 0) { 161*7836SJohn.Forte@Sun.COM rc = 0; 162*7836SJohn.Forte@Sun.COM } else { 163*7836SJohn.Forte@Sun.COM exiting = 1; 164*7836SJohn.Forte@Sun.COM } 165*7836SJohn.Forte@Sun.COM } else { 166*7836SJohn.Forte@Sun.COM /* flag shutdown in progress */ 167*7836SJohn.Forte@Sun.COM exiting = 1; 168*7836SJohn.Forte@Sun.COM } 169*7836SJohn.Forte@Sun.COM mutex_unlock(&thr_mutex); 170*7836SJohn.Forte@Sun.COM 171*7836SJohn.Forte@Sun.COM return (rc); 172*7836SJohn.Forte@Sun.COM } 173*7836SJohn.Forte@Sun.COM 174*7836SJohn.Forte@Sun.COM 175*7836SJohn.Forte@Sun.COM /* 176*7836SJohn.Forte@Sun.COM * returns: 1 - shutdown successful; 0 - unable to shutdown 177*7836SJohn.Forte@Sun.COM */ 178*7836SJohn.Forte@Sun.COM int 179*7836SJohn.Forte@Sun.COM shutdown(void) 180*7836SJohn.Forte@Sun.COM { 181*7836SJohn.Forte@Sun.COM struct nskernd data; 182*7836SJohn.Forte@Sun.COM int rc; 183*7836SJohn.Forte@Sun.COM 184*7836SJohn.Forte@Sun.COM if (nsctl_fd < 0) 185*7836SJohn.Forte@Sun.COM return (1); 186*7836SJohn.Forte@Sun.COM 187*7836SJohn.Forte@Sun.COM bzero(&data, sizeof (data)); 188*7836SJohn.Forte@Sun.COM data.command = NSKERND_STOP; 189*7836SJohn.Forte@Sun.COM 190*7836SJohn.Forte@Sun.COM if (!canshutdown()) { 191*7836SJohn.Forte@Sun.COM return (0); 192*7836SJohn.Forte@Sun.COM } 193*7836SJohn.Forte@Sun.COM 194*7836SJohn.Forte@Sun.COM rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 195*7836SJohn.Forte@Sun.COM if (rc < 0) { 196*7836SJohn.Forte@Sun.COM if (errno != EINTR || !sigterm) { 197*7836SJohn.Forte@Sun.COM fprintf(stderr, 198*7836SJohn.Forte@Sun.COM gettext("nskernd: NSKERND_STOP failed\n")); 199*7836SJohn.Forte@Sun.COM } 200*7836SJohn.Forte@Sun.COM } 201*7836SJohn.Forte@Sun.COM 202*7836SJohn.Forte@Sun.COM return (1); 203*7836SJohn.Forte@Sun.COM } 204*7836SJohn.Forte@Sun.COM 205*7836SJohn.Forte@Sun.COM 206*7836SJohn.Forte@Sun.COM /* 207*7836SJohn.Forte@Sun.COM * First function run by a NSKERND_NEWLWP thread. 208*7836SJohn.Forte@Sun.COM * 209*7836SJohn.Forte@Sun.COM * Determines if it needs to change the scheduling priority of the LWP, 210*7836SJohn.Forte@Sun.COM * and then calls back into the kernel. 211*7836SJohn.Forte@Sun.COM */ 212*7836SJohn.Forte@Sun.COM static void * 213*7836SJohn.Forte@Sun.COM _newlwp(void *arg) 214*7836SJohn.Forte@Sun.COM { 215*7836SJohn.Forte@Sun.COM struct nskernd nsk; 216*7836SJohn.Forte@Sun.COM pcparms_t pcparms; 217*7836SJohn.Forte@Sun.COM pcinfo_t pcinfo; 218*7836SJohn.Forte@Sun.COM 219*7836SJohn.Forte@Sun.COM /* copy arguments onto stack and free heap memory */ 220*7836SJohn.Forte@Sun.COM bcopy(arg, &nsk, sizeof (nsk)); 221*7836SJohn.Forte@Sun.COM free(arg); 222*7836SJohn.Forte@Sun.COM 223*7836SJohn.Forte@Sun.COM if (nsk.data2 && allow_pri) { 224*7836SJohn.Forte@Sun.COM /* increase the scheduling priority of this LWP */ 225*7836SJohn.Forte@Sun.COM 226*7836SJohn.Forte@Sun.COM bzero(&pcinfo, sizeof (pcinfo)); 227*7836SJohn.Forte@Sun.COM strcpy(pcinfo.pc_clname, allow_rt ? "RT" : "TS"); 228*7836SJohn.Forte@Sun.COM 229*7836SJohn.Forte@Sun.COM if (priocntl(0, 0, PC_GETCID, (char *)&pcinfo) < 0) { 230*7836SJohn.Forte@Sun.COM fprintf(stderr, 231*7836SJohn.Forte@Sun.COM gettext( 232*7836SJohn.Forte@Sun.COM "nskernd: priocntl(PC_GETCID) failed: %s\n"), 233*7836SJohn.Forte@Sun.COM strerror(errno)); 234*7836SJohn.Forte@Sun.COM goto pri_done; 235*7836SJohn.Forte@Sun.COM } 236*7836SJohn.Forte@Sun.COM 237*7836SJohn.Forte@Sun.COM bzero(&pcparms, sizeof (pcparms)); 238*7836SJohn.Forte@Sun.COM pcparms.pc_cid = pcinfo.pc_cid; 239*7836SJohn.Forte@Sun.COM 240*7836SJohn.Forte@Sun.COM if (allow_rt) { 241*7836SJohn.Forte@Sun.COM ((rtparms_t *)pcparms.pc_clparms)->rt_pri = 242*7836SJohn.Forte@Sun.COM (pri_t)0; /* minimum RT priority */ 243*7836SJohn.Forte@Sun.COM ((rtparms_t *)pcparms.pc_clparms)->rt_tqsecs = 244*7836SJohn.Forte@Sun.COM (uint_t)RT_TQDEF; 245*7836SJohn.Forte@Sun.COM ((rtparms_t *)pcparms.pc_clparms)->rt_tqnsecs = 246*7836SJohn.Forte@Sun.COM RT_TQDEF; 247*7836SJohn.Forte@Sun.COM } else { 248*7836SJohn.Forte@Sun.COM ((tsparms_t *)pcparms.pc_clparms)->ts_uprilim = 249*7836SJohn.Forte@Sun.COM ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 250*7836SJohn.Forte@Sun.COM ((tsparms_t *)pcparms.pc_clparms)->ts_upri = 251*7836SJohn.Forte@Sun.COM ((tsinfo_t *)&pcinfo.pc_clinfo)->ts_maxupri; 252*7836SJohn.Forte@Sun.COM } 253*7836SJohn.Forte@Sun.COM 254*7836SJohn.Forte@Sun.COM if (priocntl(P_LWPID, P_MYID, 255*7836SJohn.Forte@Sun.COM PC_SETPARMS, (char *)&pcparms) < 0) { 256*7836SJohn.Forte@Sun.COM fprintf(stderr, 257*7836SJohn.Forte@Sun.COM gettext( 258*7836SJohn.Forte@Sun.COM "nskernd: priocntl(PC_SETPARMS) failed: %s\n"), 259*7836SJohn.Forte@Sun.COM strerror(errno)); 260*7836SJohn.Forte@Sun.COM } 261*7836SJohn.Forte@Sun.COM } 262*7836SJohn.Forte@Sun.COM 263*7836SJohn.Forte@Sun.COM pri_done: 264*7836SJohn.Forte@Sun.COM if (nthread_inc()) { 265*7836SJohn.Forte@Sun.COM (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 266*7836SJohn.Forte@Sun.COM nthread_dec(); 267*7836SJohn.Forte@Sun.COM } 268*7836SJohn.Forte@Sun.COM return (NULL); 269*7836SJohn.Forte@Sun.COM } 270*7836SJohn.Forte@Sun.COM 271*7836SJohn.Forte@Sun.COM 272*7836SJohn.Forte@Sun.COM /* 273*7836SJohn.Forte@Sun.COM * Start a new thread bound to an LWP. 274*7836SJohn.Forte@Sun.COM * 275*7836SJohn.Forte@Sun.COM * This is the user level side of nsc_create_process(). 276*7836SJohn.Forte@Sun.COM */ 277*7836SJohn.Forte@Sun.COM static void 278*7836SJohn.Forte@Sun.COM newlwp(struct nskernd *req) 279*7836SJohn.Forte@Sun.COM { 280*7836SJohn.Forte@Sun.COM struct nskernd *nskp; 281*7836SJohn.Forte@Sun.COM thread_t tid; 282*7836SJohn.Forte@Sun.COM int rc; 283*7836SJohn.Forte@Sun.COM 284*7836SJohn.Forte@Sun.COM nskp = malloc(sizeof (*nskp)); 285*7836SJohn.Forte@Sun.COM if (!nskp) { 286*7836SJohn.Forte@Sun.COM #ifdef DEBUG 287*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: malloc(%d) failed\n"), 288*7836SJohn.Forte@Sun.COM sizeof (*nskp)); 289*7836SJohn.Forte@Sun.COM #endif 290*7836SJohn.Forte@Sun.COM req->data1 = (uint64_t)ENOMEM; 291*7836SJohn.Forte@Sun.COM return; 292*7836SJohn.Forte@Sun.COM } 293*7836SJohn.Forte@Sun.COM 294*7836SJohn.Forte@Sun.COM /* copy args for child */ 295*7836SJohn.Forte@Sun.COM bcopy(req, nskp, sizeof (*nskp)); 296*7836SJohn.Forte@Sun.COM 297*7836SJohn.Forte@Sun.COM rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 298*7836SJohn.Forte@Sun.COM _newlwp, nskp, THR_BOUND|THR_DETACHED, &tid); 299*7836SJohn.Forte@Sun.COM 300*7836SJohn.Forte@Sun.COM if (rc != 0) { 301*7836SJohn.Forte@Sun.COM /* thr_create failed */ 302*7836SJohn.Forte@Sun.COM #ifdef DEBUG 303*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: thr_create failed: %s\n"), 304*7836SJohn.Forte@Sun.COM strerror(errno)); 305*7836SJohn.Forte@Sun.COM #endif 306*7836SJohn.Forte@Sun.COM req->data1 = (uint64_t)errno; 307*7836SJohn.Forte@Sun.COM free(nskp); 308*7836SJohn.Forte@Sun.COM } else { 309*7836SJohn.Forte@Sun.COM /* success - _newlwp() will free nskp */ 310*7836SJohn.Forte@Sun.COM req->data1 = (uint64_t)0; 311*7836SJohn.Forte@Sun.COM } 312*7836SJohn.Forte@Sun.COM } 313*7836SJohn.Forte@Sun.COM 314*7836SJohn.Forte@Sun.COM static int 315*7836SJohn.Forte@Sun.COM log_iibmp_err(char *set, int flags) 316*7836SJohn.Forte@Sun.COM { 317*7836SJohn.Forte@Sun.COM CFGFILE *cfg; 318*7836SJohn.Forte@Sun.COM char key[CFG_MAX_KEY]; 319*7836SJohn.Forte@Sun.COM char buf[CFG_MAX_BUF]; 320*7836SJohn.Forte@Sun.COM char newflags[CFG_MAX_BUF]; 321*7836SJohn.Forte@Sun.COM char outbuf[CFG_MAX_BUF]; 322*7836SJohn.Forte@Sun.COM char *mst, *shd, *bmp, *mode, *ovr, *cnode, *opt, *grp; 323*7836SJohn.Forte@Sun.COM int setno, found = 0; 324*7836SJohn.Forte@Sun.COM int setlen; 325*7836SJohn.Forte@Sun.COM int rc = 0; 326*7836SJohn.Forte@Sun.COM pid_t pid = -1; 327*7836SJohn.Forte@Sun.COM 328*7836SJohn.Forte@Sun.COM if (set && *set) { 329*7836SJohn.Forte@Sun.COM setlen = strlen(set); 330*7836SJohn.Forte@Sun.COM } else { 331*7836SJohn.Forte@Sun.COM return (EINVAL); 332*7836SJohn.Forte@Sun.COM } 333*7836SJohn.Forte@Sun.COM 334*7836SJohn.Forte@Sun.COM mutex_lock(&cfg_mutex); 335*7836SJohn.Forte@Sun.COM cfg = cfg_open(""); 336*7836SJohn.Forte@Sun.COM if (!cfg) { 337*7836SJohn.Forte@Sun.COM mutex_unlock(&cfg_mutex); 338*7836SJohn.Forte@Sun.COM return (ENXIO); 339*7836SJohn.Forte@Sun.COM } 340*7836SJohn.Forte@Sun.COM 341*7836SJohn.Forte@Sun.COM if (!cfg_lock(cfg, CFG_WRLOCK)) { 342*7836SJohn.Forte@Sun.COM 343*7836SJohn.Forte@Sun.COM mutex_unlock(&cfg_mutex); 344*7836SJohn.Forte@Sun.COM cfg_close(cfg); 345*7836SJohn.Forte@Sun.COM 346*7836SJohn.Forte@Sun.COM pid = fork(); 347*7836SJohn.Forte@Sun.COM 348*7836SJohn.Forte@Sun.COM if (pid == -1) { 349*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext( 350*7836SJohn.Forte@Sun.COM "nskernd: Error forking\n")); 351*7836SJohn.Forte@Sun.COM return (errno); 352*7836SJohn.Forte@Sun.COM } else if (pid > 0) { 353*7836SJohn.Forte@Sun.COM fprintf(stdout, gettext( 354*7836SJohn.Forte@Sun.COM "nskernd: Attempting deferred bitmap error\n")); 355*7836SJohn.Forte@Sun.COM return (0); 356*7836SJohn.Forte@Sun.COM } 357*7836SJohn.Forte@Sun.COM 358*7836SJohn.Forte@Sun.COM mutex_lock(&cfg_mutex); 359*7836SJohn.Forte@Sun.COM cfg = cfg_open(""); 360*7836SJohn.Forte@Sun.COM if (!cfg) { 361*7836SJohn.Forte@Sun.COM mutex_unlock(&cfg_mutex); 362*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext( 363*7836SJohn.Forte@Sun.COM "nskernd: Failed cfg_open, deferred bitmap\n")); 364*7836SJohn.Forte@Sun.COM return (ENXIO); 365*7836SJohn.Forte@Sun.COM } 366*7836SJohn.Forte@Sun.COM 367*7836SJohn.Forte@Sun.COM /* Sooner or later, this lock will be free */ 368*7836SJohn.Forte@Sun.COM while (!cfg_lock(cfg, CFG_WRLOCK)) 369*7836SJohn.Forte@Sun.COM sleep(2); 370*7836SJohn.Forte@Sun.COM } 371*7836SJohn.Forte@Sun.COM 372*7836SJohn.Forte@Sun.COM /* find the proper set number */ 373*7836SJohn.Forte@Sun.COM for (setno = 1; !found; setno++) { 374*7836SJohn.Forte@Sun.COM snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 375*7836SJohn.Forte@Sun.COM if (cfg_get_cstring(cfg, key, buf, CFG_MAX_BUF) < 0) { 376*7836SJohn.Forte@Sun.COM break; 377*7836SJohn.Forte@Sun.COM } 378*7836SJohn.Forte@Sun.COM 379*7836SJohn.Forte@Sun.COM mst = strtok(buf, " "); 380*7836SJohn.Forte@Sun.COM shd = strtok(NULL, " "); 381*7836SJohn.Forte@Sun.COM if (strncmp(shd, set, setlen) == 0) { 382*7836SJohn.Forte@Sun.COM found = 1; 383*7836SJohn.Forte@Sun.COM 384*7836SJohn.Forte@Sun.COM bmp = strtok(NULL, " "); 385*7836SJohn.Forte@Sun.COM mode = strtok(NULL, " "); 386*7836SJohn.Forte@Sun.COM ovr = strtok(NULL, " "); 387*7836SJohn.Forte@Sun.COM cnode = strtok(NULL, " "); 388*7836SJohn.Forte@Sun.COM opt = strtok(NULL, " "); 389*7836SJohn.Forte@Sun.COM grp = strtok(NULL, " "); 390*7836SJohn.Forte@Sun.COM break; 391*7836SJohn.Forte@Sun.COM } 392*7836SJohn.Forte@Sun.COM } 393*7836SJohn.Forte@Sun.COM 394*7836SJohn.Forte@Sun.COM if (found) { 395*7836SJohn.Forte@Sun.COM /* were there flags in the options field already? */ 396*7836SJohn.Forte@Sun.COM snprintf(newflags, CFG_MAX_BUF, "%s=0x%x", 397*7836SJohn.Forte@Sun.COM NSKERN_II_BMP_OPTION, flags); 398*7836SJohn.Forte@Sun.COM if (opt && strcmp(opt, "-") != 0) { 399*7836SJohn.Forte@Sun.COM bzero(newflags, CFG_MAX_BUF); 400*7836SJohn.Forte@Sun.COM opt = strtok(opt, ";"); 401*7836SJohn.Forte@Sun.COM while (opt) { 402*7836SJohn.Forte@Sun.COM if (strncmp(opt, NSKERN_II_BMP_OPTION, 403*7836SJohn.Forte@Sun.COM strlen(NSKERN_II_BMP_OPTION)) != 0) { 404*7836SJohn.Forte@Sun.COM strcat(newflags, ";"); 405*7836SJohn.Forte@Sun.COM strcat(newflags, opt); 406*7836SJohn.Forte@Sun.COM } 407*7836SJohn.Forte@Sun.COM } 408*7836SJohn.Forte@Sun.COM } 409*7836SJohn.Forte@Sun.COM snprintf(key, CFG_MAX_KEY, "ii.set%d", setno); 410*7836SJohn.Forte@Sun.COM snprintf(outbuf, CFG_MAX_BUF, "%s %s %s %s %s %s %s %s", 411*7836SJohn.Forte@Sun.COM mst, shd, bmp, mode, ovr, cnode, newflags, grp); 412*7836SJohn.Forte@Sun.COM if (cfg_put_cstring(cfg, key, outbuf, CFG_MAX_BUF) < 0) { 413*7836SJohn.Forte@Sun.COM printf("Failed to put [%s]\n", outbuf); 414*7836SJohn.Forte@Sun.COM rc = ENXIO; 415*7836SJohn.Forte@Sun.COM } else { 416*7836SJohn.Forte@Sun.COM cfg_commit(cfg); 417*7836SJohn.Forte@Sun.COM rc = 0; 418*7836SJohn.Forte@Sun.COM } 419*7836SJohn.Forte@Sun.COM } else { 420*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext( 421*7836SJohn.Forte@Sun.COM "nskernd: Failed deferred bitmap [%s]\n"), set); 422*7836SJohn.Forte@Sun.COM rc = EINVAL; 423*7836SJohn.Forte@Sun.COM } 424*7836SJohn.Forte@Sun.COM cfg_unlock(cfg); 425*7836SJohn.Forte@Sun.COM cfg_close(cfg); 426*7836SJohn.Forte@Sun.COM mutex_unlock(&cfg_mutex); 427*7836SJohn.Forte@Sun.COM 428*7836SJohn.Forte@Sun.COM /* 429*7836SJohn.Forte@Sun.COM * if we are the fork'ed client, just exit, if parent just return 430*7836SJohn.Forte@Sun.COM */ 431*7836SJohn.Forte@Sun.COM if (pid == 0) { 432*7836SJohn.Forte@Sun.COM exit(rc); 433*7836SJohn.Forte@Sun.COM /*NOTREACHED*/ 434*7836SJohn.Forte@Sun.COM } else { 435*7836SJohn.Forte@Sun.COM return (rc); 436*7836SJohn.Forte@Sun.COM } 437*7836SJohn.Forte@Sun.COM } 438*7836SJohn.Forte@Sun.COM 439*7836SJohn.Forte@Sun.COM /* 440*7836SJohn.Forte@Sun.COM * First function run by a NSKERND_LOCK thread. 441*7836SJohn.Forte@Sun.COM * 442*7836SJohn.Forte@Sun.COM * Opens dscfg and locks it, 443*7836SJohn.Forte@Sun.COM * and then calls back into the kernel. 444*7836SJohn.Forte@Sun.COM * 445*7836SJohn.Forte@Sun.COM * Incoming: 446*7836SJohn.Forte@Sun.COM * data1 is the kernel address of the sync structure. 447*7836SJohn.Forte@Sun.COM * data2 is read(0)/write(1) lock mode. 448*7836SJohn.Forte@Sun.COM * 449*7836SJohn.Forte@Sun.COM * Returns: 450*7836SJohn.Forte@Sun.COM * data1 as incoming. 451*7836SJohn.Forte@Sun.COM * data2 errno. 452*7836SJohn.Forte@Sun.COM */ 453*7836SJohn.Forte@Sun.COM static void * 454*7836SJohn.Forte@Sun.COM _dolock(void *arg) 455*7836SJohn.Forte@Sun.COM { 456*7836SJohn.Forte@Sun.COM struct nskernd nsk; 457*7836SJohn.Forte@Sun.COM CFGFILE *cfg; 458*7836SJohn.Forte@Sun.COM int locked; 459*7836SJohn.Forte@Sun.COM int mode; 460*7836SJohn.Forte@Sun.COM int rc = 0; 461*7836SJohn.Forte@Sun.COM 462*7836SJohn.Forte@Sun.COM /* copy arguments onto stack and free heap memory */ 463*7836SJohn.Forte@Sun.COM bcopy(arg, &nsk, sizeof (nsk)); 464*7836SJohn.Forte@Sun.COM free(arg); 465*7836SJohn.Forte@Sun.COM 466*7836SJohn.Forte@Sun.COM mutex_lock(&cfg_mutex); 467*7836SJohn.Forte@Sun.COM cfg = cfg_open(""); 468*7836SJohn.Forte@Sun.COM if (cfg == NULL) { 469*7836SJohn.Forte@Sun.COM #ifdef DEBUG 470*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: cfg_open failed: %s\n"), 471*7836SJohn.Forte@Sun.COM strerror(errno)); 472*7836SJohn.Forte@Sun.COM #endif 473*7836SJohn.Forte@Sun.COM rc = ENXIO; 474*7836SJohn.Forte@Sun.COM } 475*7836SJohn.Forte@Sun.COM 476*7836SJohn.Forte@Sun.COM if (nsk.data2 == 0) { 477*7836SJohn.Forte@Sun.COM mode = CFG_RDLOCK; 478*7836SJohn.Forte@Sun.COM } else { 479*7836SJohn.Forte@Sun.COM mode = CFG_WRLOCK; 480*7836SJohn.Forte@Sun.COM } 481*7836SJohn.Forte@Sun.COM 482*7836SJohn.Forte@Sun.COM locked = 0; 483*7836SJohn.Forte@Sun.COM if (rc == 0) { 484*7836SJohn.Forte@Sun.COM if (cfg_lock(cfg, mode)) { 485*7836SJohn.Forte@Sun.COM locked = 1; 486*7836SJohn.Forte@Sun.COM } else { 487*7836SJohn.Forte@Sun.COM #ifdef DEBUG 488*7836SJohn.Forte@Sun.COM fprintf(stderr, 489*7836SJohn.Forte@Sun.COM gettext("nskernd: cfg_lock failed: %s\n"), 490*7836SJohn.Forte@Sun.COM strerror(errno)); 491*7836SJohn.Forte@Sun.COM #endif 492*7836SJohn.Forte@Sun.COM rc = EINVAL; 493*7836SJohn.Forte@Sun.COM } 494*7836SJohn.Forte@Sun.COM } 495*7836SJohn.Forte@Sun.COM 496*7836SJohn.Forte@Sun.COM /* return to kernel */ 497*7836SJohn.Forte@Sun.COM 498*7836SJohn.Forte@Sun.COM nsk.data2 = (uint64_t)rc; 499*7836SJohn.Forte@Sun.COM if (nthread_inc()) { 500*7836SJohn.Forte@Sun.COM (void) ioctl(nsctl_fd, NSCIOC_NSKERND, &nsk); 501*7836SJohn.Forte@Sun.COM nthread_dec(); 502*7836SJohn.Forte@Sun.COM } 503*7836SJohn.Forte@Sun.COM 504*7836SJohn.Forte@Sun.COM /* cleanup */ 505*7836SJohn.Forte@Sun.COM 506*7836SJohn.Forte@Sun.COM if (locked) { 507*7836SJohn.Forte@Sun.COM cfg_unlock(cfg); 508*7836SJohn.Forte@Sun.COM locked = 0; 509*7836SJohn.Forte@Sun.COM } 510*7836SJohn.Forte@Sun.COM 511*7836SJohn.Forte@Sun.COM if (cfg != NULL) { 512*7836SJohn.Forte@Sun.COM cfg_close(cfg); 513*7836SJohn.Forte@Sun.COM cfg = NULL; 514*7836SJohn.Forte@Sun.COM } 515*7836SJohn.Forte@Sun.COM mutex_unlock(&cfg_mutex); 516*7836SJohn.Forte@Sun.COM 517*7836SJohn.Forte@Sun.COM return (NULL); 518*7836SJohn.Forte@Sun.COM } 519*7836SJohn.Forte@Sun.COM 520*7836SJohn.Forte@Sun.COM 521*7836SJohn.Forte@Sun.COM /* 522*7836SJohn.Forte@Sun.COM * Inter-node lock thread. 523*7836SJohn.Forte@Sun.COM * 524*7836SJohn.Forte@Sun.COM * This is the user level side of nsc_rmlock(). 525*7836SJohn.Forte@Sun.COM */ 526*7836SJohn.Forte@Sun.COM static void 527*7836SJohn.Forte@Sun.COM dolock(struct nskernd *req) 528*7836SJohn.Forte@Sun.COM { 529*7836SJohn.Forte@Sun.COM struct nskernd *nskp; 530*7836SJohn.Forte@Sun.COM thread_t tid; 531*7836SJohn.Forte@Sun.COM int rc; 532*7836SJohn.Forte@Sun.COM 533*7836SJohn.Forte@Sun.COM /* create a new thread to do the lock and return to kernel */ 534*7836SJohn.Forte@Sun.COM 535*7836SJohn.Forte@Sun.COM nskp = malloc(sizeof (*nskp)); 536*7836SJohn.Forte@Sun.COM if (!nskp) { 537*7836SJohn.Forte@Sun.COM #ifdef DEBUG 538*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd:dolock: malloc(%d) failed\n"), 539*7836SJohn.Forte@Sun.COM sizeof (*nskp)); 540*7836SJohn.Forte@Sun.COM #endif 541*7836SJohn.Forte@Sun.COM req->data1 = (uint64_t)ENOMEM; 542*7836SJohn.Forte@Sun.COM return; 543*7836SJohn.Forte@Sun.COM } 544*7836SJohn.Forte@Sun.COM 545*7836SJohn.Forte@Sun.COM /* copy args for child */ 546*7836SJohn.Forte@Sun.COM bcopy(req, nskp, sizeof (*nskp)); 547*7836SJohn.Forte@Sun.COM 548*7836SJohn.Forte@Sun.COM rc = thr_create(NULL, (THR_MIN_STACK + NSK_STACK_SIZE), 549*7836SJohn.Forte@Sun.COM _dolock, nskp, THR_BOUND|THR_DETACHED, &tid); 550*7836SJohn.Forte@Sun.COM 551*7836SJohn.Forte@Sun.COM if (rc != 0) { 552*7836SJohn.Forte@Sun.COM /* thr_create failed */ 553*7836SJohn.Forte@Sun.COM #ifdef DEBUG 554*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: thr_create failed: %s\n"), 555*7836SJohn.Forte@Sun.COM strerror(errno)); 556*7836SJohn.Forte@Sun.COM #endif 557*7836SJohn.Forte@Sun.COM req->data1 = (uint64_t)errno; 558*7836SJohn.Forte@Sun.COM free(nskp); 559*7836SJohn.Forte@Sun.COM } else { 560*7836SJohn.Forte@Sun.COM /* success - _dolock() will free nskp */ 561*7836SJohn.Forte@Sun.COM req->data1 = (uint64_t)0; 562*7836SJohn.Forte@Sun.COM } 563*7836SJohn.Forte@Sun.COM } 564*7836SJohn.Forte@Sun.COM 565*7836SJohn.Forte@Sun.COM 566*7836SJohn.Forte@Sun.COM /* 567*7836SJohn.Forte@Sun.COM * Convenience code for engineering test of multi-terabyte volumes. 568*7836SJohn.Forte@Sun.COM * 569*7836SJohn.Forte@Sun.COM * zvol (part of zfs) does not support DKIOCPARTITION but does use EFI 570*7836SJohn.Forte@Sun.COM * labels. This code allocates a simple efi label structure and ioctls 571*7836SJohn.Forte@Sun.COM * to extract the size of a zvol. It only handles the minimal EFI ioctl 572*7836SJohn.Forte@Sun.COM * implementation in zvol. 573*7836SJohn.Forte@Sun.COM */ 574*7836SJohn.Forte@Sun.COM 575*7836SJohn.Forte@Sun.COM static void 576*7836SJohn.Forte@Sun.COM zvol_bsize(char *path, uint64_t *size, const int pnum) 577*7836SJohn.Forte@Sun.COM { 578*7836SJohn.Forte@Sun.COM struct stat64 stb1, stb2; 579*7836SJohn.Forte@Sun.COM struct dk_minfo dkm; 580*7836SJohn.Forte@Sun.COM int fd = -1; 581*7836SJohn.Forte@Sun.COM int rc; 582*7836SJohn.Forte@Sun.COM 583*7836SJohn.Forte@Sun.COM if (cl_nodeid || pnum != 0) 584*7836SJohn.Forte@Sun.COM return; 585*7836SJohn.Forte@Sun.COM 586*7836SJohn.Forte@Sun.COM if ((fd = open(path, O_RDONLY)) < 0) { 587*7836SJohn.Forte@Sun.COM return; 588*7836SJohn.Forte@Sun.COM } 589*7836SJohn.Forte@Sun.COM 590*7836SJohn.Forte@Sun.COM if (stat64("/devices/pseudo/zfs@0:zfs", &stb1) != 0 || 591*7836SJohn.Forte@Sun.COM fstat64(fd, &stb2) != 0 || 592*7836SJohn.Forte@Sun.COM !S_ISCHR(stb1.st_mode) || 593*7836SJohn.Forte@Sun.COM !S_ISCHR(stb2.st_mode) || 594*7836SJohn.Forte@Sun.COM major(stb1.st_rdev) != major(stb2.st_rdev)) { 595*7836SJohn.Forte@Sun.COM (void) close(fd); 596*7836SJohn.Forte@Sun.COM return; 597*7836SJohn.Forte@Sun.COM } 598*7836SJohn.Forte@Sun.COM 599*7836SJohn.Forte@Sun.COM rc = ioctl(fd, DKIOCGMEDIAINFO, (void *)&dkm); 600*7836SJohn.Forte@Sun.COM if (rc >= 0) { 601*7836SJohn.Forte@Sun.COM *size = LE_64(dkm.dki_capacity) * 602*7836SJohn.Forte@Sun.COM (dkm.dki_lbsize) / 512; 603*7836SJohn.Forte@Sun.COM } 604*7836SJohn.Forte@Sun.COM 605*7836SJohn.Forte@Sun.COM (void) close(fd); 606*7836SJohn.Forte@Sun.COM } 607*7836SJohn.Forte@Sun.COM 608*7836SJohn.Forte@Sun.COM /* ARGSUSED */ 609*7836SJohn.Forte@Sun.COM static void 610*7836SJohn.Forte@Sun.COM get_bsize(uint64_t raw_fd, uint64_t *size, int *partitionp, char *path) 611*7836SJohn.Forte@Sun.COM { 612*7836SJohn.Forte@Sun.COM struct nscioc_bsize bsize; 613*7836SJohn.Forte@Sun.COM #ifdef DKIOCPARTITION 614*7836SJohn.Forte@Sun.COM struct partition64 p64; 615*7836SJohn.Forte@Sun.COM #endif 616*7836SJohn.Forte@Sun.COM struct dk_cinfo dki_info; 617*7836SJohn.Forte@Sun.COM struct vtoc vtoc; 618*7836SJohn.Forte@Sun.COM int fd; 619*7836SJohn.Forte@Sun.COM 620*7836SJohn.Forte@Sun.COM *partitionp = -1; 621*7836SJohn.Forte@Sun.COM *size = (uint64_t)0; 622*7836SJohn.Forte@Sun.COM 623*7836SJohn.Forte@Sun.COM dki_info.dki_partition = (ushort_t)-1; 624*7836SJohn.Forte@Sun.COM bsize.dki_info = (uint64_t)(unsigned long)&dki_info; 625*7836SJohn.Forte@Sun.COM bsize.vtoc = (uint64_t)(unsigned long)&vtoc; 626*7836SJohn.Forte@Sun.COM bsize.raw_fd = raw_fd; 627*7836SJohn.Forte@Sun.COM bsize.efi = 0; 628*7836SJohn.Forte@Sun.COM 629*7836SJohn.Forte@Sun.COM fd = open(rdev, O_RDONLY); 630*7836SJohn.Forte@Sun.COM if (fd < 0) 631*7836SJohn.Forte@Sun.COM return; 632*7836SJohn.Forte@Sun.COM 633*7836SJohn.Forte@Sun.COM if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 634*7836SJohn.Forte@Sun.COM if (dki_info.dki_partition != (ushort_t)-1) { 635*7836SJohn.Forte@Sun.COM /* assume part# is ok and just the size failed */ 636*7836SJohn.Forte@Sun.COM *partitionp = (int)dki_info.dki_partition; 637*7836SJohn.Forte@Sun.COM 638*7836SJohn.Forte@Sun.COM #ifdef DKIOCPARTITION 639*7836SJohn.Forte@Sun.COM /* see if this is an EFI label */ 640*7836SJohn.Forte@Sun.COM bzero(&p64, sizeof (p64)); 641*7836SJohn.Forte@Sun.COM p64.p_partno = (uint_t)*partitionp; 642*7836SJohn.Forte@Sun.COM if ((ioctl(fd, DKIOCPARTITION, &p64)) > 0) { 643*7836SJohn.Forte@Sun.COM *size = (uint64_t)p64.p_size; 644*7836SJohn.Forte@Sun.COM } else { 645*7836SJohn.Forte@Sun.COM bsize.p64 = (uint64_t)(unsigned long)&p64; 646*7836SJohn.Forte@Sun.COM bsize.efi = 1; 647*7836SJohn.Forte@Sun.COM 648*7836SJohn.Forte@Sun.COM if (ioctl(fd, NSCIOC_BSIZE, &bsize) < 0) { 649*7836SJohn.Forte@Sun.COM /* see if this is a zvol */ 650*7836SJohn.Forte@Sun.COM zvol_bsize(path, size, *partitionp); 651*7836SJohn.Forte@Sun.COM } else { 652*7836SJohn.Forte@Sun.COM *size = (uint64_t)p64.p_size; 653*7836SJohn.Forte@Sun.COM } 654*7836SJohn.Forte@Sun.COM } 655*7836SJohn.Forte@Sun.COM #endif /* DKIOCPARTITION */ 656*7836SJohn.Forte@Sun.COM } 657*7836SJohn.Forte@Sun.COM 658*7836SJohn.Forte@Sun.COM close(fd); 659*7836SJohn.Forte@Sun.COM return; 660*7836SJohn.Forte@Sun.COM } 661*7836SJohn.Forte@Sun.COM 662*7836SJohn.Forte@Sun.COM close(fd); 663*7836SJohn.Forte@Sun.COM 664*7836SJohn.Forte@Sun.COM *partitionp = (int)dki_info.dki_partition; 665*7836SJohn.Forte@Sun.COM 666*7836SJohn.Forte@Sun.COM if (vtoc.v_sanity != VTOC_SANE) 667*7836SJohn.Forte@Sun.COM return; 668*7836SJohn.Forte@Sun.COM 669*7836SJohn.Forte@Sun.COM if (vtoc.v_version != V_VERSION && vtoc.v_version != 0) 670*7836SJohn.Forte@Sun.COM return; 671*7836SJohn.Forte@Sun.COM 672*7836SJohn.Forte@Sun.COM if (dki_info.dki_partition > V_NUMPAR) 673*7836SJohn.Forte@Sun.COM return; 674*7836SJohn.Forte@Sun.COM 675*7836SJohn.Forte@Sun.COM *size = (uint64_t)vtoc.v_part[(int)dki_info.dki_partition].p_size; 676*7836SJohn.Forte@Sun.COM } 677*7836SJohn.Forte@Sun.COM 678*7836SJohn.Forte@Sun.COM 679*7836SJohn.Forte@Sun.COM static int 680*7836SJohn.Forte@Sun.COM iscluster(void) 681*7836SJohn.Forte@Sun.COM { 682*7836SJohn.Forte@Sun.COM /* 683*7836SJohn.Forte@Sun.COM * Find out if we are running in a cluster 684*7836SJohn.Forte@Sun.COM */ 685*7836SJohn.Forte@Sun.COM cl_nodeid = cfg_iscluster(); 686*7836SJohn.Forte@Sun.COM if (cl_nodeid > 0) { 687*7836SJohn.Forte@Sun.COM return (TRUE); 688*7836SJohn.Forte@Sun.COM } else if (cl_nodeid == 0) { 689*7836SJohn.Forte@Sun.COM return (FALSE); 690*7836SJohn.Forte@Sun.COM } 691*7836SJohn.Forte@Sun.COM 692*7836SJohn.Forte@Sun.COM fprintf(stderr, "%s\n", 693*7836SJohn.Forte@Sun.COM gettext("nskernd: unable to ascertain environment")); 694*7836SJohn.Forte@Sun.COM exit(1); 695*7836SJohn.Forte@Sun.COM /* NOTREACHED */ 696*7836SJohn.Forte@Sun.COM } 697*7836SJohn.Forte@Sun.COM 698*7836SJohn.Forte@Sun.COM /* 699*7836SJohn.Forte@Sun.COM * Runtime Solaris release checking - build release == runtime release 700*7836SJohn.Forte@Sun.COM * is always considered success, so only keep entries in the map for 701*7836SJohn.Forte@Sun.COM * the special cases. 702*7836SJohn.Forte@Sun.COM */ 703*7836SJohn.Forte@Sun.COM static nsc_release_t nskernd_rel_map[] = { 704*7836SJohn.Forte@Sun.COM /* { "5.10", "5.10" }, */ 705*7836SJohn.Forte@Sun.COM { "5.11", "5.10" }, 706*7836SJohn.Forte@Sun.COM { NULL, NULL } 707*7836SJohn.Forte@Sun.COM }; 708*7836SJohn.Forte@Sun.COM 709*7836SJohn.Forte@Sun.COM 710*7836SJohn.Forte@Sun.COM #ifdef lint 711*7836SJohn.Forte@Sun.COM #define main nskernd_main 712*7836SJohn.Forte@Sun.COM #endif 713*7836SJohn.Forte@Sun.COM /* ARGSUSED1 */ 714*7836SJohn.Forte@Sun.COM int 715*7836SJohn.Forte@Sun.COM main(int argc, char *argv[]) 716*7836SJohn.Forte@Sun.COM { 717*7836SJohn.Forte@Sun.COM const char *dir = "/"; 718*7836SJohn.Forte@Sun.COM struct nskernd data; 719*7836SJohn.Forte@Sun.COM struct rlimit rl; 720*7836SJohn.Forte@Sun.COM int i, run, rc; 721*7836SJohn.Forte@Sun.COM int partition; 722*7836SJohn.Forte@Sun.COM char *reqd; 723*7836SJohn.Forte@Sun.COM int syncpipe[2]; 724*7836SJohn.Forte@Sun.COM int startup; 725*7836SJohn.Forte@Sun.COM 726*7836SJohn.Forte@Sun.COM (void) setlocale(LC_ALL, ""); 727*7836SJohn.Forte@Sun.COM (void) textdomain("nskernd"); 728*7836SJohn.Forte@Sun.COM 729*7836SJohn.Forte@Sun.COM rc = nsc_check_release(BUILD_REV_STR, nskernd_rel_map, &reqd); 730*7836SJohn.Forte@Sun.COM if (rc < 0) { 731*7836SJohn.Forte@Sun.COM fprintf(stderr, 732*7836SJohn.Forte@Sun.COM gettext("nskernd: unable to determine the current " 733*7836SJohn.Forte@Sun.COM "Solaris release: %s\n"), strerror(errno)); 734*7836SJohn.Forte@Sun.COM exit(1); 735*7836SJohn.Forte@Sun.COM } else if (rc == FALSE) { 736*7836SJohn.Forte@Sun.COM fprintf(stderr, 737*7836SJohn.Forte@Sun.COM gettext("nskernd: incorrect Solaris release " 738*7836SJohn.Forte@Sun.COM "(requires %s)\n"), reqd); 739*7836SJohn.Forte@Sun.COM exit(1); 740*7836SJohn.Forte@Sun.COM } 741*7836SJohn.Forte@Sun.COM 742*7836SJohn.Forte@Sun.COM rc = 0; 743*7836SJohn.Forte@Sun.COM 744*7836SJohn.Forte@Sun.COM if (argc != 1) 745*7836SJohn.Forte@Sun.COM usage(); 746*7836SJohn.Forte@Sun.COM 747*7836SJohn.Forte@Sun.COM /* 748*7836SJohn.Forte@Sun.COM * Usage: <progname> [-g] [-d <seconds to delay>] 749*7836SJohn.Forte@Sun.COM */ 750*7836SJohn.Forte@Sun.COM while ((i = getopt(argc, argv, "gd:")) != EOF) { 751*7836SJohn.Forte@Sun.COM switch (i) { 752*7836SJohn.Forte@Sun.COM case 'g': 753*7836SJohn.Forte@Sun.COM display_msg = 1; 754*7836SJohn.Forte@Sun.COM break; 755*7836SJohn.Forte@Sun.COM case 'd': 756*7836SJohn.Forte@Sun.COM delay_time = atoi(optarg); 757*7836SJohn.Forte@Sun.COM if (delay_time <= 0) { 758*7836SJohn.Forte@Sun.COM delay_time = 30; 759*7836SJohn.Forte@Sun.COM } 760*7836SJohn.Forte@Sun.COM break; 761*7836SJohn.Forte@Sun.COM default: 762*7836SJohn.Forte@Sun.COM syslog(LOG_ERR, 763*7836SJohn.Forte@Sun.COM "Usage: nskernd [-g] [-d <seconds to delay>]"); 764*7836SJohn.Forte@Sun.COM exit(1); 765*7836SJohn.Forte@Sun.COM break; 766*7836SJohn.Forte@Sun.COM } 767*7836SJohn.Forte@Sun.COM } 768*7836SJohn.Forte@Sun.COM 769*7836SJohn.Forte@Sun.COM if (chroot(dir) < 0) { 770*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: chroot failed: %s\n"), 771*7836SJohn.Forte@Sun.COM strerror(errno)); 772*7836SJohn.Forte@Sun.COM exit(1); 773*7836SJohn.Forte@Sun.COM } 774*7836SJohn.Forte@Sun.COM 775*7836SJohn.Forte@Sun.COM if (chdir(dir) < 0) { 776*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: chdir failed: %s\n"), 777*7836SJohn.Forte@Sun.COM strerror(errno)); 778*7836SJohn.Forte@Sun.COM exit(1); 779*7836SJohn.Forte@Sun.COM } 780*7836SJohn.Forte@Sun.COM 781*7836SJohn.Forte@Sun.COM /* 782*7836SJohn.Forte@Sun.COM * Determine if we are in a Sun Cluster or not, before fork'ing 783*7836SJohn.Forte@Sun.COM */ 784*7836SJohn.Forte@Sun.COM (void) iscluster(); 785*7836SJohn.Forte@Sun.COM 786*7836SJohn.Forte@Sun.COM /* 787*7836SJohn.Forte@Sun.COM * create a pipe to synchronise the parent with the 788*7836SJohn.Forte@Sun.COM * child just before it enters its service loop. 789*7836SJohn.Forte@Sun.COM */ 790*7836SJohn.Forte@Sun.COM if (pipe(syncpipe) < 0) { 791*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: cannot create pipe: %s\n"), 792*7836SJohn.Forte@Sun.COM strerror(errno)); 793*7836SJohn.Forte@Sun.COM exit(1); 794*7836SJohn.Forte@Sun.COM } 795*7836SJohn.Forte@Sun.COM /* 796*7836SJohn.Forte@Sun.COM * Fork off a child that becomes the daemon. 797*7836SJohn.Forte@Sun.COM */ 798*7836SJohn.Forte@Sun.COM 799*7836SJohn.Forte@Sun.COM if ((rc = fork()) > 0) { 800*7836SJohn.Forte@Sun.COM char c; 801*7836SJohn.Forte@Sun.COM int n; 802*7836SJohn.Forte@Sun.COM (void) close(syncpipe[1]); 803*7836SJohn.Forte@Sun.COM /* 804*7836SJohn.Forte@Sun.COM * wait for the close of the pipe. 805*7836SJohn.Forte@Sun.COM * If we get a char back, indicates good 806*7836SJohn.Forte@Sun.COM * status from child, so exit 0. 807*7836SJohn.Forte@Sun.COM * If we get a zero length read, then the 808*7836SJohn.Forte@Sun.COM * child has failed, so we do too. 809*7836SJohn.Forte@Sun.COM */ 810*7836SJohn.Forte@Sun.COM n = read(syncpipe[0], &c, 1); 811*7836SJohn.Forte@Sun.COM exit((n <= 0) ? 1 : 0); 812*7836SJohn.Forte@Sun.COM } else if (rc < 0) { 813*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: cannot fork: %s\n"), 814*7836SJohn.Forte@Sun.COM strerror(errno)); 815*7836SJohn.Forte@Sun.COM exit(1); 816*7836SJohn.Forte@Sun.COM } 817*7836SJohn.Forte@Sun.COM 818*7836SJohn.Forte@Sun.COM /* 819*7836SJohn.Forte@Sun.COM * In child - become daemon. 820*7836SJohn.Forte@Sun.COM */ 821*7836SJohn.Forte@Sun.COM 822*7836SJohn.Forte@Sun.COM /* use closefrom(3C) from PSARC/2000/193 when possible */ 823*7836SJohn.Forte@Sun.COM for (i = 0; i < syncpipe[1]; i++) { 824*7836SJohn.Forte@Sun.COM (void) close(i); 825*7836SJohn.Forte@Sun.COM } 826*7836SJohn.Forte@Sun.COM closefrom(syncpipe[1] + 1); 827*7836SJohn.Forte@Sun.COM 828*7836SJohn.Forte@Sun.COM (void) open("/dev/console", O_WRONLY|O_APPEND); 829*7836SJohn.Forte@Sun.COM (void) dup(0); 830*7836SJohn.Forte@Sun.COM (void) dup(0); 831*7836SJohn.Forte@Sun.COM (void) close(0); 832*7836SJohn.Forte@Sun.COM 833*7836SJohn.Forte@Sun.COM setpgrp(); 834*7836SJohn.Forte@Sun.COM 835*7836SJohn.Forte@Sun.COM /* 836*7836SJohn.Forte@Sun.COM * Ignore all signals apart from SIGTERM. 837*7836SJohn.Forte@Sun.COM */ 838*7836SJohn.Forte@Sun.COM 839*7836SJohn.Forte@Sun.COM for (i = 1; i < _sys_nsig; i++) 840*7836SJohn.Forte@Sun.COM (void) sigset(i, SIG_IGN); 841*7836SJohn.Forte@Sun.COM 842*7836SJohn.Forte@Sun.COM (void) sigset(SIGTERM, sighand); 843*7836SJohn.Forte@Sun.COM 844*7836SJohn.Forte@Sun.COM /* 845*7836SJohn.Forte@Sun.COM * Increase the number of fd's that can be open. 846*7836SJohn.Forte@Sun.COM */ 847*7836SJohn.Forte@Sun.COM 848*7836SJohn.Forte@Sun.COM rl.rlim_cur = RLIM_INFINITY; 849*7836SJohn.Forte@Sun.COM rl.rlim_max = RLIM_INFINITY; 850*7836SJohn.Forte@Sun.COM if (setrlimit(RLIMIT_NOFILE, &rl) < 0) { 851*7836SJohn.Forte@Sun.COM fprintf(stderr, 852*7836SJohn.Forte@Sun.COM gettext("nskernd: could not increase RLIMIT_NOFILE: %s\n"), 853*7836SJohn.Forte@Sun.COM strerror(errno)); 854*7836SJohn.Forte@Sun.COM fprintf(stderr, 855*7836SJohn.Forte@Sun.COM gettext("nskernd: the maximum number of nsctl open " 856*7836SJohn.Forte@Sun.COM "devices may be reduced\n")); 857*7836SJohn.Forte@Sun.COM } 858*7836SJohn.Forte@Sun.COM 859*7836SJohn.Forte@Sun.COM /* 860*7836SJohn.Forte@Sun.COM * Open /dev/nsctl and startup. 861*7836SJohn.Forte@Sun.COM */ 862*7836SJohn.Forte@Sun.COM 863*7836SJohn.Forte@Sun.COM nsctl_fd = open(rdev, O_RDONLY); 864*7836SJohn.Forte@Sun.COM if (nsctl_fd < 0) { 865*7836SJohn.Forte@Sun.COM fprintf(stderr, gettext("nskernd: unable to open %s\n"), rdev); 866*7836SJohn.Forte@Sun.COM exit(1); 867*7836SJohn.Forte@Sun.COM } 868*7836SJohn.Forte@Sun.COM 869*7836SJohn.Forte@Sun.COM bzero(&data, sizeof (data)); 870*7836SJohn.Forte@Sun.COM 871*7836SJohn.Forte@Sun.COM data.command = NSKERND_START; 872*7836SJohn.Forte@Sun.COM data.data1 = (uint64_t)cl_nodeid; 873*7836SJohn.Forte@Sun.COM run = 1; 874*7836SJohn.Forte@Sun.COM 875*7836SJohn.Forte@Sun.COM startup = 1; 876*7836SJohn.Forte@Sun.COM while (run) { 877*7836SJohn.Forte@Sun.COM rc = ioctl(nsctl_fd, NSCIOC_NSKERND, &data); 878*7836SJohn.Forte@Sun.COM if (rc < 0) { 879*7836SJohn.Forte@Sun.COM /* try and do kernel cleanup and exit */ 880*7836SJohn.Forte@Sun.COM if (shutdown()) { 881*7836SJohn.Forte@Sun.COM run = 0; 882*7836SJohn.Forte@Sun.COM } else { 883*7836SJohn.Forte@Sun.COM sigterm = 0; 884*7836SJohn.Forte@Sun.COM } 885*7836SJohn.Forte@Sun.COM 886*7836SJohn.Forte@Sun.COM fprintf(stderr, 887*7836SJohn.Forte@Sun.COM gettext("nskernd: NSCIOC_NSKERND failed: %s\n"), 888*7836SJohn.Forte@Sun.COM strerror(errno)); 889*7836SJohn.Forte@Sun.COM continue; 890*7836SJohn.Forte@Sun.COM } else if (sigterm) { 891*7836SJohn.Forte@Sun.COM /* SIGTERM received - terminate */ 892*7836SJohn.Forte@Sun.COM if (data.command != NSKERND_START && 893*7836SJohn.Forte@Sun.COM (data.command != NSKERND_STOP || 894*7836SJohn.Forte@Sun.COM data.data1 != (uint64_t)1)) { 895*7836SJohn.Forte@Sun.COM /* need to do kernel cleanup */ 896*7836SJohn.Forte@Sun.COM if (shutdown()) { 897*7836SJohn.Forte@Sun.COM run = 0; 898*7836SJohn.Forte@Sun.COM } else { 899*7836SJohn.Forte@Sun.COM sigterm = 0; 900*7836SJohn.Forte@Sun.COM data.command = NSKERND_START; 901*7836SJohn.Forte@Sun.COM data.data1 = (uint64_t)cl_nodeid; 902*7836SJohn.Forte@Sun.COM } 903*7836SJohn.Forte@Sun.COM } else { 904*7836SJohn.Forte@Sun.COM /* just quit */ 905*7836SJohn.Forte@Sun.COM if (canshutdown()) { 906*7836SJohn.Forte@Sun.COM run = 0; 907*7836SJohn.Forte@Sun.COM } else { 908*7836SJohn.Forte@Sun.COM /* cannot shutdown - threads active */ 909*7836SJohn.Forte@Sun.COM sigterm = 0; 910*7836SJohn.Forte@Sun.COM data.command = NSKERND_START; 911*7836SJohn.Forte@Sun.COM data.data1 = (uint64_t)cl_nodeid; 912*7836SJohn.Forte@Sun.COM } 913*7836SJohn.Forte@Sun.COM } 914*7836SJohn.Forte@Sun.COM continue; 915*7836SJohn.Forte@Sun.COM } 916*7836SJohn.Forte@Sun.COM if (startup) { 917*7836SJohn.Forte@Sun.COM char c = 0; 918*7836SJohn.Forte@Sun.COM (void) write(syncpipe[1], &c, 1); 919*7836SJohn.Forte@Sun.COM (void) close(syncpipe[1]); 920*7836SJohn.Forte@Sun.COM startup = 0; 921*7836SJohn.Forte@Sun.COM } 922*7836SJohn.Forte@Sun.COM switch (data.command) { 923*7836SJohn.Forte@Sun.COM case NSKERND_START: /* (re)start completion */ 924*7836SJohn.Forte@Sun.COM if (rc == 1) { 925*7836SJohn.Forte@Sun.COM fprintf(stderr, 926*7836SJohn.Forte@Sun.COM gettext("nskernd: already started\n")); 927*7836SJohn.Forte@Sun.COM run = 0; 928*7836SJohn.Forte@Sun.COM } else if (rc == 2) { 929*7836SJohn.Forte@Sun.COM fprintf(stderr, 930*7836SJohn.Forte@Sun.COM gettext("nskernd: stopped by kernel\n")); 931*7836SJohn.Forte@Sun.COM run = 0; 932*7836SJohn.Forte@Sun.COM } 933*7836SJohn.Forte@Sun.COM data.command = NSKERND_WAIT; 934*7836SJohn.Forte@Sun.COM break; 935*7836SJohn.Forte@Sun.COM 936*7836SJohn.Forte@Sun.COM case NSKERND_STOP: /* kernel telling daemon to stop */ 937*7836SJohn.Forte@Sun.COM if (data.data1 != (uint64_t)1) { 938*7836SJohn.Forte@Sun.COM (void) shutdown(); 939*7836SJohn.Forte@Sun.COM run = 0; 940*7836SJohn.Forte@Sun.COM } 941*7836SJohn.Forte@Sun.COM break; 942*7836SJohn.Forte@Sun.COM 943*7836SJohn.Forte@Sun.COM case NSKERND_BSIZE: 944*7836SJohn.Forte@Sun.COM /* 945*7836SJohn.Forte@Sun.COM * kernel requesting partsize 946*7836SJohn.Forte@Sun.COM * data1 - size return 947*7836SJohn.Forte@Sun.COM * data2 - raw_fd (entry) 948*7836SJohn.Forte@Sun.COM * - partition number (return) 949*7836SJohn.Forte@Sun.COM */ 950*7836SJohn.Forte@Sun.COM partition = -1; 951*7836SJohn.Forte@Sun.COM get_bsize(data.data2, &data.data1, 952*7836SJohn.Forte@Sun.COM &partition, data.char1); 953*7836SJohn.Forte@Sun.COM data.data2 = (uint64_t)partition; 954*7836SJohn.Forte@Sun.COM data.command = NSKERND_WAIT; 955*7836SJohn.Forte@Sun.COM break; 956*7836SJohn.Forte@Sun.COM 957*7836SJohn.Forte@Sun.COM case NSKERND_NEWLWP: /* kernel requesting a new LWP */ 958*7836SJohn.Forte@Sun.COM newlwp(&data); 959*7836SJohn.Forte@Sun.COM data.command = NSKERND_WAIT; 960*7836SJohn.Forte@Sun.COM break; 961*7836SJohn.Forte@Sun.COM 962*7836SJohn.Forte@Sun.COM case NSKERND_LOCK: /* kernel requesting lock */ 963*7836SJohn.Forte@Sun.COM dolock(&data); 964*7836SJohn.Forte@Sun.COM data.command = NSKERND_WAIT; 965*7836SJohn.Forte@Sun.COM break; 966*7836SJohn.Forte@Sun.COM 967*7836SJohn.Forte@Sun.COM case NSKERND_WAIT: /* kernel retrying wait */ 968*7836SJohn.Forte@Sun.COM /* 969*7836SJohn.Forte@Sun.COM * the kernel thread can be woken by the dr config 970*7836SJohn.Forte@Sun.COM * utilities (ie cfgadm) therefore we just reissue 971*7836SJohn.Forte@Sun.COM * the wait. 972*7836SJohn.Forte@Sun.COM */ 973*7836SJohn.Forte@Sun.COM break; 974*7836SJohn.Forte@Sun.COM 975*7836SJohn.Forte@Sun.COM case NSKERND_IIBITMAP: 976*7836SJohn.Forte@Sun.COM rc = log_iibmp_err(data.char1, (int)data.data1); 977*7836SJohn.Forte@Sun.COM data.data1 = (uint64_t)rc; 978*7836SJohn.Forte@Sun.COM data.command = NSKERND_WAIT; 979*7836SJohn.Forte@Sun.COM break; 980*7836SJohn.Forte@Sun.COM 981*7836SJohn.Forte@Sun.COM default: 982*7836SJohn.Forte@Sun.COM fprintf(stderr, 983*7836SJohn.Forte@Sun.COM gettext("nskernd: unknown command %d"), 984*7836SJohn.Forte@Sun.COM data.command); 985*7836SJohn.Forte@Sun.COM data.command = NSKERND_WAIT; 986*7836SJohn.Forte@Sun.COM break; 987*7836SJohn.Forte@Sun.COM } 988*7836SJohn.Forte@Sun.COM } 989*7836SJohn.Forte@Sun.COM 990*7836SJohn.Forte@Sun.COM (void) close(nsctl_fd); 991*7836SJohn.Forte@Sun.COM 992*7836SJohn.Forte@Sun.COM return (rc); 993*7836SJohn.Forte@Sun.COM } 994