1 /* $NetBSD: statd.c,v 1.34 2019/12/02 19:23:53 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1995 5 * A.R. Gordon (andrew.gordon@net-tel.co.uk). All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed for the FreeBSD project 18 * 4. Neither the name of the author nor the names of any co-contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY ANDREW GORDON AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 __RCSID("$NetBSD: statd.c,v 1.34 2019/12/02 19:23:53 christos Exp $"); 39 #endif 40 41 /* main() function for status monitor daemon. Some of the code in this */ 42 /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */ 43 /* The actual program logic is in the file procs.c */ 44 45 #include <sys/param.h> 46 #include <sys/wait.h> 47 48 #include <err.h> 49 #include <ctype.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <signal.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <syslog.h> 57 #include <unistd.h> 58 #include <util.h> 59 #include <db.h> 60 #include <netconfig.h> 61 62 #include <rpc/rpc.h> 63 64 #include "statd.h" 65 66 struct sigaction sa; 67 int debug = 0; /* Controls syslog() for debug msgs */ 68 extern int _rpcsvcdirty; 69 static DB *db; /* Database file */ 70 71 Header status_info; 72 73 static char undefdata[] = "\0\1\2\3\4\5\6\7"; 74 static DBT undefkey = { 75 undefdata, 76 sizeof(undefdata) 77 }; 78 79 80 /* statd.c */ 81 static int walk_one(int (*fun )(DBT *, HostInfo *, void *), DBT *, DBT *, void *); 82 static int walk_db(int (*fun )(DBT *, HostInfo *, void *), void *); 83 static int reset_host(DBT *, HostInfo *, void *); 84 static int check_work(DBT *, HostInfo *, void *); 85 static int unmon_host(DBT *, HostInfo *, void *); 86 static int notify_one(DBT *, HostInfo *, void *); 87 static void init_file(const char *); 88 static int notify_one_host(const char *); 89 static void die(int) __dead; 90 91 int 92 main(int argc, char *argv[]) 93 { 94 int ch; 95 struct sigaction nsa; 96 int maxrec = RPC_MAXDATASIZE; 97 98 while ((ch = getopt(argc, argv, "d")) != (-1)) { 99 switch (ch) { 100 case 'd': 101 debug = 1; 102 break; 103 default: 104 case '?': 105 (void)fprintf(stderr, "usage: %s [-d]\n", 106 getprogname()); 107 exit(1); 108 /* NOTREACHED */ 109 } 110 } 111 (void)rpcb_unset(SM_PROG, SM_VERS, NULL); 112 113 rpc_control(RPC_SVC_CONNMAXREC_SET, &maxrec); 114 115 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "udp")) { 116 errx(EXIT_FAILURE, "cannot create udp service."); 117 /* NOTREACHED */ 118 } 119 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "tcp")) { 120 errx(EXIT_FAILURE, "cannot create udp service."); 121 /* NOTREACHED */ 122 } 123 124 init_file("/var/db/statd.status"); 125 126 /* 127 * Note that it is NOT sensible to run this program from inetd - the 128 * protocol assumes that it will run immediately at boot time. 129 */ 130 if (!debug) 131 daemon(0, 0); 132 133 sigemptyset(&nsa.sa_mask); 134 nsa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT; 135 nsa.sa_handler = SIG_IGN; 136 (void)sigaction(SIGCHLD, &nsa, NULL); 137 138 pidfile(NULL); 139 openlog("rpc.statd", 0, LOG_DAEMON); 140 if (debug) 141 syslog(LOG_INFO, "Starting - debug enabled"); 142 else 143 syslog(LOG_INFO, "Starting"); 144 145 sa.sa_handler = die; 146 sa.sa_flags = 0; 147 sigemptyset(&sa.sa_mask); 148 (void)sigaction(SIGTERM, &sa, NULL); 149 (void)sigaction(SIGQUIT, &sa, NULL); 150 (void)sigaction(SIGHUP, &sa, NULL); 151 (void)sigaction(SIGINT, &sa, NULL); 152 153 sa.sa_handler = SIG_IGN; 154 sa.sa_flags = SA_RESTART; 155 sigemptyset(&sa.sa_mask); 156 sigaddset(&sa.sa_mask, SIGALRM); 157 158 /* Initialisation now complete - start operating */ 159 160 /* Notify hosts that need it */ 161 notify_handler(0); 162 163 while (1) 164 svc_run(); /* Should never return */ 165 die(0); 166 } 167 168 /* notify_handler ---------------------------------------------------------- */ 169 /* 170 * Purpose: Catch SIGALRM and collect process status 171 * Returns: Nothing. 172 * Notes: No special action required, other than to collect the 173 * process status and hence allow the child to die: 174 * we only use child processes for asynchronous transmission 175 * of SM_NOTIFY to other systems, so it is normal for the 176 * children to exit when they have done their work. 177 */ 178 void 179 notify_handler(int sig) 180 { 181 time_t now; 182 183 NO_ALARM; 184 sa.sa_handler = SIG_IGN; 185 (void)sigaction(SIGALRM, &sa, NULL); 186 187 now = time(NULL); 188 189 (void) walk_db(notify_one, &now); 190 191 if (walk_db(check_work, &now) == 0) { 192 /* 193 * No more work to be done. 194 */ 195 CLR_ALARM; 196 return; 197 } 198 sync_file(); 199 ALARM; 200 alarm(5); 201 } 202 203 /* sync_file --------------------------------------------------------------- */ 204 /* 205 * Purpose: Packaged call of msync() to flush changes to mmap()ed file 206 * Returns: Nothing. Errors to syslog. 207 */ 208 void 209 sync_file() 210 { 211 DBT data; 212 213 data.data = &status_info; 214 data.size = sizeof(status_info); 215 switch ((*db->put)(db, &undefkey, &data, 0)) { 216 case 0: 217 return; 218 case -1: 219 goto bad; 220 default: 221 abort(); 222 } 223 if ((*db->sync)(db, 0) == -1) { 224 bad: 225 syslog(LOG_ERR, "database corrupted %m"); 226 die(1); 227 } 228 } 229 230 /* change_host -------------------------------------------------------------- */ 231 /* 232 * Purpose: Update/Create an entry for host 233 * Returns: Nothing 234 * Notes: 235 * 236 */ 237 void 238 change_host(char *hostnamep, HostInfo *hp) 239 { 240 DBT key, data; 241 char *ptr; 242 char hostname[MAXHOSTNAMELEN + 1]; 243 HostInfo h; 244 245 strncpy(hostname, hostnamep, MAXHOSTNAMELEN + 1); 246 h = *hp; 247 248 for (ptr = hostname; *ptr; ptr++) 249 if (isupper((unsigned char) *ptr)) 250 *ptr = tolower((unsigned char) *ptr); 251 252 key.data = hostname; 253 key.size = ptr - hostname + 1; 254 data.data = &h; 255 data.size = sizeof(h); 256 257 switch ((*db->put)(db, &key, &data, 0)) { 258 case -1: 259 syslog(LOG_ERR, "database corrupted %m"); 260 die(1); 261 case 0: 262 return; 263 default: 264 abort(); 265 } 266 } 267 268 269 /* find_host -------------------------------------------------------------- */ 270 /* 271 * Purpose: Find the entry in the status file for a given host 272 * Returns: Copy of entry in hd, or NULL 273 * Notes: 274 * 275 */ 276 HostInfo * 277 find_host(char *hostname, HostInfo *hp) 278 { 279 DBT key, data; 280 char *ptr; 281 282 for (ptr = hostname; *ptr; ptr++) 283 if (isupper((unsigned char) *ptr)) 284 *ptr = tolower((unsigned char) *ptr); 285 286 key.data = hostname; 287 key.size = ptr - hostname + 1; 288 switch ((*db->get)(db, &key, &data, 0)) { 289 case 0: 290 if (data.size != sizeof(*hp)) 291 goto bad; 292 return memcpy(hp, data.data, sizeof(*hp)); 293 case 1: 294 return NULL; 295 case -1: 296 goto bad; 297 default: 298 abort(); 299 } 300 301 bad: 302 syslog(LOG_ERR, "Database corrupted %m"); 303 return NULL; 304 } 305 306 /* walk_one ------------------------------------------------------------- */ 307 /* 308 * Purpose: Call the given function if the element is valid 309 * Returns: Nothing - exits on error 310 * Notes: 311 */ 312 static int 313 walk_one(int (*fun)(DBT *, HostInfo *, void *), DBT *key, DBT *data, void *ptr) 314 { 315 HostInfo h; 316 if (key->size == undefkey.size && 317 memcmp(key->data, undefkey.data, key->size) == 0) 318 return 0; 319 if (data->size != sizeof(HostInfo)) { 320 syslog(LOG_ERR, "Bad data in database"); 321 die(1); 322 } 323 memcpy(&h, data->data, sizeof(h)); 324 return (*fun)(key, &h, ptr); 325 } 326 327 /* walk_db -------------------------------------------------------------- */ 328 /* 329 * Purpose: Iterate over all elements calling the given function 330 * Returns: -1 if function failed, 0 on success 331 * Notes: 332 */ 333 static int 334 walk_db(int (*fun)(DBT *, HostInfo *, void *), void *ptr) 335 { 336 DBT key, data; 337 338 switch ((*db->seq)(db, &key, &data, R_FIRST)) { 339 case -1: 340 goto bad; 341 case 1: 342 /* We should have at least the magic entry at this point */ 343 abort(); 344 case 0: 345 if (walk_one(fun, &key, &data, ptr) == -1) 346 return -1; 347 break; 348 default: 349 abort(); 350 } 351 352 353 for (;;) 354 switch ((*db->seq)(db, &key, &data, R_NEXT)) { 355 case -1: 356 goto bad; 357 case 0: 358 if (walk_one(fun, &key, &data, ptr) == -1) 359 return -1; 360 break; 361 case 1: 362 return 0; 363 default: 364 abort(); 365 } 366 bad: 367 syslog(LOG_ERR, "Corrupted database %m"); 368 die(1); 369 } 370 371 /* reset_host ------------------------------------------------------------ */ 372 /* 373 * Purpose: Clean up existing hosts in file. 374 * Returns: Always success 0. 375 * Notes: Clean-up of existing file - monitored hosts will have a 376 * pointer to a list of clients, which refers to memory in 377 * the previous incarnation of the program and so are 378 * meaningless now. These pointers are zeroed and the fact 379 * that the host was previously monitored is recorded by 380 * setting the notifyReqd flag, which will in due course 381 * cause a SM_NOTIFY to be sent. 382 * 383 * Note that if we crash twice in quick succession, some hosts 384 * may already have notifyReqd set, where we didn't manage to 385 * notify them before the second crash occurred. 386 */ 387 static int 388 reset_host(DBT *key, HostInfo *hi, void *ptr) 389 { 390 391 if (hi->monList) { 392 hi->notifyReqd = *(time_t *) ptr; 393 hi->attempts = 0; 394 hi->monList = NULL; 395 change_host((char *)key->data, hi); 396 } 397 return 0; 398 } 399 400 /* check_work ------------------------------------------------------------ */ 401 /* 402 * Purpose: Check if there is work to be done. 403 * Returns: 0 if there is no work to be done -1 if there is. 404 * Notes: 405 */ 406 static int 407 check_work(DBT *key, HostInfo *hi, void *ptr) 408 { 409 return hi->notifyReqd ? -1 : 0; 410 } 411 412 /* unmon_host ------------------------------------------------------------ */ 413 /* 414 * Purpose: Unmonitor a host 415 * Returns: 0 416 * Notes: 417 */ 418 static int 419 unmon_host(DBT *key, HostInfo *hi, void *ptr) 420 { 421 char *name = key->data; 422 423 if (do_unmon(name, hi, ptr)) 424 change_host(name, hi); 425 return 0; 426 } 427 428 /* notify_one ------------------------------------------------------------ */ 429 /* 430 * Purpose: Notify one host. 431 * Returns: 0 if success -1 on failure 432 * Notes: 433 */ 434 static int 435 notify_one(DBT *key, HostInfo *hi, void *ptr) 436 { 437 time_t now = *(time_t *) ptr; 438 char *name = key->data; 439 int error; 440 441 if (hi->notifyReqd == 0 || hi->notifyReqd > now) 442 return 0; 443 444 /* 445 * If one of the initial attempts fails, we wait 446 * for a while and have another go. This is necessary 447 * because when we have crashed, (eg. a power outage) 448 * it is quite possible that we won't be able to 449 * contact all monitored hosts immediately on restart, 450 * either because they crashed too and take longer 451 * to come up (in which case the notification isn't 452 * really required), or more importantly if some 453 * router etc. needed to reach the monitored host 454 * has not come back up yet. In this case, we will 455 * be a bit late in re-establishing locks (after the 456 * grace period) but that is the best we can do. We 457 * try 10 times at 5 sec intervals, 10 more times at 458 * 1 minute intervals, then 24 more times at hourly 459 * intervals, finally giving up altogether if the 460 * host hasn't come back to life after 24 hours. 461 */ 462 if (notify_one_host(name) || hi->attempts++ >= 44) { 463 error = 0; 464 hi->notifyReqd = 0; 465 hi->attempts = 0; 466 } else { 467 error = -1; 468 if (hi->attempts < 10) 469 hi->notifyReqd += 5; 470 else if (hi->attempts < 20) 471 hi->notifyReqd += 60; 472 else 473 hi->notifyReqd += 60 * 60; 474 } 475 change_host(name, hi); 476 return error; 477 } 478 479 /* init_file -------------------------------------------------------------- */ 480 /* 481 * Purpose: Open file, create if necessary, initialise it. 482 * Returns: Nothing - exits on error 483 * Notes: Called before process becomes daemon, hence logs to 484 * stderr rather than syslog. 485 * Opens the file, then mmap()s it for ease of access. 486 * Also performs initial clean-up of the file, zeroing 487 * monitor list pointers, setting the notifyReqd flag in 488 * all hosts that had a monitor list, and incrementing 489 * the state number to the next even value. 490 */ 491 static void 492 init_file(const char *filename) 493 { 494 DBT data; 495 496 db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 0644, DB_HASH, 497 NULL); 498 if (db == NULL) 499 err(EXIT_FAILURE, "Cannot open `%s'", filename); 500 501 switch ((*db->get)(db, &undefkey, &data, 0)) { 502 case 1: 503 /* New database */ 504 (void)memset(&status_info, 0, sizeof(status_info)); 505 sync_file(); 506 return; 507 508 case -1: 509 err(EXIT_FAILURE, "error accessing database (%s)", strerror(errno)); 510 case 0: 511 /* Existing database */ 512 if (data.size != sizeof(status_info)) 513 errx(EXIT_FAILURE, "database corrupted %lu != %lu", 514 (u_long)data.size, (u_long)sizeof(status_info)); 515 memcpy(&status_info, data.data, data.size); 516 break; 517 default: 518 abort(); 519 } 520 521 reset_database(); 522 return; 523 } 524 525 /* reset_database --------------------------------------------------------- */ 526 /* 527 * Purpose: Clears the statd database 528 * Returns: Nothing 529 * Notes: If this is not called on reset, it will leak memory. 530 */ 531 void 532 reset_database() 533 { 534 time_t now = time(NULL); 535 walk_db(reset_host, &now); 536 537 /* Select the next higher even number for the state counter */ 538 status_info.ourState = 539 (status_info.ourState + 2) & 0xfffffffe; 540 status_info.ourState++; /* XXX - ??? */ 541 sync_file(); 542 } 543 544 /* unmon_hosts --------------------------------------------------------- */ 545 /* 546 * Purpose: Unmonitor all the hosts 547 * Returns: Nothing 548 * Notes: 549 */ 550 void 551 unmon_hosts() 552 { 553 time_t now = time(NULL); 554 walk_db(unmon_host, &now); 555 sync_file(); 556 } 557 558 static int 559 notify_one_host(const char *hostname) 560 { 561 struct timeval timeout = {20, 0}; /* 20 secs timeout */ 562 CLIENT *cli; 563 char dummy; 564 stat_chge arg; 565 char our_hostname[MAXHOSTNAMELEN + 1]; 566 567 gethostname(our_hostname, sizeof(our_hostname)); 568 our_hostname[sizeof(our_hostname) - 1] = '\0'; 569 arg.mon_name = our_hostname; 570 arg.state = status_info.ourState; 571 572 if (debug) 573 syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s", 574 hostname, our_hostname); 575 576 cli = clnt_create(hostname, SM_PROG, SM_VERS, "udp"); 577 if (!cli) { 578 syslog(LOG_ERR, "Failed to contact host %s%s", hostname, 579 clnt_spcreateerror("")); 580 return (FALSE); 581 } 582 if (clnt_call(cli, SM_NOTIFY, xdr_stat_chge, &arg, xdr_void, 583 &dummy, timeout) != RPC_SUCCESS) { 584 syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", 585 hostname); 586 clnt_destroy(cli); 587 return (FALSE); 588 } 589 clnt_destroy(cli); 590 return (TRUE); 591 } 592 593 594 static void 595 die(int n) 596 { 597 (*db->close)(db); 598 exit(n); 599 } 600