1 /* $NetBSD: statd.c,v 1.23 2004/01/14 10:29:46 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Christos Zoulas. All rights reserved. 5 * Copyright (c) 1995 6 * A.R. Gordon (andrew.gordon@net-tel.co.uk). All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed for the FreeBSD project 19 * This product includes software developed by Christos Zoulas. 20 * 4. Neither the name of the author nor the names of any co-contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY ANDREW GORDON AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 */ 37 38 #include <sys/cdefs.h> 39 #ifndef lint 40 __RCSID("$NetBSD: statd.c,v 1.23 2004/01/14 10:29:46 yamt Exp $"); 41 #endif 42 43 /* main() function for status monitor daemon. Some of the code in this */ 44 /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */ 45 /* The actual program logic is in the file procs.c */ 46 47 #include <sys/param.h> 48 #include <sys/wait.h> 49 50 #include <err.h> 51 #include <ctype.h> 52 #include <errno.h> 53 #include <fcntl.h> 54 #include <signal.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <syslog.h> 59 #include <unistd.h> 60 #include <util.h> 61 #include <db.h> 62 #include <netconfig.h> 63 64 #include <rpc/rpc.h> 65 66 #include "statd.h" 67 68 struct sigaction sa; 69 int debug = 0; /* Controls syslog() for debug msgs */ 70 int _rpcsvcdirty = 0; /* XXX ??? */ 71 static DB *db; /* Database file */ 72 73 Header status_info; 74 75 static char undefdata[] = "\0\1\2\3\4\5\6\7"; 76 static DBT undefkey = { 77 undefdata, 78 sizeof(undefdata) 79 }; 80 81 82 /* statd.c */ 83 static int walk_one __P((int (*fun )__P ((DBT *, HostInfo *, void *)), DBT *, DBT *, void *)); 84 static int walk_db __P((int (*fun )__P ((DBT *, HostInfo *, void *)), void *)); 85 static int reset_host __P((DBT *, HostInfo *, void *)); 86 static int check_work __P((DBT *, HostInfo *, void *)); 87 static int unmon_host __P((DBT *, HostInfo *, void *)); 88 static int notify_one __P((DBT *, HostInfo *, void *)); 89 static void init_file __P((char *)); 90 static int notify_one_host __P((char *)); 91 static void die __P((int)) __attribute__((__noreturn__)); 92 93 int main __P((int, char **)); 94 95 int 96 main(argc, argv) 97 int argc; 98 char **argv; 99 { 100 int ch; 101 struct sigaction nsa; 102 int maxrec = RPC_MAXDATASIZE; 103 104 sigemptyset(&nsa.sa_mask); 105 nsa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT; 106 nsa.sa_handler = SIG_IGN; 107 (void)sigaction(SIGCHLD, &nsa, NULL); 108 109 while ((ch = getopt(argc, argv, "d")) != (-1)) { 110 switch (ch) { 111 case 'd': 112 debug = 1; 113 break; 114 default: 115 case '?': 116 (void)fprintf(stderr, "usage: %s [-d]\n", 117 getprogname()); 118 exit(1); 119 /* NOTREACHED */ 120 } 121 } 122 (void)rpcb_unset(SM_PROG, SM_VERS, NULL); 123 124 rpc_control(RPC_SVC_CONNMAXREC_SET, &maxrec); 125 126 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "udp")) { 127 errx(1, "cannot create udp service."); 128 /* NOTREACHED */ 129 } 130 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "tcp")) { 131 errx(1, "cannot create udp service."); 132 /* NOTREACHED */ 133 } 134 135 init_file("/var/db/statd.status"); 136 137 /* 138 * Note that it is NOT sensible to run this program from inetd - the 139 * protocol assumes that it will run immediately at boot time. 140 */ 141 if (!debug) 142 daemon(0, 0); 143 pidfile(NULL); 144 openlog("rpc.statd", 0, LOG_DAEMON); 145 if (debug) 146 syslog(LOG_INFO, "Starting - debug enabled"); 147 else 148 syslog(LOG_INFO, "Starting"); 149 150 sa.sa_handler = die; 151 sa.sa_flags = 0; 152 sigemptyset(&sa.sa_mask); 153 (void)sigaction(SIGTERM, &sa, NULL); 154 (void)sigaction(SIGQUIT, &sa, NULL); 155 (void)sigaction(SIGHUP, &sa, NULL); 156 (void)sigaction(SIGINT, &sa, NULL); 157 158 sa.sa_handler = SIG_IGN; 159 sa.sa_flags = SA_RESTART; 160 sigemptyset(&sa.sa_mask); 161 sigaddset(&sa.sa_mask, SIGALRM); 162 163 /* Initialisation now complete - start operating */ 164 165 /* Notify hosts that need it */ 166 notify_handler(0); 167 168 while (1) 169 svc_run(); /* Should never return */ 170 die(0); 171 } 172 173 /* notify_handler ---------------------------------------------------------- */ 174 /* 175 * Purpose: Catch SIGALRM and collect process status 176 * Returns: Nothing. 177 * Notes: No special action required, other than to collect the 178 * process status and hence allow the child to die: 179 * we only use child processes for asynchronous transmission 180 * of SM_NOTIFY to other systems, so it is normal for the 181 * children to exit when they have done their work. 182 */ 183 void 184 notify_handler(sig) 185 int sig; 186 { 187 time_t now; 188 189 NO_ALARM; 190 sa.sa_handler = SIG_IGN; 191 (void)sigaction(SIGALRM, &sa, NULL); 192 193 now = time(NULL); 194 195 (void) walk_db(notify_one, &now); 196 197 if (walk_db(check_work, &now) == 0) { 198 /* 199 * No more work to be done. 200 */ 201 CLR_ALARM; 202 return; 203 } 204 sync_file(); 205 ALARM; 206 alarm(5); 207 } 208 209 /* sync_file --------------------------------------------------------------- */ 210 /* 211 * Purpose: Packaged call of msync() to flush changes to mmap()ed file 212 * Returns: Nothing. Errors to syslog. 213 */ 214 void 215 sync_file() 216 { 217 DBT data; 218 219 data.data = &status_info; 220 data.size = sizeof(status_info); 221 switch ((*db->put)(db, &undefkey, &data, 0)) { 222 case 0: 223 return; 224 case -1: 225 goto bad; 226 default: 227 abort(); 228 } 229 if ((*db->sync)(db, 0) == -1) { 230 bad: 231 syslog(LOG_ERR, "database corrupted %m"); 232 die(1); 233 } 234 } 235 236 /* change_host -------------------------------------------------------------- */ 237 /* 238 * Purpose: Update/Create an entry for host 239 * Returns: Nothing 240 * Notes: 241 * 242 */ 243 void 244 change_host(hostname, hp) 245 char *hostname; 246 HostInfo *hp; 247 { 248 DBT key, data; 249 char *ptr; 250 251 for (ptr = hostname; *ptr; ptr++) 252 if (isupper((unsigned char) *ptr)) 253 *ptr = tolower((unsigned char) *ptr); 254 255 key.data = hostname; 256 key.size = ptr - hostname + 1; 257 data.data = hp; 258 data.size = sizeof(*hp); 259 260 switch ((*db->put)(db, &key, &data, 0)) { 261 case -1: 262 syslog(LOG_ERR, "database corrupted %m"); 263 die(1); 264 case 0: 265 return; 266 default: 267 abort(); 268 } 269 } 270 271 272 /* find_host -------------------------------------------------------------- */ 273 /* 274 * Purpose: Find the entry in the status file for a given host 275 * Returns: Copy of entry in hd, or NULL 276 * Notes: 277 * 278 */ 279 HostInfo * 280 find_host(hostname, hp) 281 char *hostname; 282 HostInfo *hp; 283 { 284 DBT key, data; 285 char *ptr; 286 287 for (ptr = hostname; *ptr; ptr++) 288 if (isupper((unsigned char) *ptr)) 289 *ptr = tolower((unsigned char) *ptr); 290 291 key.data = hostname; 292 key.size = ptr - hostname + 1; 293 switch ((*db->get)(db, &key, &data, 0)) { 294 case 0: 295 if (data.size != sizeof(*hp)) 296 goto bad; 297 return memcpy(hp, data.data, sizeof(*hp)); 298 case 1: 299 return NULL; 300 case -1: 301 goto bad; 302 default: 303 abort(); 304 } 305 306 bad: 307 syslog(LOG_ERR, "Database corrupted %m"); 308 return NULL; 309 } 310 311 /* walk_one ------------------------------------------------------------- */ 312 /* 313 * Purpose: Call the given function if the element is valid 314 * Returns: Nothing - exits on error 315 * Notes: 316 */ 317 static int 318 walk_one(fun, key, data, ptr) 319 int (*fun) __P((DBT *, HostInfo *, void *)); 320 DBT *key, *data; 321 void *ptr; 322 { 323 HostInfo h; 324 if (key->size == undefkey.size && 325 memcmp(key->data, undefkey.data, key->size) == 0) 326 return 0; 327 if (data->size != sizeof(HostInfo)) { 328 syslog(LOG_ERR, "Bad data in database"); 329 die(1); 330 } 331 memcpy(&h, data->data, sizeof(h)); 332 return (*fun)(key, &h, ptr); 333 } 334 335 /* walk_db -------------------------------------------------------------- */ 336 /* 337 * Purpose: Iterate over all elements calling the given function 338 * Returns: -1 if function failed, 0 on success 339 * Notes: 340 */ 341 static int 342 walk_db(fun, ptr) 343 int (*fun) __P((DBT *, HostInfo *, void *)); 344 void *ptr; 345 { 346 DBT key, data; 347 348 switch ((*db->seq)(db, &key, &data, R_FIRST)) { 349 case -1: 350 goto bad; 351 case 1: 352 /* We should have at least the magic entry at this point */ 353 abort(); 354 case 0: 355 if (walk_one(fun, &key, &data, ptr) == -1) 356 return -1; 357 break; 358 default: 359 abort(); 360 } 361 362 363 for (;;) 364 switch ((*db->seq)(db, &key, &data, R_NEXT)) { 365 case -1: 366 goto bad; 367 case 0: 368 if (walk_one(fun, &key, &data, ptr) == -1) 369 return -1; 370 break; 371 case 1: 372 return 0; 373 default: 374 abort(); 375 } 376 bad: 377 syslog(LOG_ERR, "Corrupted database %m"); 378 die(1); 379 } 380 381 /* reset_host ------------------------------------------------------------ */ 382 /* 383 * Purpose: Clean up existing hosts in file. 384 * Returns: Always success 0. 385 * Notes: Clean-up of existing file - monitored hosts will have a 386 * pointer to a list of clients, which refers to memory in 387 * the previous incarnation of the program and so are 388 * meaningless now. These pointers are zeroed and the fact 389 * that the host was previously monitored is recorded by 390 * setting the notifyReqd flag, which will in due course 391 * cause a SM_NOTIFY to be sent. 392 * 393 * Note that if we crash twice in quick succession, some hosts 394 * may already have notifyReqd set, where we didn't manage to 395 * notify them before the second crash occurred. 396 */ 397 static int 398 reset_host(key, hi, ptr) 399 DBT *key; 400 HostInfo *hi; 401 void *ptr; 402 { 403 404 if (hi->monList) { 405 hi->notifyReqd = *(time_t *) ptr; 406 hi->attempts = 0; 407 hi->monList = NULL; 408 change_host((char *)key->data, hi); 409 } 410 return 0; 411 } 412 413 /* check_work ------------------------------------------------------------ */ 414 /* 415 * Purpose: Check if there is work to be done. 416 * Returns: 0 if there is no work to be done -1 if there is. 417 * Notes: 418 */ 419 static int 420 check_work(key, hi, ptr) 421 DBT *key; 422 HostInfo *hi; 423 void *ptr; 424 { 425 return hi->notifyReqd ? -1 : 0; 426 } 427 428 /* unmon_host ------------------------------------------------------------ */ 429 /* 430 * Purpose: Unmonitor a host 431 * Returns: 0 432 * Notes: 433 */ 434 static int 435 unmon_host(key, hi, ptr) 436 DBT *key; 437 HostInfo *hi; 438 void *ptr; 439 { 440 char *name = key->data; 441 442 if (do_unmon(name, hi, ptr)) 443 change_host(name, hi); 444 return 0; 445 } 446 447 /* notify_one ------------------------------------------------------------ */ 448 /* 449 * Purpose: Notify one host. 450 * Returns: 0 if success -1 on failure 451 * Notes: 452 */ 453 static int 454 notify_one(key, hi, ptr) 455 DBT *key; 456 HostInfo *hi; 457 void *ptr; 458 { 459 time_t now = *(time_t *) ptr; 460 char *name = key->data; 461 DBT data; 462 463 if (hi->notifyReqd == 0 || hi->notifyReqd > now) 464 return 0; 465 466 if (notify_one_host(name)) { 467 give_up: 468 hi->notifyReqd = 0; 469 hi->attempts = 0; 470 data.data = hi; 471 data.size = sizeof(*hi); 472 switch ((*db->put)(db, key, &data, 0)) { 473 case -1: 474 syslog(LOG_ERR, "Error storing %s (%m)", name); 475 case 0: 476 return 0; 477 478 default: 479 abort(); 480 } 481 } 482 else { 483 /* 484 * If one of the initial attempts fails, we wait 485 * for a while and have another go. This is necessary 486 * because when we have crashed, (eg. a power outage) 487 * it is quite possible that we won't be able to 488 * contact all monitored hosts immediately on restart, 489 * either because they crashed too and take longer 490 * to come up (in which case the notification isn't 491 * really required), or more importantly if some 492 * router etc. needed to reach the monitored host 493 * has not come back up yet. In this case, we will 494 * be a bit late in re-establishing locks (after the 495 * grace period) but that is the best we can do. We 496 * try 10 times at 5 sec intervals, 10 more times at 497 * 1 minute intervals, then 24 more times at hourly 498 * intervals, finally giving up altogether if the 499 * host hasn't come back to life after 24 hours. 500 */ 501 if (hi->attempts++ >= 44) 502 goto give_up; 503 else if (hi->attempts < 10) 504 hi->notifyReqd += 5; 505 else if (hi->attempts < 20) 506 hi->notifyReqd += 60; 507 else 508 hi->notifyReqd += 60 * 60; 509 return -1; 510 } 511 } 512 513 /* init_file -------------------------------------------------------------- */ 514 /* 515 * Purpose: Open file, create if necessary, initialise it. 516 * Returns: Nothing - exits on error 517 * Notes: Called before process becomes daemon, hence logs to 518 * stderr rather than syslog. 519 * Opens the file, then mmap()s it for ease of access. 520 * Also performs initial clean-up of the file, zeroing 521 * monitor list pointers, setting the notifyReqd flag in 522 * all hosts that had a monitor list, and incrementing 523 * the state number to the next even value. 524 */ 525 static void 526 init_file(filename) 527 char *filename; 528 { 529 DBT data; 530 531 db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 0644, DB_HASH, 532 NULL); 533 if (db == NULL) 534 err(1, "Cannot open `%s'", filename); 535 536 switch ((*db->get)(db, &undefkey, &data, 0)) { 537 case 1: 538 /* New database */ 539 (void)memset(&status_info, 0, sizeof(status_info)); 540 sync_file(); 541 return; 542 543 case -1: 544 err(1, "error accessing database (%m)"); 545 case 0: 546 /* Existing database */ 547 if (data.size != sizeof(status_info)) 548 errx(1, "database corrupted %lu != %lu", 549 (u_long)data.size, (u_long)sizeof(status_info)); 550 memcpy(&status_info, data.data, data.size); 551 break; 552 default: 553 abort(); 554 } 555 556 reset_database(); 557 return; 558 } 559 560 /* reset_database --------------------------------------------------------- */ 561 /* 562 * Purpose: Clears the statd database 563 * Returns: Nothing 564 * Notes: If this is not called on reset, it will leak memory. 565 */ 566 void 567 reset_database() 568 { 569 time_t now = time(NULL); 570 walk_db(reset_host, &now); 571 572 /* Select the next higher even number for the state counter */ 573 status_info.ourState = 574 (status_info.ourState + 2) & 0xfffffffe; 575 status_info.ourState++; /* XXX - ??? */ 576 sync_file(); 577 } 578 579 /* unmon_hosts --------------------------------------------------------- */ 580 /* 581 * Purpose: Unmonitor all the hosts 582 * Returns: Nothing 583 * Notes: 584 */ 585 void 586 unmon_hosts() 587 { 588 time_t now = time(NULL); 589 walk_db(unmon_host, &now); 590 sync_file(); 591 } 592 593 static int 594 notify_one_host(hostname) 595 char *hostname; 596 { 597 struct timeval timeout = {20, 0}; /* 20 secs timeout */ 598 CLIENT *cli; 599 char dummy; 600 stat_chge arg; 601 char our_hostname[MAXHOSTNAMELEN + 1]; 602 603 gethostname(our_hostname, sizeof(our_hostname)); 604 our_hostname[sizeof(our_hostname) - 1] = '\0'; 605 our_hostname[SM_MAXSTRLEN] = '\0'; 606 arg.mon_name = our_hostname; 607 arg.state = status_info.ourState; 608 609 if (debug) 610 syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s", 611 hostname, our_hostname); 612 613 cli = clnt_create(hostname, SM_PROG, SM_VERS, "udp"); 614 if (!cli) { 615 syslog(LOG_ERR, "Failed to contact host %s%s", hostname, 616 clnt_spcreateerror("")); 617 return (FALSE); 618 } 619 if (clnt_call(cli, SM_NOTIFY, xdr_stat_chge, &arg, xdr_void, 620 &dummy, timeout) != RPC_SUCCESS) { 621 syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", 622 hostname); 623 clnt_destroy(cli); 624 return (FALSE); 625 } 626 clnt_destroy(cli); 627 return (TRUE); 628 } 629 630 631 static void 632 die(n) 633 int n; 634 { 635 (*db->close)(db); 636 exit(n); 637 } 638