1 /* $NetBSD: statd.c,v 1.18 2001/02/19 23:22:47 cgd Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Christos Zoulas. All rights reserved. 5 * Copyright (c) 1995 6 * A.R. Gordon (andrew.gordon@net-tel.co.uk). All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed for the FreeBSD project 19 * This product includes software developed by Christos Zoulas. 20 * 4. Neither the name of the author nor the names of any co-contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY ANDREW GORDON AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 */ 37 38 #include <sys/cdefs.h> 39 #ifndef lint 40 __RCSID("$NetBSD: statd.c,v 1.18 2001/02/19 23:22:47 cgd Exp $"); 41 #endif 42 43 /* main() function for status monitor daemon. Some of the code in this */ 44 /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */ 45 /* The actual program logic is in the file procs.c */ 46 47 #include <sys/param.h> 48 49 #include <err.h> 50 #include <ctype.h> 51 #include <errno.h> 52 #include <fcntl.h> 53 #include <signal.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include <syslog.h> 58 #include <unistd.h> 59 #include <util.h> 60 #include <db.h> 61 #include <netconfig.h> 62 63 #include <rpc/rpc.h> 64 65 #include "statd.h" 66 67 struct sigaction sa; 68 int debug = 0; /* Controls syslog() for debug msgs */ 69 int _rpcsvcdirty = 0; /* XXX ??? */ 70 static DB *db; /* Database file */ 71 72 Header status_info; 73 74 static char undefdata[] = "\0\1\2\3\4\5\6\7"; 75 static DBT undefkey = { 76 undefdata, 77 sizeof(undefdata) 78 }; 79 80 81 /* statd.c */ 82 static int walk_one __P((int (*fun )__P ((DBT *, HostInfo *, void *)), DBT *, DBT *, void *)); 83 static int walk_db __P((int (*fun )__P ((DBT *, HostInfo *, void *)), void *)); 84 static int reset_host __P((DBT *, HostInfo *, void *)); 85 static int check_work __P((DBT *, HostInfo *, void *)); 86 static int unmon_host __P((DBT *, HostInfo *, void *)); 87 static int notify_one __P((DBT *, HostInfo *, void *)); 88 static void init_file __P((char *)); 89 static int notify_one_host __P((char *)); 90 static void die __P((int)) __attribute__((__noreturn__)); 91 92 int main __P((int, char **)); 93 94 int 95 main(argc, argv) 96 int argc; 97 char **argv; 98 { 99 int ch; 100 101 while ((ch = getopt(argc, argv, "d")) != (-1)) { 102 switch (ch) { 103 case 'd': 104 debug = 1; 105 break; 106 default: 107 case '?': 108 (void)fprintf(stderr, "usage: %s [-d]\n", 109 getprogname()); 110 exit(1); 111 /* NOTREACHED */ 112 } 113 } 114 (void)rpcb_unset(SM_PROG, SM_VERS, NULL); 115 116 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "udp")) { 117 errx(1, "cannot create udp service."); 118 /* NOTREACHED */ 119 } 120 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "tcp")) { 121 errx(1, "cannot create udp service."); 122 /* NOTREACHED */ 123 } 124 125 init_file("/var/db/statd.status"); 126 127 /* 128 * Note that it is NOT sensible to run this program from inetd - the 129 * protocol assumes that it will run immediately at boot time. 130 */ 131 if (!debug) 132 daemon(0, 0); 133 pidfile(NULL); 134 openlog("rpc.statd", 0, LOG_DAEMON); 135 if (debug) 136 syslog(LOG_INFO, "Starting - debug enabled"); 137 else 138 syslog(LOG_INFO, "Starting"); 139 140 sa.sa_handler = die; 141 sa.sa_flags = 0; 142 sigemptyset(&sa.sa_mask); 143 (void)sigaction(SIGTERM, &sa, NULL); 144 (void)sigaction(SIGQUIT, &sa, NULL); 145 (void)sigaction(SIGHUP, &sa, NULL); 146 (void)sigaction(SIGINT, &sa, NULL); 147 148 sa.sa_handler = SIG_IGN; 149 sa.sa_flags = SA_RESTART; 150 sigemptyset(&sa.sa_mask); 151 sigaddset(&sa.sa_mask, SIGALRM); 152 153 /* Initialisation now complete - start operating */ 154 155 /* Notify hosts that need it */ 156 notify_handler(0); 157 158 while (1) 159 svc_run(); /* Should never return */ 160 die(0); 161 } 162 163 /* notify_handler ---------------------------------------------------------- */ 164 /* 165 * Purpose: Catch SIGALRM and collect process status 166 * Returns: Nothing. 167 * Notes: No special action required, other than to collect the 168 * process status and hence allow the child to die: 169 * we only use child processes for asynchronous transmission 170 * of SM_NOTIFY to other systems, so it is normal for the 171 * children to exit when they have done their work. 172 */ 173 void 174 notify_handler(sig) 175 int sig; 176 { 177 time_t now; 178 179 NO_ALARM; 180 sa.sa_handler = SIG_IGN; 181 (void)sigaction(SIGALRM, &sa, NULL); 182 183 now = time(NULL); 184 185 (void) walk_db(notify_one, &now); 186 187 if (walk_db(check_work, &now) == 0) { 188 /* 189 * No more work to be done. 190 */ 191 CLR_ALARM; 192 return; 193 } 194 sync_file(); 195 ALARM; 196 alarm(5); 197 } 198 199 /* sync_file --------------------------------------------------------------- */ 200 /* 201 * Purpose: Packaged call of msync() to flush changes to mmap()ed file 202 * Returns: Nothing. Errors to syslog. 203 */ 204 void 205 sync_file() 206 { 207 DBT data; 208 209 data.data = &status_info; 210 data.size = sizeof(status_info); 211 switch ((*db->put)(db, &undefkey, &data, 0)) { 212 case 0: 213 return; 214 case -1: 215 goto bad; 216 default: 217 abort(); 218 } 219 if ((*db->sync)(db, 0) == -1) { 220 bad: 221 syslog(LOG_ERR, "database corrupted %m"); 222 die(1); 223 } 224 } 225 226 /* change_host -------------------------------------------------------------- */ 227 /* 228 * Purpose: Update/Create an entry for host 229 * Returns: Nothing 230 * Notes: 231 * 232 */ 233 void 234 change_host(hostname, hp) 235 char *hostname; 236 HostInfo *hp; 237 { 238 DBT key, data; 239 char *ptr; 240 241 for (ptr = hostname; *ptr; ptr++) 242 if (isupper((unsigned char) *ptr)) 243 *ptr = tolower((unsigned char) *ptr); 244 245 key.data = hostname; 246 key.size = ptr - hostname + 1; 247 data.data = hp; 248 data.size = sizeof(*hp); 249 250 switch ((*db->put)(db, &key, &data, 0)) { 251 case -1: 252 syslog(LOG_ERR, "database corrupted %m"); 253 die(1); 254 case 0: 255 return; 256 default: 257 abort(); 258 } 259 } 260 261 262 /* find_host -------------------------------------------------------------- */ 263 /* 264 * Purpose: Find the entry in the status file for a given host 265 * Returns: Copy of entry in hd, or NULL 266 * Notes: 267 * 268 */ 269 HostInfo * 270 find_host(hostname, hp) 271 char *hostname; 272 HostInfo *hp; 273 { 274 DBT key, data; 275 char *ptr; 276 277 for (ptr = hostname; *ptr; ptr++) 278 if (isupper((unsigned char) *ptr)) 279 *ptr = tolower((unsigned char) *ptr); 280 281 key.data = hostname; 282 key.size = ptr - hostname + 1; 283 switch ((*db->get)(db, &key, &data, 0)) { 284 case 0: 285 if (data.size != sizeof(*hp)) 286 goto bad; 287 return memcpy(hp, data.data, sizeof(*hp)); 288 case 1: 289 return NULL; 290 case -1: 291 goto bad; 292 default: 293 abort(); 294 } 295 296 bad: 297 syslog(LOG_ERR, "Database corrupted %m"); 298 return NULL; 299 } 300 301 /* walk_one ------------------------------------------------------------- */ 302 /* 303 * Purpose: Call the given function if the element is valid 304 * Returns: Nothing - exits on error 305 * Notes: 306 */ 307 static int 308 walk_one(fun, key, data, ptr) 309 int (*fun) __P((DBT *, HostInfo *, void *)); 310 DBT *key, *data; 311 void *ptr; 312 { 313 HostInfo h; 314 if (key->size == undefkey.size && 315 memcmp(key->data, undefkey.data, key->size) == 0) 316 return 0; 317 if (data->size != sizeof(HostInfo)) { 318 syslog(LOG_ERR, "Bad data in database"); 319 die(1); 320 } 321 memcpy(&h, data->data, sizeof(h)); 322 return (*fun)(key, &h, ptr); 323 } 324 325 /* walk_db -------------------------------------------------------------- */ 326 /* 327 * Purpose: Iterate over all elements calling the given function 328 * Returns: -1 if function failed, 0 on success 329 * Notes: 330 */ 331 static int 332 walk_db(fun, ptr) 333 int (*fun) __P((DBT *, HostInfo *, void *)); 334 void *ptr; 335 { 336 DBT key, data; 337 338 switch ((*db->seq)(db, &key, &data, R_FIRST)) { 339 case -1: 340 goto bad; 341 case 1: 342 /* We should have at least the magic entry at this point */ 343 abort(); 344 case 0: 345 if (walk_one(fun, &key, &data, ptr) == -1) 346 return -1; 347 break; 348 default: 349 abort(); 350 } 351 352 353 for (;;) 354 switch ((*db->seq)(db, &key, &data, R_NEXT)) { 355 case -1: 356 goto bad; 357 case 0: 358 if (walk_one(fun, &key, &data, ptr) == -1) 359 return -1; 360 break; 361 case 1: 362 return 0; 363 default: 364 abort(); 365 } 366 bad: 367 syslog(LOG_ERR, "Corrupted database %m"); 368 die(1); 369 } 370 371 /* reset_host ------------------------------------------------------------ */ 372 /* 373 * Purpose: Clean up existing hosts in file. 374 * Returns: Always success 0. 375 * Notes: Clean-up of existing file - monitored hosts will have a 376 * pointer to a list of clients, which refers to memory in 377 * the previous incarnation of the program and so are 378 * meaningless now. These pointers are zeroed and the fact 379 * that the host was previously monitored is recorded by 380 * setting the notifyReqd flag, which will in due course 381 * cause a SM_NOTIFY to be sent. 382 * 383 * Note that if we crash twice in quick succession, some hosts 384 * may already have notifyReqd set, where we didn't manage to 385 * notify them before the second crash occurred. 386 */ 387 static int 388 reset_host(key, hi, ptr) 389 DBT *key; 390 HostInfo *hi; 391 void *ptr; 392 { 393 394 if (hi->monList) { 395 hi->notifyReqd = *(time_t *) ptr; 396 hi->attempts = 0; 397 hi->monList = NULL; 398 } 399 return 0; 400 } 401 402 /* check_work ------------------------------------------------------------ */ 403 /* 404 * Purpose: Check if there is work to be done. 405 * Returns: 0 if there is no work to be done -1 if there is. 406 * Notes: 407 */ 408 static int 409 check_work(key, hi, ptr) 410 DBT *key; 411 HostInfo *hi; 412 void *ptr; 413 { 414 return hi->notifyReqd ? -1 : 0; 415 } 416 417 /* unmon_host ------------------------------------------------------------ */ 418 /* 419 * Purpose: Unmonitor a host 420 * Returns: 0 421 * Notes: 422 */ 423 static int 424 unmon_host(key, hi, ptr) 425 DBT *key; 426 HostInfo *hi; 427 void *ptr; 428 { 429 char *name = key->data; 430 431 if (do_unmon(name, hi, ptr)) 432 change_host(name, hi); 433 return 0; 434 } 435 436 /* notify_one ------------------------------------------------------------ */ 437 /* 438 * Purpose: Notify one host. 439 * Returns: 0 if success -1 on failure 440 * Notes: 441 */ 442 static int 443 notify_one(key, hi, ptr) 444 DBT *key; 445 HostInfo *hi; 446 void *ptr; 447 { 448 time_t now = *(time_t *) ptr; 449 char *name = key->data; 450 DBT data; 451 452 if (hi->notifyReqd == 0 || hi->notifyReqd > now) 453 return 0; 454 455 if (notify_one_host(name)) { 456 give_up: 457 hi->notifyReqd = 0; 458 hi->attempts = 0; 459 data.data = hi; 460 data.size = sizeof(*hi); 461 switch ((*db->put)(db, key, &data, 0)) { 462 case -1: 463 syslog(LOG_ERR, "Error storing %s (%m)", name); 464 case 0: 465 return 0; 466 467 default: 468 abort(); 469 } 470 } 471 else { 472 /* 473 * If one of the initial attempts fails, we wait 474 * for a while and have another go. This is necessary 475 * because when we have crashed, (eg. a power outage) 476 * it is quite possible that we won't be able to 477 * contact all monitored hosts immediately on restart, 478 * either because they crashed too and take longer 479 * to come up (in which case the notification isn't 480 * really required), or more importantly if some 481 * router etc. needed to reach the monitored host 482 * has not come back up yet. In this case, we will 483 * be a bit late in re-establishing locks (after the 484 * grace period) but that is the best we can do. We 485 * try 10 times at 5 sec intervals, 10 more times at 486 * 1 minute intervals, then 24 more times at hourly 487 * intervals, finally giving up altogether if the 488 * host hasn't come back to life after 24 hours. 489 */ 490 if (hi->attempts++ >= 44) 491 goto give_up; 492 else if (hi->attempts < 10) 493 hi->notifyReqd += 5; 494 else if (hi->attempts < 20) 495 hi->notifyReqd += 60; 496 else 497 hi->notifyReqd += 60 * 60; 498 return -1; 499 } 500 } 501 502 /* init_file -------------------------------------------------------------- */ 503 /* 504 * Purpose: Open file, create if necessary, initialise it. 505 * Returns: Nothing - exits on error 506 * Notes: Called before process becomes daemon, hence logs to 507 * stderr rather than syslog. 508 * Opens the file, then mmap()s it for ease of access. 509 * Also performs initial clean-up of the file, zeroing 510 * monitor list pointers, setting the notifyReqd flag in 511 * all hosts that had a monitor list, and incrementing 512 * the state number to the next even value. 513 */ 514 static void 515 init_file(filename) 516 char *filename; 517 { 518 DBT data; 519 520 db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 0644, DB_HASH, 521 NULL); 522 if (db == NULL) 523 err(1, "Cannot open `%s'", filename); 524 525 switch ((*db->get)(db, &undefkey, &data, 0)) { 526 case 1: 527 /* New database */ 528 (void)memset(&status_info, 0, sizeof(status_info)); 529 sync_file(); 530 return; 531 532 case -1: 533 err(1, "error accessing database (%m)"); 534 case 0: 535 /* Existing database */ 536 if (data.size != sizeof(status_info)) 537 errx(1, "database corrupted %lu != %lu", 538 (u_long)data.size, (u_long)sizeof(status_info)); 539 break; 540 default: 541 abort(); 542 } 543 544 reset_database(); 545 return; 546 } 547 548 /* reset_database --------------------------------------------------------- */ 549 /* 550 * Purpose: Clears the statd database 551 * Returns: Nothing 552 * Notes: If this is not called on reset, it will leak memory. 553 */ 554 void 555 reset_database() 556 { 557 time_t now = time(NULL); 558 walk_db(reset_host, &now); 559 560 /* Select the next higher even number for the state counter */ 561 status_info.ourState = 562 (status_info.ourState + 2) & 0xfffffffe; 563 status_info.ourState++; /* XXX - ??? */ 564 sync_file(); 565 } 566 567 /* unmon_hosts --------------------------------------------------------- */ 568 /* 569 * Purpose: Unmonitor all the hosts 570 * Returns: Nothing 571 * Notes: 572 */ 573 void 574 unmon_hosts() 575 { 576 time_t now = time(NULL); 577 walk_db(unmon_host, &now); 578 sync_file(); 579 } 580 581 static int 582 notify_one_host(hostname) 583 char *hostname; 584 { 585 struct timeval timeout = {20, 0}; /* 20 secs timeout */ 586 CLIENT *cli; 587 char dummy; 588 stat_chge arg; 589 char our_hostname[MAXHOSTNAMELEN + 1]; 590 591 gethostname(our_hostname, sizeof(our_hostname)); 592 our_hostname[sizeof(our_hostname) - 1] = '\0'; 593 our_hostname[SM_MAXSTRLEN] = '\0'; 594 arg.mon_name = our_hostname; 595 arg.state = status_info.ourState; 596 597 if (debug) 598 syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s", 599 hostname, our_hostname); 600 601 cli = clnt_create(hostname, SM_PROG, SM_VERS, "udp"); 602 if (!cli) { 603 syslog(LOG_ERR, "Failed to contact host %s%s", hostname, 604 clnt_spcreateerror("")); 605 return (FALSE); 606 } 607 if (clnt_call(cli, SM_NOTIFY, xdr_stat_chge, &arg, xdr_void, 608 &dummy, timeout) != RPC_SUCCESS) { 609 syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", 610 hostname); 611 clnt_destroy(cli); 612 return (FALSE); 613 } 614 clnt_destroy(cli); 615 return (TRUE); 616 } 617 618 619 static void 620 die(n) 621 int n; 622 { 623 (*db->close)(db); 624 exit(n); 625 } 626