1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate #include "mpd_defs.h" 30*0Sstevel@tonic-gate #include "mpd_tables.h" 31*0Sstevel@tonic-gate 32*0Sstevel@tonic-gate int debug = 0; /* Debug flag */ 33*0Sstevel@tonic-gate static int pollfd_num = 0; /* Num. of poll descriptors */ 34*0Sstevel@tonic-gate static struct pollfd *pollfds = NULL; /* Array of poll descriptors */ 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gate /* All times below in ms */ 37*0Sstevel@tonic-gate int user_failure_detection_time; /* user specified failure detection */ 38*0Sstevel@tonic-gate /* time (fdt) */ 39*0Sstevel@tonic-gate int user_probe_interval; /* derived from user specified fdt */ 40*0Sstevel@tonic-gate 41*0Sstevel@tonic-gate static int rtsock_v4; /* AF_INET routing socket */ 42*0Sstevel@tonic-gate static int rtsock_v6; /* AF_INET6 routing socket */ 43*0Sstevel@tonic-gate int ifsock_v4 = -1; /* IPv4 socket for ioctls */ 44*0Sstevel@tonic-gate int ifsock_v6 = -1; /* IPv6 socket for ioctls */ 45*0Sstevel@tonic-gate static int lsock_v4; /* Listen socket to detect mpathd */ 46*0Sstevel@tonic-gate static int lsock_v6; /* Listen socket to detect mpathd */ 47*0Sstevel@tonic-gate static int mibfd = -1; /* fd to get mib info */ 48*0Sstevel@tonic-gate static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */ 49*0Sstevel@tonic-gate 50*0Sstevel@tonic-gate boolean_t full_scan_required = _B_FALSE; 51*0Sstevel@tonic-gate static uint_t last_initifs_time; /* Time when initifs was last run */ 52*0Sstevel@tonic-gate static char **argv0; /* Saved for re-exec on SIGHUP */ 53*0Sstevel@tonic-gate boolean_t handle_link_notifications = _B_TRUE; 54*0Sstevel@tonic-gate 55*0Sstevel@tonic-gate static void initlog(void); 56*0Sstevel@tonic-gate static void run_timeouts(void); 57*0Sstevel@tonic-gate static void initifs(void); 58*0Sstevel@tonic-gate static void check_if_removed(struct phyint_instance *pii); 59*0Sstevel@tonic-gate static void select_test_ifs(void); 60*0Sstevel@tonic-gate static void ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len); 61*0Sstevel@tonic-gate static void ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len); 62*0Sstevel@tonic-gate static void router_add_v4(mib2_ipRouteEntry_t *rp1, 63*0Sstevel@tonic-gate struct in_addr nexthop_v4); 64*0Sstevel@tonic-gate static void router_add_v6(mib2_ipv6RouteEntry_t *rp1, 65*0Sstevel@tonic-gate struct in6_addr nexthop_v6); 66*0Sstevel@tonic-gate static void router_add_common(int af, char *ifname, 67*0Sstevel@tonic-gate struct in6_addr nexthop); 68*0Sstevel@tonic-gate static void init_router_targets(); 69*0Sstevel@tonic-gate static void cleanup(void); 70*0Sstevel@tonic-gate static int setup_listener(int af); 71*0Sstevel@tonic-gate static void check_config(void); 72*0Sstevel@tonic-gate static void check_addr_unique(int af, char *name); 73*0Sstevel@tonic-gate static void init_host_targets(void); 74*0Sstevel@tonic-gate static void dup_host_targets(struct phyint_instance *desired_pii); 75*0Sstevel@tonic-gate static void loopback_cmd(int sock, int family); 76*0Sstevel@tonic-gate static int poll_remove(int fd); 77*0Sstevel@tonic-gate static boolean_t daemonize(void); 78*0Sstevel@tonic-gate static int closefunc(void *, int); 79*0Sstevel@tonic-gate static unsigned int process_cmd(int newfd, union mi_commands *mpi); 80*0Sstevel@tonic-gate static unsigned int process_query(int fd, mi_query_t *miq); 81*0Sstevel@tonic-gate static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop); 82*0Sstevel@tonic-gate static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp); 83*0Sstevel@tonic-gate static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop); 84*0Sstevel@tonic-gate static unsigned int send_result(int fd, unsigned int error, int syserror); 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate /* 87*0Sstevel@tonic-gate * Return the current time in milliseconds (from an arbitrary reference) 88*0Sstevel@tonic-gate * truncated to fit into an int. Truncation is ok since we are interested 89*0Sstevel@tonic-gate * only in differences and not the absolute values. 90*0Sstevel@tonic-gate */ 91*0Sstevel@tonic-gate uint_t 92*0Sstevel@tonic-gate getcurrenttime(void) 93*0Sstevel@tonic-gate { 94*0Sstevel@tonic-gate uint_t cur_time; /* In ms */ 95*0Sstevel@tonic-gate 96*0Sstevel@tonic-gate /* 97*0Sstevel@tonic-gate * Use of a non-user-adjustable source of time is 98*0Sstevel@tonic-gate * required. However millisecond precision is sufficient. 99*0Sstevel@tonic-gate * divide by 10^6 100*0Sstevel@tonic-gate */ 101*0Sstevel@tonic-gate cur_time = (uint_t)(gethrtime() / 1000000LL); 102*0Sstevel@tonic-gate return (cur_time); 103*0Sstevel@tonic-gate } 104*0Sstevel@tonic-gate 105*0Sstevel@tonic-gate /* 106*0Sstevel@tonic-gate * Add fd to the set being polled. Returns 0 if ok; -1 if failed. 107*0Sstevel@tonic-gate */ 108*0Sstevel@tonic-gate int 109*0Sstevel@tonic-gate poll_add(int fd) 110*0Sstevel@tonic-gate { 111*0Sstevel@tonic-gate int i; 112*0Sstevel@tonic-gate int new_num; 113*0Sstevel@tonic-gate struct pollfd *newfds; 114*0Sstevel@tonic-gate retry: 115*0Sstevel@tonic-gate /* Check if already present */ 116*0Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 117*0Sstevel@tonic-gate if (pollfds[i].fd == fd) 118*0Sstevel@tonic-gate return (0); 119*0Sstevel@tonic-gate } 120*0Sstevel@tonic-gate /* Check for empty spot already present */ 121*0Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 122*0Sstevel@tonic-gate if (pollfds[i].fd == -1) { 123*0Sstevel@tonic-gate pollfds[i].fd = fd; 124*0Sstevel@tonic-gate return (0); 125*0Sstevel@tonic-gate } 126*0Sstevel@tonic-gate } 127*0Sstevel@tonic-gate 128*0Sstevel@tonic-gate /* Allocate space for 32 more fds and initialize to -1 */ 129*0Sstevel@tonic-gate new_num = pollfd_num + 32; 130*0Sstevel@tonic-gate newfds = realloc(pollfds, new_num * sizeof (struct pollfd)); 131*0Sstevel@tonic-gate if (newfds == NULL) { 132*0Sstevel@tonic-gate logperror("poll_add: realloc"); 133*0Sstevel@tonic-gate return (-1); 134*0Sstevel@tonic-gate } 135*0Sstevel@tonic-gate for (i = pollfd_num; i < new_num; i++) { 136*0Sstevel@tonic-gate newfds[i].fd = -1; 137*0Sstevel@tonic-gate newfds[i].events = POLLIN; 138*0Sstevel@tonic-gate } 139*0Sstevel@tonic-gate pollfd_num = new_num; 140*0Sstevel@tonic-gate pollfds = newfds; 141*0Sstevel@tonic-gate goto retry; 142*0Sstevel@tonic-gate } 143*0Sstevel@tonic-gate 144*0Sstevel@tonic-gate /* 145*0Sstevel@tonic-gate * Remove fd from the set being polled. Returns 0 if ok; -1 if failed. 146*0Sstevel@tonic-gate */ 147*0Sstevel@tonic-gate static int 148*0Sstevel@tonic-gate poll_remove(int fd) 149*0Sstevel@tonic-gate { 150*0Sstevel@tonic-gate int i; 151*0Sstevel@tonic-gate 152*0Sstevel@tonic-gate /* Check if already present */ 153*0Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 154*0Sstevel@tonic-gate if (pollfds[i].fd == fd) { 155*0Sstevel@tonic-gate pollfds[i].fd = -1; 156*0Sstevel@tonic-gate return (0); 157*0Sstevel@tonic-gate } 158*0Sstevel@tonic-gate } 159*0Sstevel@tonic-gate return (-1); 160*0Sstevel@tonic-gate } 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate /* 163*0Sstevel@tonic-gate * Extract information about the phyint instance. If the phyint instance still 164*0Sstevel@tonic-gate * exists in the kernel then set pii_in_use, else clear it. check_if_removed() 165*0Sstevel@tonic-gate * will use it to detect phyint instances that don't exist any longer and 166*0Sstevel@tonic-gate * remove them, from our database of phyint instances. 167*0Sstevel@tonic-gate * Return value: 168*0Sstevel@tonic-gate * returns true if the phyint instance exists in the kernel, 169*0Sstevel@tonic-gate * returns false otherwise 170*0Sstevel@tonic-gate */ 171*0Sstevel@tonic-gate static boolean_t 172*0Sstevel@tonic-gate pii_process(int af, char *name, struct phyint_instance **pii_p) 173*0Sstevel@tonic-gate { 174*0Sstevel@tonic-gate int err; 175*0Sstevel@tonic-gate struct phyint_instance *pii; 176*0Sstevel@tonic-gate struct phyint_instance *pii_other; 177*0Sstevel@tonic-gate 178*0Sstevel@tonic-gate if (debug & D_PHYINT) 179*0Sstevel@tonic-gate logdebug("pii_process(%s %s)\n", AF_STR(af), name); 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gate pii = phyint_inst_lookup(af, name); 182*0Sstevel@tonic-gate if (pii == NULL) { 183*0Sstevel@tonic-gate /* 184*0Sstevel@tonic-gate * Phyint instance does not exist in our tables, 185*0Sstevel@tonic-gate * create new phyint instance 186*0Sstevel@tonic-gate */ 187*0Sstevel@tonic-gate pii = phyint_inst_init_from_k(af, name); 188*0Sstevel@tonic-gate } else { 189*0Sstevel@tonic-gate /* Phyint exists in our tables */ 190*0Sstevel@tonic-gate err = phyint_inst_update_from_k(pii); 191*0Sstevel@tonic-gate 192*0Sstevel@tonic-gate switch (err) { 193*0Sstevel@tonic-gate case PI_IOCTL_ERROR: 194*0Sstevel@tonic-gate /* Some ioctl error. don't change anything */ 195*0Sstevel@tonic-gate pii->pii_in_use = 1; 196*0Sstevel@tonic-gate break; 197*0Sstevel@tonic-gate 198*0Sstevel@tonic-gate case PI_GROUP_CHANGED: 199*0Sstevel@tonic-gate /* 200*0Sstevel@tonic-gate * The phyint has changed group. 201*0Sstevel@tonic-gate */ 202*0Sstevel@tonic-gate restore_phyint(pii->pii_phyint); 203*0Sstevel@tonic-gate /* FALLTHRU */ 204*0Sstevel@tonic-gate 205*0Sstevel@tonic-gate case PI_IFINDEX_CHANGED: 206*0Sstevel@tonic-gate /* 207*0Sstevel@tonic-gate * Interface index has changed. Delete and 208*0Sstevel@tonic-gate * recreate the phyint as it is quite likely 209*0Sstevel@tonic-gate * the interface has been unplumbed and replumbed. 210*0Sstevel@tonic-gate */ 211*0Sstevel@tonic-gate pii_other = phyint_inst_other(pii); 212*0Sstevel@tonic-gate if (pii_other != NULL) 213*0Sstevel@tonic-gate phyint_inst_delete(pii_other); 214*0Sstevel@tonic-gate phyint_inst_delete(pii); 215*0Sstevel@tonic-gate pii = phyint_inst_init_from_k(af, name); 216*0Sstevel@tonic-gate break; 217*0Sstevel@tonic-gate 218*0Sstevel@tonic-gate case PI_DELETED: 219*0Sstevel@tonic-gate /* Phyint instance has disappeared from kernel */ 220*0Sstevel@tonic-gate pii->pii_in_use = 0; 221*0Sstevel@tonic-gate break; 222*0Sstevel@tonic-gate 223*0Sstevel@tonic-gate case PI_OK: 224*0Sstevel@tonic-gate /* Phyint instance exists and is fine */ 225*0Sstevel@tonic-gate pii->pii_in_use = 1; 226*0Sstevel@tonic-gate break; 227*0Sstevel@tonic-gate 228*0Sstevel@tonic-gate default: 229*0Sstevel@tonic-gate /* Unknown status */ 230*0Sstevel@tonic-gate logerr("pii_process: Unknown status %d\n", err); 231*0Sstevel@tonic-gate break; 232*0Sstevel@tonic-gate } 233*0Sstevel@tonic-gate } 234*0Sstevel@tonic-gate 235*0Sstevel@tonic-gate *pii_p = pii; 236*0Sstevel@tonic-gate if (pii != NULL) 237*0Sstevel@tonic-gate return (pii->pii_in_use ? _B_TRUE : _B_FALSE); 238*0Sstevel@tonic-gate else 239*0Sstevel@tonic-gate return (_B_FALSE); 240*0Sstevel@tonic-gate } 241*0Sstevel@tonic-gate 242*0Sstevel@tonic-gate /* 243*0Sstevel@tonic-gate * This phyint is leaving the group. Try to restore the phyint to its 244*0Sstevel@tonic-gate * initial state. Return the addresses that belong to other group members, 245*0Sstevel@tonic-gate * to the group, and take back any addresses owned by this phyint 246*0Sstevel@tonic-gate */ 247*0Sstevel@tonic-gate void 248*0Sstevel@tonic-gate restore_phyint(struct phyint *pi) 249*0Sstevel@tonic-gate { 250*0Sstevel@tonic-gate if (pi->pi_group == phyint_anongroup) 251*0Sstevel@tonic-gate return; 252*0Sstevel@tonic-gate 253*0Sstevel@tonic-gate /* 254*0Sstevel@tonic-gate * Move everthing to some other member in the group. 255*0Sstevel@tonic-gate * The phyint has changed group in the kernel. But we 256*0Sstevel@tonic-gate * have yet to do it in our tables. 257*0Sstevel@tonic-gate */ 258*0Sstevel@tonic-gate if (!pi->pi_empty) 259*0Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_TO_ANY); 260*0Sstevel@tonic-gate /* 261*0Sstevel@tonic-gate * Move all addresses owned by 'pi' back to pi, from each 262*0Sstevel@tonic-gate * of the other members of the group 263*0Sstevel@tonic-gate */ 264*0Sstevel@tonic-gate (void) try_failback(pi, _B_FALSE); 265*0Sstevel@tonic-gate } 266*0Sstevel@tonic-gate 267*0Sstevel@tonic-gate /* 268*0Sstevel@tonic-gate * Scan all interfaces to detect changes as well as new and deleted interfaces 269*0Sstevel@tonic-gate */ 270*0Sstevel@tonic-gate static void 271*0Sstevel@tonic-gate initifs() 272*0Sstevel@tonic-gate { 273*0Sstevel@tonic-gate int n; 274*0Sstevel@tonic-gate int af; 275*0Sstevel@tonic-gate char *cp; 276*0Sstevel@tonic-gate char *buf; 277*0Sstevel@tonic-gate int numifs; 278*0Sstevel@tonic-gate struct lifnum lifn; 279*0Sstevel@tonic-gate struct lifconf lifc; 280*0Sstevel@tonic-gate struct lifreq *lifr; 281*0Sstevel@tonic-gate struct logint *li; 282*0Sstevel@tonic-gate struct phyint_instance *pii; 283*0Sstevel@tonic-gate struct phyint_instance *next_pii; 284*0Sstevel@tonic-gate char pi_name[LIFNAMSIZ + 1]; 285*0Sstevel@tonic-gate boolean_t exists; 286*0Sstevel@tonic-gate struct phyint *pi; 287*0Sstevel@tonic-gate 288*0Sstevel@tonic-gate if (debug & D_PHYINT) 289*0Sstevel@tonic-gate logdebug("initifs: Scanning interfaces\n"); 290*0Sstevel@tonic-gate 291*0Sstevel@tonic-gate last_initifs_time = getcurrenttime(); 292*0Sstevel@tonic-gate 293*0Sstevel@tonic-gate /* 294*0Sstevel@tonic-gate * Mark the interfaces so that we can find phyints and logints 295*0Sstevel@tonic-gate * which have disappeared from the kernel. pii_process() and 296*0Sstevel@tonic-gate * logint_init_from_k() will set {pii,li}_in_use when they find 297*0Sstevel@tonic-gate * the interface in the kernel. Also, clear dupaddr bit on probe 298*0Sstevel@tonic-gate * logint. check_addr_unique() will set the dupaddr bit on the 299*0Sstevel@tonic-gate * probe logint, if the testaddress is not unique. 300*0Sstevel@tonic-gate */ 301*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 302*0Sstevel@tonic-gate pii->pii_in_use = 0; 303*0Sstevel@tonic-gate for (li = pii->pii_logint; li != NULL; li = li->li_next) { 304*0Sstevel@tonic-gate li->li_in_use = 0; 305*0Sstevel@tonic-gate if (pii->pii_probe_logint == li) 306*0Sstevel@tonic-gate li->li_dupaddr = 0; 307*0Sstevel@tonic-gate } 308*0Sstevel@tonic-gate } 309*0Sstevel@tonic-gate 310*0Sstevel@tonic-gate lifn.lifn_family = AF_UNSPEC; 311*0Sstevel@tonic-gate lifn.lifn_flags = 0; 312*0Sstevel@tonic-gate if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) { 313*0Sstevel@tonic-gate logperror("initifs: ioctl (get interface numbers)"); 314*0Sstevel@tonic-gate return; 315*0Sstevel@tonic-gate } 316*0Sstevel@tonic-gate numifs = lifn.lifn_count; 317*0Sstevel@tonic-gate 318*0Sstevel@tonic-gate buf = (char *)calloc(numifs, sizeof (struct lifreq)); 319*0Sstevel@tonic-gate if (buf == NULL) { 320*0Sstevel@tonic-gate logperror("initifs: calloc"); 321*0Sstevel@tonic-gate return; 322*0Sstevel@tonic-gate } 323*0Sstevel@tonic-gate 324*0Sstevel@tonic-gate lifc.lifc_family = AF_UNSPEC; 325*0Sstevel@tonic-gate lifc.lifc_flags = 0; 326*0Sstevel@tonic-gate lifc.lifc_len = numifs * sizeof (struct lifreq); 327*0Sstevel@tonic-gate lifc.lifc_buf = buf; 328*0Sstevel@tonic-gate 329*0Sstevel@tonic-gate if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) { 330*0Sstevel@tonic-gate /* 331*0Sstevel@tonic-gate * EINVAL is commonly encountered, when things change 332*0Sstevel@tonic-gate * underneath us rapidly, (eg. at boot, when new interfaces 333*0Sstevel@tonic-gate * are plumbed successively) and the kernel finds the buffer 334*0Sstevel@tonic-gate * size we passed as too small. We will retry again 335*0Sstevel@tonic-gate * when we see the next routing socket msg, or at worst after 336*0Sstevel@tonic-gate * IF_SCAN_INTERVAL ms. 337*0Sstevel@tonic-gate */ 338*0Sstevel@tonic-gate if (errno != EINVAL) { 339*0Sstevel@tonic-gate logperror("initifs: ioctl" 340*0Sstevel@tonic-gate " (get interface configuration)"); 341*0Sstevel@tonic-gate } 342*0Sstevel@tonic-gate free(buf); 343*0Sstevel@tonic-gate return; 344*0Sstevel@tonic-gate } 345*0Sstevel@tonic-gate 346*0Sstevel@tonic-gate lifr = (struct lifreq *)lifc.lifc_req; 347*0Sstevel@tonic-gate 348*0Sstevel@tonic-gate /* 349*0Sstevel@tonic-gate * For each lifreq returned by SIOGGLIFCONF, call pii_process() 350*0Sstevel@tonic-gate * and get the state of the corresponding phyint_instance. If it is 351*0Sstevel@tonic-gate * successful, then call logint_init_from_k() to get the state of the 352*0Sstevel@tonic-gate * logint. 353*0Sstevel@tonic-gate */ 354*0Sstevel@tonic-gate for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifr++) { 355*0Sstevel@tonic-gate af = lifr->lifr_addr.ss_family; 356*0Sstevel@tonic-gate 357*0Sstevel@tonic-gate /* 358*0Sstevel@tonic-gate * Need to pass a phyint name to pii_process. Insert the 359*0Sstevel@tonic-gate * null where the ':' IF_SEPARATOR is found in the logical 360*0Sstevel@tonic-gate * name. 361*0Sstevel@tonic-gate */ 362*0Sstevel@tonic-gate (void) strncpy(pi_name, lifr->lifr_name, sizeof (pi_name)); 363*0Sstevel@tonic-gate pi_name[sizeof (pi_name) - 1] = '\0'; 364*0Sstevel@tonic-gate if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL) 365*0Sstevel@tonic-gate *cp = '\0'; 366*0Sstevel@tonic-gate 367*0Sstevel@tonic-gate exists = pii_process(af, pi_name, &pii); 368*0Sstevel@tonic-gate if (exists) { 369*0Sstevel@tonic-gate /* The phyint is fine. So process the logint */ 370*0Sstevel@tonic-gate logint_init_from_k(pii, lifr->lifr_name); 371*0Sstevel@tonic-gate } 372*0Sstevel@tonic-gate check_addr_unique(af, lifr->lifr_name); 373*0Sstevel@tonic-gate } 374*0Sstevel@tonic-gate 375*0Sstevel@tonic-gate free(buf); 376*0Sstevel@tonic-gate 377*0Sstevel@tonic-gate /* 378*0Sstevel@tonic-gate * If the test address is now unique, and if it was not unique 379*0Sstevel@tonic-gate * previously, clear the li_dupaddrmsg_printed flag and log a 380*0Sstevel@tonic-gate * recovery message 381*0Sstevel@tonic-gate */ 382*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 383*0Sstevel@tonic-gate struct logint *li; 384*0Sstevel@tonic-gate char abuf[INET6_ADDRSTRLEN]; 385*0Sstevel@tonic-gate 386*0Sstevel@tonic-gate li = pii->pii_probe_logint; 387*0Sstevel@tonic-gate if ((li != NULL) && !li->li_dupaddr && 388*0Sstevel@tonic-gate li->li_dupaddrmsg_printed) { 389*0Sstevel@tonic-gate logerr("Test address %s is unique; enabling probe-" 390*0Sstevel@tonic-gate "based failure detection\n", 391*0Sstevel@tonic-gate pr_addr(pii->pii_af, li->li_addr, abuf, 392*0Sstevel@tonic-gate sizeof (abuf))); 393*0Sstevel@tonic-gate li->li_dupaddrmsg_printed = 0; 394*0Sstevel@tonic-gate } 395*0Sstevel@tonic-gate } 396*0Sstevel@tonic-gate 397*0Sstevel@tonic-gate /* 398*0Sstevel@tonic-gate * Scan for phyints and logints that have disappeared from the 399*0Sstevel@tonic-gate * kernel, and delete them. 400*0Sstevel@tonic-gate */ 401*0Sstevel@tonic-gate pii = phyint_instances; 402*0Sstevel@tonic-gate 403*0Sstevel@tonic-gate while (pii != NULL) { 404*0Sstevel@tonic-gate next_pii = pii->pii_next; 405*0Sstevel@tonic-gate check_if_removed(pii); 406*0Sstevel@tonic-gate pii = next_pii; 407*0Sstevel@tonic-gate } 408*0Sstevel@tonic-gate 409*0Sstevel@tonic-gate /* 410*0Sstevel@tonic-gate * Select a test address for sending probes on each phyint instance 411*0Sstevel@tonic-gate */ 412*0Sstevel@tonic-gate select_test_ifs(); 413*0Sstevel@tonic-gate 414*0Sstevel@tonic-gate /* 415*0Sstevel@tonic-gate * Handle link up/down notifications from the NICs. 416*0Sstevel@tonic-gate */ 417*0Sstevel@tonic-gate process_link_state_changes(); 418*0Sstevel@tonic-gate 419*0Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 420*0Sstevel@tonic-gate /* 421*0Sstevel@tonic-gate * If this is a case of group failure, we don't have much 422*0Sstevel@tonic-gate * to do until the group recovers again. 423*0Sstevel@tonic-gate */ 424*0Sstevel@tonic-gate if (GROUP_FAILED(pi->pi_group)) 425*0Sstevel@tonic-gate continue; 426*0Sstevel@tonic-gate 427*0Sstevel@tonic-gate /* 428*0Sstevel@tonic-gate * Try/Retry any pending failovers / failbacks, that did not 429*0Sstevel@tonic-gate * not complete, or that could not be initiated previously. 430*0Sstevel@tonic-gate * This implements the 3 invariants described in the big block 431*0Sstevel@tonic-gate * comment at the beginning of probe.c 432*0Sstevel@tonic-gate */ 433*0Sstevel@tonic-gate if (pi->pi_flags & IFF_INACTIVE) { 434*0Sstevel@tonic-gate if (!pi->pi_empty) 435*0Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_TO_NONSTANDBY); 436*0Sstevel@tonic-gate } else { 437*0Sstevel@tonic-gate struct phyint_instance *pii; 438*0Sstevel@tonic-gate 439*0Sstevel@tonic-gate pii = pi->pi_v4; 440*0Sstevel@tonic-gate if (LINK_UP(pi) && !PROBE_CAPABLE(pii)) 441*0Sstevel@tonic-gate pii = pi->pi_v6; 442*0Sstevel@tonic-gate if (LINK_UP(pi) && !PROBE_CAPABLE(pii)) 443*0Sstevel@tonic-gate continue; 444*0Sstevel@tonic-gate /* 445*0Sstevel@tonic-gate * It is possible that the phyint has started 446*0Sstevel@tonic-gate * receiving packets, after it has been marked 447*0Sstevel@tonic-gate * PI_FAILED. Don't initiate failover, if the 448*0Sstevel@tonic-gate * phyint has started recovering. failure_state() 449*0Sstevel@tonic-gate * captures this check. A similar logic is used 450*0Sstevel@tonic-gate * for failback/repair case. 451*0Sstevel@tonic-gate */ 452*0Sstevel@tonic-gate if (pi->pi_state == PI_FAILED && !pi->pi_empty && 453*0Sstevel@tonic-gate (failure_state(pii) == PHYINT_FAILURE)) { 454*0Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_NORMAL); 455*0Sstevel@tonic-gate } else if (pi->pi_state == PI_RUNNING && !pi->pi_full) { 456*0Sstevel@tonic-gate if (try_failback(pi, _B_FALSE) != 457*0Sstevel@tonic-gate IPMP_FAILURE) { 458*0Sstevel@tonic-gate (void) change_lif_flags(pi, IFF_FAILED, 459*0Sstevel@tonic-gate _B_FALSE); 460*0Sstevel@tonic-gate /* Per state diagram */ 461*0Sstevel@tonic-gate pi->pi_empty = 0; 462*0Sstevel@tonic-gate } 463*0Sstevel@tonic-gate } 464*0Sstevel@tonic-gate } 465*0Sstevel@tonic-gate } 466*0Sstevel@tonic-gate } 467*0Sstevel@tonic-gate 468*0Sstevel@tonic-gate /* 469*0Sstevel@tonic-gate * Check that test/probe addresses are always unique. link-locals and 470*0Sstevel@tonic-gate * ptp unnumbered may not be unique, and bind to such an (IFF_NOFAILOVER) 471*0Sstevel@tonic-gate * address can produce unexpected results. Log an error and alert the user. 472*0Sstevel@tonic-gate */ 473*0Sstevel@tonic-gate static void 474*0Sstevel@tonic-gate check_addr_unique(int af, char *name) 475*0Sstevel@tonic-gate { 476*0Sstevel@tonic-gate struct lifreq lifr; 477*0Sstevel@tonic-gate struct phyint *pi; 478*0Sstevel@tonic-gate struct in6_addr addr; 479*0Sstevel@tonic-gate struct phyint_instance *pii; 480*0Sstevel@tonic-gate struct sockaddr_in *sin; 481*0Sstevel@tonic-gate struct sockaddr_in6 *sin6; 482*0Sstevel@tonic-gate int ifsock; 483*0Sstevel@tonic-gate char abuf[INET6_ADDRSTRLEN]; 484*0Sstevel@tonic-gate 485*0Sstevel@tonic-gate /* Get the socket for doing ioctls */ 486*0Sstevel@tonic-gate ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 487*0Sstevel@tonic-gate 488*0Sstevel@tonic-gate (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); 489*0Sstevel@tonic-gate lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 490*0Sstevel@tonic-gate /* 491*0Sstevel@tonic-gate * Get the address corresponding to 'name'. We cannot 492*0Sstevel@tonic-gate * do a logint lookup in our tables, because, not all logints 493*0Sstevel@tonic-gate * in the system are tracked by mpathd. (eg. things not in a group) 494*0Sstevel@tonic-gate */ 495*0Sstevel@tonic-gate if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 496*0Sstevel@tonic-gate if (errno == ENXIO) { 497*0Sstevel@tonic-gate /* Interface has vanished */ 498*0Sstevel@tonic-gate return; 499*0Sstevel@tonic-gate } else { 500*0Sstevel@tonic-gate logperror("ioctl (get addr)"); 501*0Sstevel@tonic-gate return; 502*0Sstevel@tonic-gate } 503*0Sstevel@tonic-gate } 504*0Sstevel@tonic-gate 505*0Sstevel@tonic-gate if (af == AF_INET) { 506*0Sstevel@tonic-gate sin = (struct sockaddr_in *)&lifr.lifr_addr; 507*0Sstevel@tonic-gate IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &addr); 508*0Sstevel@tonic-gate } else { 509*0Sstevel@tonic-gate sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 510*0Sstevel@tonic-gate addr = sin6->sin6_addr; 511*0Sstevel@tonic-gate } 512*0Sstevel@tonic-gate 513*0Sstevel@tonic-gate /* 514*0Sstevel@tonic-gate * Does the address 'addr' match any known test address ? If so 515*0Sstevel@tonic-gate * it is a duplicate, unless we are looking at the same logint 516*0Sstevel@tonic-gate */ 517*0Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 518*0Sstevel@tonic-gate pii = PHYINT_INSTANCE(pi, af); 519*0Sstevel@tonic-gate if (pii == NULL || pii->pii_probe_logint == NULL) 520*0Sstevel@tonic-gate continue; 521*0Sstevel@tonic-gate 522*0Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&addr, 523*0Sstevel@tonic-gate &pii->pii_probe_logint->li_addr)) { 524*0Sstevel@tonic-gate continue; 525*0Sstevel@tonic-gate } 526*0Sstevel@tonic-gate 527*0Sstevel@tonic-gate if (strncmp(pii->pii_probe_logint->li_name, name, 528*0Sstevel@tonic-gate sizeof (pii->pii_probe_logint->li_name)) == 0) { 529*0Sstevel@tonic-gate continue; 530*0Sstevel@tonic-gate } 531*0Sstevel@tonic-gate 532*0Sstevel@tonic-gate /* 533*0Sstevel@tonic-gate * This test address is not unique. Set the dupaddr bit 534*0Sstevel@tonic-gate */ 535*0Sstevel@tonic-gate pii->pii_probe_logint->li_dupaddr = 1; 536*0Sstevel@tonic-gate 537*0Sstevel@tonic-gate /* 538*0Sstevel@tonic-gate * Log an error message if not already logged 539*0Sstevel@tonic-gate */ 540*0Sstevel@tonic-gate if (pii->pii_probe_logint->li_dupaddrmsg_printed) 541*0Sstevel@tonic-gate continue; 542*0Sstevel@tonic-gate 543*0Sstevel@tonic-gate logerr("Test address %s is not unique; disabling " 544*0Sstevel@tonic-gate "probe-based failure detection\n", 545*0Sstevel@tonic-gate pr_addr(af, addr, abuf, sizeof (abuf))); 546*0Sstevel@tonic-gate 547*0Sstevel@tonic-gate pii->pii_probe_logint->li_dupaddrmsg_printed = 1; 548*0Sstevel@tonic-gate } 549*0Sstevel@tonic-gate } 550*0Sstevel@tonic-gate 551*0Sstevel@tonic-gate /* 552*0Sstevel@tonic-gate * The pii_probe_logint used for probing, must satisfy the following properties 553*0Sstevel@tonic-gate * with respect to its li_flags. 554*0Sstevel@tonic-gate * IFF_NOFAILOVER - must be set (except in singleton group case) 555*0Sstevel@tonic-gate * IFF_UP - must be set 556*0Sstevel@tonic-gate * IFF_NOXMIT - must be clear 557*0Sstevel@tonic-gate * IFF_NOLOCAL - must be clear 558*0Sstevel@tonic-gate * IFF_DEPRECATED - preferably set (for IPv4) 559*0Sstevel@tonic-gate */ 560*0Sstevel@tonic-gate #define BEST_FLAG_SET (IFF_NOFAILOVER | IFF_UP | IFF_DEPRECATED) 561*0Sstevel@tonic-gate #define CLEAR_FLAG_SET (IFF_NOXMIT | IFF_NOLOCAL) 562*0Sstevel@tonic-gate #define TEST_CLEAR_FLAG_SET CLEAR_FLAG_SET 563*0Sstevel@tonic-gate #define TEST_MINIMAL_FLAG_SET (IFF_UP | CLEAR_FLAG_SET) 564*0Sstevel@tonic-gate #define TEST_BEST_FLAG_SET (BEST_FLAG_SET | CLEAR_FLAG_SET) 565*0Sstevel@tonic-gate 566*0Sstevel@tonic-gate /* 567*0Sstevel@tonic-gate * Stop probing an interface. Called when an interface is offlined. 568*0Sstevel@tonic-gate * The probe socket is closed on each interface instance, and the 569*0Sstevel@tonic-gate * interface state set to PI_OFFLINE. 570*0Sstevel@tonic-gate */ 571*0Sstevel@tonic-gate static void 572*0Sstevel@tonic-gate stop_probing(struct phyint *pi) 573*0Sstevel@tonic-gate { 574*0Sstevel@tonic-gate struct phyint_instance *pii; 575*0Sstevel@tonic-gate 576*0Sstevel@tonic-gate pii = pi->pi_v4; 577*0Sstevel@tonic-gate if (pii != NULL) { 578*0Sstevel@tonic-gate if (pii->pii_probe_sock != -1) 579*0Sstevel@tonic-gate close_probe_socket(pii, _B_TRUE); 580*0Sstevel@tonic-gate pii->pii_probe_logint = NULL; 581*0Sstevel@tonic-gate } 582*0Sstevel@tonic-gate 583*0Sstevel@tonic-gate pii = pi->pi_v6; 584*0Sstevel@tonic-gate if (pii != NULL) { 585*0Sstevel@tonic-gate if (pii->pii_probe_sock != -1) 586*0Sstevel@tonic-gate close_probe_socket(pii, _B_TRUE); 587*0Sstevel@tonic-gate pii->pii_probe_logint = NULL; 588*0Sstevel@tonic-gate } 589*0Sstevel@tonic-gate 590*0Sstevel@tonic-gate phyint_chstate(pi, PI_OFFLINE); 591*0Sstevel@tonic-gate } 592*0Sstevel@tonic-gate 593*0Sstevel@tonic-gate /* 594*0Sstevel@tonic-gate * Do the test address selection for each phyint instance. Pick an 595*0Sstevel@tonic-gate * IFF_NOFAILOVER address as test address. For singleton case, 596*0Sstevel@tonic-gate * if user didn't configure an IFF_NOFAILOVER address, we will pick a 597*0Sstevel@tonic-gate * normal address as test address. For (multiple adapter) groups, 598*0Sstevel@tonic-gate * user is required to configure IFF_NOFAILOVER test address. Call 599*0Sstevel@tonic-gate * phyint_inst_sockinit() to complete the initializations. 600*0Sstevel@tonic-gate */ 601*0Sstevel@tonic-gate static void 602*0Sstevel@tonic-gate select_test_ifs(void) 603*0Sstevel@tonic-gate { 604*0Sstevel@tonic-gate struct phyint *pi; 605*0Sstevel@tonic-gate struct phyint_instance *pii; 606*0Sstevel@tonic-gate struct phyint_instance *next_pii; 607*0Sstevel@tonic-gate struct logint *li; 608*0Sstevel@tonic-gate struct logint *test_logint; 609*0Sstevel@tonic-gate boolean_t target_scan_reqd = _B_FALSE; 610*0Sstevel@tonic-gate struct target *tg; 611*0Sstevel@tonic-gate 612*0Sstevel@tonic-gate if (debug & D_PHYINT) 613*0Sstevel@tonic-gate logdebug("select_test_ifs\n"); 614*0Sstevel@tonic-gate 615*0Sstevel@tonic-gate /* 616*0Sstevel@tonic-gate * For each phyint instance, do the test address selection 617*0Sstevel@tonic-gate */ 618*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = next_pii) { 619*0Sstevel@tonic-gate next_pii = pii->pii_next; 620*0Sstevel@tonic-gate /* 621*0Sstevel@tonic-gate * An interface that is offline, should not be probed. 622*0Sstevel@tonic-gate * Offline interfaces should always in PI_OFFLINE state, 623*0Sstevel@tonic-gate * unless some other entity has set the offline flag. 624*0Sstevel@tonic-gate */ 625*0Sstevel@tonic-gate if (pii->pii_phyint->pi_flags & IFF_OFFLINE) { 626*0Sstevel@tonic-gate if (pii->pii_phyint->pi_state != PI_OFFLINE) { 627*0Sstevel@tonic-gate logerr("shouldn't be probing offline" 628*0Sstevel@tonic-gate " interface %s (state is: %u)." 629*0Sstevel@tonic-gate " Stopping probes.\n", 630*0Sstevel@tonic-gate pii->pii_phyint->pi_name, 631*0Sstevel@tonic-gate pii->pii_phyint->pi_state); 632*0Sstevel@tonic-gate stop_probing(pii->pii_phyint); 633*0Sstevel@tonic-gate } 634*0Sstevel@tonic-gate continue; 635*0Sstevel@tonic-gate } 636*0Sstevel@tonic-gate 637*0Sstevel@tonic-gate test_logint = pii->pii_probe_logint; 638*0Sstevel@tonic-gate 639*0Sstevel@tonic-gate if (test_logint != NULL) { 640*0Sstevel@tonic-gate if ((test_logint->li_flags & TEST_BEST_FLAG_SET) 641*0Sstevel@tonic-gate == BEST_FLAG_SET) 642*0Sstevel@tonic-gate continue; 643*0Sstevel@tonic-gate 644*0Sstevel@tonic-gate /* 645*0Sstevel@tonic-gate * If user configures IFF_NOXMIT or IFF_NOLOCAL 646*0Sstevel@tonic-gate * flags on test addresses after in.mpathd has 647*0Sstevel@tonic-gate * has started, the daemon aborts. In future 648*0Sstevel@tonic-gate * this can be better handling, i.e. instead 649*0Sstevel@tonic-gate * of abort the daemon, a more appropriate 650*0Sstevel@tonic-gate * action may be issuing a warning and choose 651*0Sstevel@tonic-gate * a different test address. 652*0Sstevel@tonic-gate */ 653*0Sstevel@tonic-gate assert((test_logint->li_flags & TEST_CLEAR_FLAG_SET) 654*0Sstevel@tonic-gate == 0); 655*0Sstevel@tonic-gate } 656*0Sstevel@tonic-gate 657*0Sstevel@tonic-gate /* 658*0Sstevel@tonic-gate * Walk the logints of this phyint instance, and select 659*0Sstevel@tonic-gate * the best available test address 660*0Sstevel@tonic-gate */ 661*0Sstevel@tonic-gate for (li = pii->pii_logint; li != NULL; li = li->li_next) { 662*0Sstevel@tonic-gate /* 663*0Sstevel@tonic-gate * Skip any IPv6 logints that are not link-local, 664*0Sstevel@tonic-gate * since we should always have a link-local address 665*0Sstevel@tonic-gate * anyway and in6_data() expects link-local replies. 666*0Sstevel@tonic-gate */ 667*0Sstevel@tonic-gate if (pii->pii_af == AF_INET6 && 668*0Sstevel@tonic-gate !IN6_IS_ADDR_LINKLOCAL(&li->li_addr)) 669*0Sstevel@tonic-gate continue; 670*0Sstevel@tonic-gate 671*0Sstevel@tonic-gate if ((li->li_flags & TEST_MINIMAL_FLAG_SET) == IFF_UP) { 672*0Sstevel@tonic-gate /* 673*0Sstevel@tonic-gate * Now we have a testaddress, that satisfies 674*0Sstevel@tonic-gate * the minimal properties. 675*0Sstevel@tonic-gate */ 676*0Sstevel@tonic-gate if ((li->li_flags & TEST_BEST_FLAG_SET) 677*0Sstevel@tonic-gate == BEST_FLAG_SET) { 678*0Sstevel@tonic-gate /* 679*0Sstevel@tonic-gate * This is the best possible address. 680*0Sstevel@tonic-gate * So break, and continue to the 681*0Sstevel@tonic-gate * next phyint 682*0Sstevel@tonic-gate */ 683*0Sstevel@tonic-gate test_logint = li; 684*0Sstevel@tonic-gate break; 685*0Sstevel@tonic-gate } 686*0Sstevel@tonic-gate if ((test_logint == NULL) || 687*0Sstevel@tonic-gate (!(test_logint->li_flags & 688*0Sstevel@tonic-gate IFF_NOFAILOVER) && 689*0Sstevel@tonic-gate (li->li_flags & IFF_NOFAILOVER))) 690*0Sstevel@tonic-gate /* 691*0Sstevel@tonic-gate * This is a possible candidate, 692*0Sstevel@tonic-gate * unless we find a better one. 693*0Sstevel@tonic-gate */ 694*0Sstevel@tonic-gate test_logint = li; 695*0Sstevel@tonic-gate } 696*0Sstevel@tonic-gate } 697*0Sstevel@tonic-gate 698*0Sstevel@tonic-gate /* 699*0Sstevel@tonic-gate * If we've gone from a singleton group to a multiple adapter 700*0Sstevel@tonic-gate * group, and we haven't found an IFF_NOFAILOVER test address 701*0Sstevel@tonic-gate * by now, the old test address is no longer valid. If we are 702*0Sstevel@tonic-gate * not dealing with a singleton group, and the above test 703*0Sstevel@tonic-gate * address selection loop has selected a non IFF_NOFAILOVER 704*0Sstevel@tonic-gate * address as a candidate, we will correct that here. 705*0Sstevel@tonic-gate */ 706*0Sstevel@tonic-gate if ((test_logint != NULL) && 707*0Sstevel@tonic-gate !SINGLETON_GROUP(pii->pii_phyint) && 708*0Sstevel@tonic-gate !(test_logint->li_flags & IFF_NOFAILOVER)) { 709*0Sstevel@tonic-gate test_logint = NULL; 710*0Sstevel@tonic-gate if (pii->pii_probe_sock != -1) 711*0Sstevel@tonic-gate close_probe_socket(pii, _B_TRUE); 712*0Sstevel@tonic-gate pii->pii_probe_logint = NULL; 713*0Sstevel@tonic-gate } 714*0Sstevel@tonic-gate 715*0Sstevel@tonic-gate if (test_logint == NULL) { 716*0Sstevel@tonic-gate /* 717*0Sstevel@tonic-gate * We don't have a test address. Don't print an 718*0Sstevel@tonic-gate * error message immediately. check_config() will 719*0Sstevel@tonic-gate * take care of it. Zero out the probe stats array 720*0Sstevel@tonic-gate * since it is no longer relevant. Optimize by 721*0Sstevel@tonic-gate * checking if it is already zeroed out. 722*0Sstevel@tonic-gate */ 723*0Sstevel@tonic-gate int pr_ndx; 724*0Sstevel@tonic-gate 725*0Sstevel@tonic-gate pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next); 726*0Sstevel@tonic-gate if (pii->pii_probes[pr_ndx].pr_status != PR_UNUSED) { 727*0Sstevel@tonic-gate clear_pii_probe_stats(pii); 728*0Sstevel@tonic-gate reset_crtt_all(pii->pii_phyint); 729*0Sstevel@tonic-gate } 730*0Sstevel@tonic-gate continue; 731*0Sstevel@tonic-gate } else if (test_logint == pii->pii_probe_logint) { 732*0Sstevel@tonic-gate /* 733*0Sstevel@tonic-gate * If we didn't find any new test addr, go to the 734*0Sstevel@tonic-gate * next phyint. 735*0Sstevel@tonic-gate */ 736*0Sstevel@tonic-gate continue; 737*0Sstevel@tonic-gate } 738*0Sstevel@tonic-gate 739*0Sstevel@tonic-gate /* 740*0Sstevel@tonic-gate * The phyint is either being assigned a new testaddr 741*0Sstevel@tonic-gate * or is being assigned a testaddr for the 1st time. 742*0Sstevel@tonic-gate * Need to initialize the phyint socket 743*0Sstevel@tonic-gate */ 744*0Sstevel@tonic-gate pii->pii_probe_logint = test_logint; 745*0Sstevel@tonic-gate if (!phyint_inst_sockinit(pii)) { 746*0Sstevel@tonic-gate if (debug & D_PHYINT) { 747*0Sstevel@tonic-gate logdebug("select_test_ifs: " 748*0Sstevel@tonic-gate "phyint_sockinit failed\n"); 749*0Sstevel@tonic-gate } 750*0Sstevel@tonic-gate phyint_inst_delete(pii); 751*0Sstevel@tonic-gate continue; 752*0Sstevel@tonic-gate } 753*0Sstevel@tonic-gate 754*0Sstevel@tonic-gate /* 755*0Sstevel@tonic-gate * This phyint instance is now enabled for probes; this 756*0Sstevel@tonic-gate * impacts our state machine in two ways: 757*0Sstevel@tonic-gate * 758*0Sstevel@tonic-gate * 1. If we're probe *capable* as well (i.e., we have 759*0Sstevel@tonic-gate * probe targets) and the interface is in PI_NOTARGETS, 760*0Sstevel@tonic-gate * then transition to PI_RUNNING. 761*0Sstevel@tonic-gate * 762*0Sstevel@tonic-gate * 2. If we're not probe capable, and the other phyint 763*0Sstevel@tonic-gate * instance is also not probe capable, and we were in 764*0Sstevel@tonic-gate * PI_RUNNING, then transition to PI_NOTARGETS. 765*0Sstevel@tonic-gate * 766*0Sstevel@tonic-gate * Also see the state diagram in mpd_probe.c. 767*0Sstevel@tonic-gate */ 768*0Sstevel@tonic-gate if (PROBE_CAPABLE(pii)) { 769*0Sstevel@tonic-gate if (pii->pii_phyint->pi_state == PI_NOTARGETS) 770*0Sstevel@tonic-gate phyint_chstate(pii->pii_phyint, PI_RUNNING); 771*0Sstevel@tonic-gate } else if (!PROBE_CAPABLE(phyint_inst_other(pii))) { 772*0Sstevel@tonic-gate if (pii->pii_phyint->pi_state == PI_RUNNING) 773*0Sstevel@tonic-gate phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 774*0Sstevel@tonic-gate } 775*0Sstevel@tonic-gate 776*0Sstevel@tonic-gate if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) { 777*0Sstevel@tonic-gate tg = pii->pii_targets; 778*0Sstevel@tonic-gate if (tg != NULL) 779*0Sstevel@tonic-gate target_delete(tg); 780*0Sstevel@tonic-gate assert(pii->pii_targets == NULL); 781*0Sstevel@tonic-gate assert(pii->pii_target_next == NULL); 782*0Sstevel@tonic-gate assert(pii->pii_ntargets == 0); 783*0Sstevel@tonic-gate target_create(pii, test_logint->li_dstaddr, 784*0Sstevel@tonic-gate _B_TRUE); 785*0Sstevel@tonic-gate } 786*0Sstevel@tonic-gate 787*0Sstevel@tonic-gate /* 788*0Sstevel@tonic-gate * If no targets are currently known for this phyint 789*0Sstevel@tonic-gate * we need to call init_router_targets. Since 790*0Sstevel@tonic-gate * init_router_targets() initializes the list of targets 791*0Sstevel@tonic-gate * for all phyints it is done below the loop. 792*0Sstevel@tonic-gate */ 793*0Sstevel@tonic-gate if (pii->pii_targets == NULL) 794*0Sstevel@tonic-gate target_scan_reqd = _B_TRUE; 795*0Sstevel@tonic-gate 796*0Sstevel@tonic-gate /* 797*0Sstevel@tonic-gate * Start the probe timer for this instance. 798*0Sstevel@tonic-gate */ 799*0Sstevel@tonic-gate if (!pii->pii_basetime_inited && pii->pii_probe_sock != -1) { 800*0Sstevel@tonic-gate start_timer(pii); 801*0Sstevel@tonic-gate pii->pii_basetime_inited = 1; 802*0Sstevel@tonic-gate } 803*0Sstevel@tonic-gate } 804*0Sstevel@tonic-gate 805*0Sstevel@tonic-gate /* 806*0Sstevel@tonic-gate * Check the interface list for any interfaces that are marked 807*0Sstevel@tonic-gate * PI_FAILED but no longer enabled to send probes, and call 808*0Sstevel@tonic-gate * phyint_check_for_repair() to see if the link now indicates that the 809*0Sstevel@tonic-gate * interface should be repaired. Also see the state diagram in 810*0Sstevel@tonic-gate * mpd_probe.c. 811*0Sstevel@tonic-gate */ 812*0Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 813*0Sstevel@tonic-gate if (pi->pi_state == PI_FAILED && 814*0Sstevel@tonic-gate !PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) { 815*0Sstevel@tonic-gate phyint_check_for_repair(pi); 816*0Sstevel@tonic-gate } 817*0Sstevel@tonic-gate } 818*0Sstevel@tonic-gate 819*0Sstevel@tonic-gate /* 820*0Sstevel@tonic-gate * Try to populate the target list. init_router_targets populates 821*0Sstevel@tonic-gate * the target list from the routing table. If our target list is 822*0Sstevel@tonic-gate * still empty, init_host_targets adds host targets based on the 823*0Sstevel@tonic-gate * host target list of other phyints in the group. 824*0Sstevel@tonic-gate */ 825*0Sstevel@tonic-gate if (target_scan_reqd) { 826*0Sstevel@tonic-gate init_router_targets(); 827*0Sstevel@tonic-gate init_host_targets(); 828*0Sstevel@tonic-gate } 829*0Sstevel@tonic-gate } 830*0Sstevel@tonic-gate 831*0Sstevel@tonic-gate /* 832*0Sstevel@tonic-gate * Check phyint group configuration, to detect any inconsistencies, 833*0Sstevel@tonic-gate * and log an error message. This is called from runtimeouts every 834*0Sstevel@tonic-gate * 20 secs. But the error message is displayed once. If the 835*0Sstevel@tonic-gate * consistency is resolved by the admin, a recovery message is displayed 836*0Sstevel@tonic-gate * once. 837*0Sstevel@tonic-gate */ 838*0Sstevel@tonic-gate static void 839*0Sstevel@tonic-gate check_config(void) 840*0Sstevel@tonic-gate { 841*0Sstevel@tonic-gate struct phyint_group *pg; 842*0Sstevel@tonic-gate struct phyint *pi; 843*0Sstevel@tonic-gate boolean_t v4_in_group; 844*0Sstevel@tonic-gate boolean_t v6_in_group; 845*0Sstevel@tonic-gate 846*0Sstevel@tonic-gate /* 847*0Sstevel@tonic-gate * All phyints of a group must be homogenous to ensure that 848*0Sstevel@tonic-gate * failover or failback can be done. If any phyint in a group 849*0Sstevel@tonic-gate * has IPv4 plumbed, check that all phyints have IPv4 plumbed. 850*0Sstevel@tonic-gate * Do a similar check for IPv6. 851*0Sstevel@tonic-gate */ 852*0Sstevel@tonic-gate for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 853*0Sstevel@tonic-gate if (pg == phyint_anongroup) 854*0Sstevel@tonic-gate continue; 855*0Sstevel@tonic-gate 856*0Sstevel@tonic-gate v4_in_group = _B_FALSE; 857*0Sstevel@tonic-gate v6_in_group = _B_FALSE; 858*0Sstevel@tonic-gate /* 859*0Sstevel@tonic-gate * 1st pass. Determine if at least 1 phyint in the group 860*0Sstevel@tonic-gate * has IPv4 plumbed and if so set v4_in_group to true. 861*0Sstevel@tonic-gate * Repeat similarly for IPv6. 862*0Sstevel@tonic-gate */ 863*0Sstevel@tonic-gate for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 864*0Sstevel@tonic-gate if (pi->pi_v4 != NULL) 865*0Sstevel@tonic-gate v4_in_group = _B_TRUE; 866*0Sstevel@tonic-gate if (pi->pi_v6 != NULL) 867*0Sstevel@tonic-gate v6_in_group = _B_TRUE; 868*0Sstevel@tonic-gate } 869*0Sstevel@tonic-gate 870*0Sstevel@tonic-gate /* 871*0Sstevel@tonic-gate * 2nd pass. If v4_in_group is true, check that phyint 872*0Sstevel@tonic-gate * has IPv4 plumbed. Repeat similarly for IPv6. Print 873*0Sstevel@tonic-gate * out a message the 1st time only. 874*0Sstevel@tonic-gate */ 875*0Sstevel@tonic-gate for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 876*0Sstevel@tonic-gate if (pi->pi_flags & IFF_OFFLINE) 877*0Sstevel@tonic-gate continue; 878*0Sstevel@tonic-gate 879*0Sstevel@tonic-gate if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) { 880*0Sstevel@tonic-gate if (!pi->pi_cfgmsg_printed) { 881*0Sstevel@tonic-gate logerr("NIC %s of group %s is" 882*0Sstevel@tonic-gate " not plumbed for IPv4 and may" 883*0Sstevel@tonic-gate " affect failover capability\n", 884*0Sstevel@tonic-gate pi->pi_name, 885*0Sstevel@tonic-gate pi->pi_group->pg_name); 886*0Sstevel@tonic-gate pi->pi_cfgmsg_printed = 1; 887*0Sstevel@tonic-gate } 888*0Sstevel@tonic-gate } else if (v6_in_group == _B_TRUE && 889*0Sstevel@tonic-gate pi->pi_v6 == NULL) { 890*0Sstevel@tonic-gate if (!pi->pi_cfgmsg_printed) { 891*0Sstevel@tonic-gate logerr("NIC %s of group %s is" 892*0Sstevel@tonic-gate " not plumbed for IPv6 and may" 893*0Sstevel@tonic-gate " affect failover capability\n", 894*0Sstevel@tonic-gate pi->pi_name, 895*0Sstevel@tonic-gate pi->pi_group->pg_name); 896*0Sstevel@tonic-gate pi->pi_cfgmsg_printed = 1; 897*0Sstevel@tonic-gate } 898*0Sstevel@tonic-gate } else { 899*0Sstevel@tonic-gate /* 900*0Sstevel@tonic-gate * The phyint matches the group configuration, 901*0Sstevel@tonic-gate * if we have reached this point. If it was 902*0Sstevel@tonic-gate * improperly configured earlier, log an 903*0Sstevel@tonic-gate * error recovery message 904*0Sstevel@tonic-gate */ 905*0Sstevel@tonic-gate if (pi->pi_cfgmsg_printed) { 906*0Sstevel@tonic-gate logerr("NIC %s is now consistent with " 907*0Sstevel@tonic-gate "group %s and failover capability " 908*0Sstevel@tonic-gate "is restored\n", pi->pi_name, 909*0Sstevel@tonic-gate pi->pi_group->pg_name); 910*0Sstevel@tonic-gate pi->pi_cfgmsg_printed = 0; 911*0Sstevel@tonic-gate } 912*0Sstevel@tonic-gate } 913*0Sstevel@tonic-gate 914*0Sstevel@tonic-gate } 915*0Sstevel@tonic-gate } 916*0Sstevel@tonic-gate 917*0Sstevel@tonic-gate /* 918*0Sstevel@tonic-gate * In order to perform probe-based failure detection, a phyint must 919*0Sstevel@tonic-gate * have at least 1 test/probe address for sending and receiving probes 920*0Sstevel@tonic-gate * (either on IPv4 or IPv6 instance or both). If no test address has 921*0Sstevel@tonic-gate * been configured, notify the administrator, but continue on since we 922*0Sstevel@tonic-gate * can still perform load spreading, along with "link up/down" based 923*0Sstevel@tonic-gate * failure detection. 924*0Sstevel@tonic-gate * 925*0Sstevel@tonic-gate * Note: In the singleton group case, when user didn't configure 926*0Sstevel@tonic-gate * a test address, the probe address is picked by this daemon. 927*0Sstevel@tonic-gate */ 928*0Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 929*0Sstevel@tonic-gate if (pi->pi_flags & IFF_OFFLINE) 930*0Sstevel@tonic-gate continue; 931*0Sstevel@tonic-gate 932*0Sstevel@tonic-gate if ((pi->pi_v4 == NULL || 933*0Sstevel@tonic-gate pi->pi_v4->pii_probe_logint == NULL) && 934*0Sstevel@tonic-gate (pi->pi_v6 == NULL || 935*0Sstevel@tonic-gate pi->pi_v6->pii_probe_logint == NULL)) { 936*0Sstevel@tonic-gate if (!pi->pi_taddrmsg_printed) { 937*0Sstevel@tonic-gate logerr("No test address configured on " 938*0Sstevel@tonic-gate "interface %s; disabling probe-based " 939*0Sstevel@tonic-gate "failure detection on it\n", pi->pi_name); 940*0Sstevel@tonic-gate pi->pi_taddrmsg_printed = 1; 941*0Sstevel@tonic-gate } 942*0Sstevel@tonic-gate } else if (pi->pi_taddrmsg_printed) { 943*0Sstevel@tonic-gate logerr("Test address now configured on interface %s; " 944*0Sstevel@tonic-gate "enabling probe-based failure detection on it\n", 945*0Sstevel@tonic-gate pi->pi_name); 946*0Sstevel@tonic-gate pi->pi_taddrmsg_printed = 0; 947*0Sstevel@tonic-gate } 948*0Sstevel@tonic-gate 949*0Sstevel@tonic-gate } 950*0Sstevel@tonic-gate } 951*0Sstevel@tonic-gate 952*0Sstevel@tonic-gate /* 953*0Sstevel@tonic-gate * Timer mechanism using relative time (in milliseconds) from the 954*0Sstevel@tonic-gate * previous timer event. Timers exceeding TIMER_INFINITY milliseconds 955*0Sstevel@tonic-gate * will fire after TIMER_INFINITY milliseconds. 956*0Sstevel@tonic-gate * Unsigned arithmetic note: We assume a 32-bit circular sequence space for 957*0Sstevel@tonic-gate * time values. Hence 2 consecutive timer events cannot be spaced farther 958*0Sstevel@tonic-gate * than 0x7fffffff. We call this TIMER_INFINITY, and it is the maximum value 959*0Sstevel@tonic-gate * that can be passed for the delay parameter of timer_schedule() 960*0Sstevel@tonic-gate */ 961*0Sstevel@tonic-gate static uint_t timer_next; /* Currently scheduled timeout */ 962*0Sstevel@tonic-gate static boolean_t timer_active = _B_FALSE; /* SIGALRM has not yet occurred */ 963*0Sstevel@tonic-gate 964*0Sstevel@tonic-gate static void 965*0Sstevel@tonic-gate timer_init(void) 966*0Sstevel@tonic-gate { 967*0Sstevel@tonic-gate timer_next = getcurrenttime() + TIMER_INFINITY; 968*0Sstevel@tonic-gate /* 969*0Sstevel@tonic-gate * The call to run_timeouts() will get the timer started 970*0Sstevel@tonic-gate * Since there are no phyints at this point, the timer will 971*0Sstevel@tonic-gate * be set for IF_SCAN_INTERVAL ms. 972*0Sstevel@tonic-gate */ 973*0Sstevel@tonic-gate run_timeouts(); 974*0Sstevel@tonic-gate } 975*0Sstevel@tonic-gate 976*0Sstevel@tonic-gate /* 977*0Sstevel@tonic-gate * Make sure the next SIGALRM occurs delay milliseconds from the current 978*0Sstevel@tonic-gate * time if not earlier. We are interested only in time differences. 979*0Sstevel@tonic-gate */ 980*0Sstevel@tonic-gate void 981*0Sstevel@tonic-gate timer_schedule(uint_t delay) 982*0Sstevel@tonic-gate { 983*0Sstevel@tonic-gate uint_t now; 984*0Sstevel@tonic-gate struct itimerval itimerval; 985*0Sstevel@tonic-gate 986*0Sstevel@tonic-gate if (debug & D_TIMER) 987*0Sstevel@tonic-gate logdebug("timer_schedule(%u)\n", delay); 988*0Sstevel@tonic-gate 989*0Sstevel@tonic-gate assert(delay <= TIMER_INFINITY); 990*0Sstevel@tonic-gate 991*0Sstevel@tonic-gate now = getcurrenttime(); 992*0Sstevel@tonic-gate if (delay == 0) { 993*0Sstevel@tonic-gate /* Minimum allowed delay */ 994*0Sstevel@tonic-gate delay = 1; 995*0Sstevel@tonic-gate } 996*0Sstevel@tonic-gate /* Will this timer occur before the currently scheduled SIGALRM? */ 997*0Sstevel@tonic-gate if (timer_active && TIME_GE(now + delay, timer_next)) { 998*0Sstevel@tonic-gate if (debug & D_TIMER) { 999*0Sstevel@tonic-gate logdebug("timer_schedule(%u) - no action: " 1000*0Sstevel@tonic-gate "now %u next %u\n", delay, now, timer_next); 1001*0Sstevel@tonic-gate } 1002*0Sstevel@tonic-gate return; 1003*0Sstevel@tonic-gate } 1004*0Sstevel@tonic-gate timer_next = now + delay; 1005*0Sstevel@tonic-gate 1006*0Sstevel@tonic-gate itimerval.it_value.tv_sec = delay / 1000; 1007*0Sstevel@tonic-gate itimerval.it_value.tv_usec = (delay % 1000) * 1000; 1008*0Sstevel@tonic-gate itimerval.it_interval.tv_sec = 0; 1009*0Sstevel@tonic-gate itimerval.it_interval.tv_usec = 0; 1010*0Sstevel@tonic-gate if (debug & D_TIMER) { 1011*0Sstevel@tonic-gate logdebug("timer_schedule(%u): sec %ld usec %ld\n", 1012*0Sstevel@tonic-gate delay, itimerval.it_value.tv_sec, 1013*0Sstevel@tonic-gate itimerval.it_value.tv_usec); 1014*0Sstevel@tonic-gate } 1015*0Sstevel@tonic-gate timer_active = _B_TRUE; 1016*0Sstevel@tonic-gate if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0) { 1017*0Sstevel@tonic-gate logperror("timer_schedule: setitimer"); 1018*0Sstevel@tonic-gate exit(2); 1019*0Sstevel@tonic-gate } 1020*0Sstevel@tonic-gate } 1021*0Sstevel@tonic-gate 1022*0Sstevel@tonic-gate /* 1023*0Sstevel@tonic-gate * Timer has fired. Determine when the next timer event will occur by asking 1024*0Sstevel@tonic-gate * all the timer routines. Should not be called from a timer routine. 1025*0Sstevel@tonic-gate */ 1026*0Sstevel@tonic-gate static void 1027*0Sstevel@tonic-gate run_timeouts(void) 1028*0Sstevel@tonic-gate { 1029*0Sstevel@tonic-gate uint_t next; 1030*0Sstevel@tonic-gate uint_t next_event_time; 1031*0Sstevel@tonic-gate struct phyint_instance *pii; 1032*0Sstevel@tonic-gate struct phyint_instance *next_pii; 1033*0Sstevel@tonic-gate static boolean_t timeout_running; 1034*0Sstevel@tonic-gate 1035*0Sstevel@tonic-gate /* assert that recursive timeouts don't happen. */ 1036*0Sstevel@tonic-gate assert(!timeout_running); 1037*0Sstevel@tonic-gate 1038*0Sstevel@tonic-gate timeout_running = _B_TRUE; 1039*0Sstevel@tonic-gate 1040*0Sstevel@tonic-gate if (debug & D_TIMER) 1041*0Sstevel@tonic-gate logdebug("run_timeouts()\n"); 1042*0Sstevel@tonic-gate 1043*0Sstevel@tonic-gate next = TIMER_INFINITY; 1044*0Sstevel@tonic-gate 1045*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = next_pii) { 1046*0Sstevel@tonic-gate next_pii = pii->pii_next; 1047*0Sstevel@tonic-gate next_event_time = phyint_inst_timer(pii); 1048*0Sstevel@tonic-gate if (next_event_time != TIMER_INFINITY && next_event_time < next) 1049*0Sstevel@tonic-gate next = next_event_time; 1050*0Sstevel@tonic-gate 1051*0Sstevel@tonic-gate if (debug & D_TIMER) { 1052*0Sstevel@tonic-gate logdebug("run_timeouts(%s %s): next scheduled for" 1053*0Sstevel@tonic-gate " this phyint inst %u, next scheduled global" 1054*0Sstevel@tonic-gate " %u ms\n", 1055*0Sstevel@tonic-gate AF_STR(pii->pii_af), pii->pii_phyint->pi_name, 1056*0Sstevel@tonic-gate next_event_time, next); 1057*0Sstevel@tonic-gate } 1058*0Sstevel@tonic-gate } 1059*0Sstevel@tonic-gate 1060*0Sstevel@tonic-gate /* 1061*0Sstevel@tonic-gate * Make sure initifs() is called at least once every 1062*0Sstevel@tonic-gate * IF_SCAN_INTERVAL, to make sure that we are in sync 1063*0Sstevel@tonic-gate * with the kernel, in case we have missed any routing 1064*0Sstevel@tonic-gate * socket messages. 1065*0Sstevel@tonic-gate */ 1066*0Sstevel@tonic-gate if (next > IF_SCAN_INTERVAL) 1067*0Sstevel@tonic-gate next = IF_SCAN_INTERVAL; 1068*0Sstevel@tonic-gate 1069*0Sstevel@tonic-gate if ((getcurrenttime() - last_initifs_time) > IF_SCAN_INTERVAL) { 1070*0Sstevel@tonic-gate initifs(); 1071*0Sstevel@tonic-gate check_config(); 1072*0Sstevel@tonic-gate } 1073*0Sstevel@tonic-gate 1074*0Sstevel@tonic-gate if (debug & D_TIMER) 1075*0Sstevel@tonic-gate logdebug("run_timeouts: %u ms\n", next); 1076*0Sstevel@tonic-gate 1077*0Sstevel@tonic-gate timer_schedule(next); 1078*0Sstevel@tonic-gate timeout_running = _B_FALSE; 1079*0Sstevel@tonic-gate } 1080*0Sstevel@tonic-gate 1081*0Sstevel@tonic-gate static int eventpipe_read = -1; /* Used for synchronous signal delivery */ 1082*0Sstevel@tonic-gate static int eventpipe_write = -1; 1083*0Sstevel@tonic-gate static boolean_t cleanup_started = _B_FALSE; 1084*0Sstevel@tonic-gate /* Don't write to eventpipe if in cleanup */ 1085*0Sstevel@tonic-gate /* 1086*0Sstevel@tonic-gate * Ensure that signals are processed synchronously with the rest of 1087*0Sstevel@tonic-gate * the code by just writing a one character signal number on the pipe. 1088*0Sstevel@tonic-gate * The poll loop will pick this up and process the signal event. 1089*0Sstevel@tonic-gate */ 1090*0Sstevel@tonic-gate static void 1091*0Sstevel@tonic-gate sig_handler(int signo) 1092*0Sstevel@tonic-gate { 1093*0Sstevel@tonic-gate uchar_t buf = (uchar_t)signo; 1094*0Sstevel@tonic-gate 1095*0Sstevel@tonic-gate /* 1096*0Sstevel@tonic-gate * Don't write to pipe if cleanup has already begun. cleanup() 1097*0Sstevel@tonic-gate * might have closed the pipe already 1098*0Sstevel@tonic-gate */ 1099*0Sstevel@tonic-gate if (cleanup_started) 1100*0Sstevel@tonic-gate return; 1101*0Sstevel@tonic-gate 1102*0Sstevel@tonic-gate if (eventpipe_write == -1) { 1103*0Sstevel@tonic-gate logerr("sig_handler: no pipe found\n"); 1104*0Sstevel@tonic-gate return; 1105*0Sstevel@tonic-gate } 1106*0Sstevel@tonic-gate if (write(eventpipe_write, &buf, sizeof (buf)) < 0) 1107*0Sstevel@tonic-gate logperror("sig_handler: write"); 1108*0Sstevel@tonic-gate } 1109*0Sstevel@tonic-gate 1110*0Sstevel@tonic-gate extern struct probes_missed probes_missed; 1111*0Sstevel@tonic-gate 1112*0Sstevel@tonic-gate /* 1113*0Sstevel@tonic-gate * Pick up a signal "byte" from the pipe and process it. 1114*0Sstevel@tonic-gate */ 1115*0Sstevel@tonic-gate static void 1116*0Sstevel@tonic-gate in_signal(int fd) 1117*0Sstevel@tonic-gate { 1118*0Sstevel@tonic-gate uchar_t buf; 1119*0Sstevel@tonic-gate uint64_t sent, acked, lost, unacked, unknown; 1120*0Sstevel@tonic-gate struct phyint_instance *pii; 1121*0Sstevel@tonic-gate int pr_ndx; 1122*0Sstevel@tonic-gate 1123*0Sstevel@tonic-gate switch (read(fd, &buf, sizeof (buf))) { 1124*0Sstevel@tonic-gate case -1: 1125*0Sstevel@tonic-gate logperror("in_signal: read"); 1126*0Sstevel@tonic-gate exit(1); 1127*0Sstevel@tonic-gate /* NOTREACHED */ 1128*0Sstevel@tonic-gate case 1: 1129*0Sstevel@tonic-gate break; 1130*0Sstevel@tonic-gate case 0: 1131*0Sstevel@tonic-gate logerr("in_signal: read end of file\n"); 1132*0Sstevel@tonic-gate exit(1); 1133*0Sstevel@tonic-gate /* NOTREACHED */ 1134*0Sstevel@tonic-gate default: 1135*0Sstevel@tonic-gate logerr("in_signal: read > 1\n"); 1136*0Sstevel@tonic-gate exit(1); 1137*0Sstevel@tonic-gate } 1138*0Sstevel@tonic-gate 1139*0Sstevel@tonic-gate if (debug & D_TIMER) 1140*0Sstevel@tonic-gate logdebug("in_signal() got %d\n", buf); 1141*0Sstevel@tonic-gate 1142*0Sstevel@tonic-gate switch (buf) { 1143*0Sstevel@tonic-gate case SIGALRM: 1144*0Sstevel@tonic-gate if (debug & D_TIMER) { 1145*0Sstevel@tonic-gate uint_t now = getcurrenttime(); 1146*0Sstevel@tonic-gate 1147*0Sstevel@tonic-gate logdebug("in_signal(SIGALRM) delta %u\n", 1148*0Sstevel@tonic-gate now - timer_next); 1149*0Sstevel@tonic-gate } 1150*0Sstevel@tonic-gate timer_active = _B_FALSE; 1151*0Sstevel@tonic-gate run_timeouts(); 1152*0Sstevel@tonic-gate break; 1153*0Sstevel@tonic-gate case SIGUSR1: 1154*0Sstevel@tonic-gate logdebug("Printing configuration:\n"); 1155*0Sstevel@tonic-gate /* Print out the internal tables */ 1156*0Sstevel@tonic-gate phyint_inst_print_all(); 1157*0Sstevel@tonic-gate 1158*0Sstevel@tonic-gate /* 1159*0Sstevel@tonic-gate * Print out the accumulated statistics about missed 1160*0Sstevel@tonic-gate * probes (happens due to scheduling delay). 1161*0Sstevel@tonic-gate */ 1162*0Sstevel@tonic-gate logerr("Missed sending total of %d probes spread over" 1163*0Sstevel@tonic-gate " %d occurrences\n", probes_missed.pm_nprobes, 1164*0Sstevel@tonic-gate probes_missed.pm_ntimes); 1165*0Sstevel@tonic-gate 1166*0Sstevel@tonic-gate /* 1167*0Sstevel@tonic-gate * Print out the accumulated statistics about probes 1168*0Sstevel@tonic-gate * that were sent. 1169*0Sstevel@tonic-gate */ 1170*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; 1171*0Sstevel@tonic-gate pii = pii->pii_next) { 1172*0Sstevel@tonic-gate unacked = 0; 1173*0Sstevel@tonic-gate acked = pii->pii_cum_stats.acked; 1174*0Sstevel@tonic-gate lost = pii->pii_cum_stats.lost; 1175*0Sstevel@tonic-gate sent = pii->pii_cum_stats.sent; 1176*0Sstevel@tonic-gate unknown = pii->pii_cum_stats.unknown; 1177*0Sstevel@tonic-gate for (pr_ndx = 0; pr_ndx < PROBE_STATS_COUNT; pr_ndx++) { 1178*0Sstevel@tonic-gate switch (pii->pii_probes[pr_ndx].pr_status) { 1179*0Sstevel@tonic-gate case PR_ACKED: 1180*0Sstevel@tonic-gate acked++; 1181*0Sstevel@tonic-gate break; 1182*0Sstevel@tonic-gate case PR_LOST: 1183*0Sstevel@tonic-gate lost++; 1184*0Sstevel@tonic-gate break; 1185*0Sstevel@tonic-gate case PR_UNACKED: 1186*0Sstevel@tonic-gate unacked++; 1187*0Sstevel@tonic-gate break; 1188*0Sstevel@tonic-gate } 1189*0Sstevel@tonic-gate } 1190*0Sstevel@tonic-gate logerr("\nProbe stats on (%s %s)\n" 1191*0Sstevel@tonic-gate "Number of probes sent %lld\n" 1192*0Sstevel@tonic-gate "Number of probe acks received %lld\n" 1193*0Sstevel@tonic-gate "Number of probes/acks lost %lld\n" 1194*0Sstevel@tonic-gate "Number of valid unacknowled probes %lld\n" 1195*0Sstevel@tonic-gate "Number of ambiguous probe acks received %lld\n", 1196*0Sstevel@tonic-gate AF_STR(pii->pii_af), pii->pii_name, 1197*0Sstevel@tonic-gate sent, acked, lost, unacked, unknown); 1198*0Sstevel@tonic-gate } 1199*0Sstevel@tonic-gate break; 1200*0Sstevel@tonic-gate case SIGHUP: 1201*0Sstevel@tonic-gate logerr("SIGHUP: restart and reread config file\n"); 1202*0Sstevel@tonic-gate cleanup(); 1203*0Sstevel@tonic-gate (void) execv(argv0[0], argv0); 1204*0Sstevel@tonic-gate _exit(0177); 1205*0Sstevel@tonic-gate /* NOTREACHED */ 1206*0Sstevel@tonic-gate case SIGINT: 1207*0Sstevel@tonic-gate case SIGTERM: 1208*0Sstevel@tonic-gate case SIGQUIT: 1209*0Sstevel@tonic-gate cleanup(); 1210*0Sstevel@tonic-gate exit(0); 1211*0Sstevel@tonic-gate /* NOTREACHED */ 1212*0Sstevel@tonic-gate default: 1213*0Sstevel@tonic-gate logerr("in_signal: unknown signal: %d\n", buf); 1214*0Sstevel@tonic-gate } 1215*0Sstevel@tonic-gate } 1216*0Sstevel@tonic-gate 1217*0Sstevel@tonic-gate static void 1218*0Sstevel@tonic-gate cleanup(void) 1219*0Sstevel@tonic-gate { 1220*0Sstevel@tonic-gate struct phyint_instance *pii; 1221*0Sstevel@tonic-gate struct phyint_instance *next_pii; 1222*0Sstevel@tonic-gate 1223*0Sstevel@tonic-gate /* 1224*0Sstevel@tonic-gate * Make sure that we don't write to eventpipe in 1225*0Sstevel@tonic-gate * sig_handler() if any signal notably SIGALRM, 1226*0Sstevel@tonic-gate * occurs after we close the eventpipe descriptor below 1227*0Sstevel@tonic-gate */ 1228*0Sstevel@tonic-gate cleanup_started = _B_TRUE; 1229*0Sstevel@tonic-gate 1230*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = next_pii) { 1231*0Sstevel@tonic-gate next_pii = pii->pii_next; 1232*0Sstevel@tonic-gate phyint_inst_delete(pii); 1233*0Sstevel@tonic-gate } 1234*0Sstevel@tonic-gate 1235*0Sstevel@tonic-gate (void) close(ifsock_v4); 1236*0Sstevel@tonic-gate (void) close(ifsock_v6); 1237*0Sstevel@tonic-gate (void) close(rtsock_v4); 1238*0Sstevel@tonic-gate (void) close(rtsock_v6); 1239*0Sstevel@tonic-gate (void) close(lsock_v4); 1240*0Sstevel@tonic-gate (void) close(lsock_v6); 1241*0Sstevel@tonic-gate (void) close(0); 1242*0Sstevel@tonic-gate (void) close(1); 1243*0Sstevel@tonic-gate (void) close(2); 1244*0Sstevel@tonic-gate (void) close(mibfd); 1245*0Sstevel@tonic-gate (void) close(eventpipe_read); 1246*0Sstevel@tonic-gate (void) close(eventpipe_write); 1247*0Sstevel@tonic-gate } 1248*0Sstevel@tonic-gate 1249*0Sstevel@tonic-gate /* 1250*0Sstevel@tonic-gate * Create pipe for signal delivery and set up signal handlers. 1251*0Sstevel@tonic-gate */ 1252*0Sstevel@tonic-gate static void 1253*0Sstevel@tonic-gate setup_eventpipe(void) 1254*0Sstevel@tonic-gate { 1255*0Sstevel@tonic-gate int fds[2]; 1256*0Sstevel@tonic-gate struct sigaction act; 1257*0Sstevel@tonic-gate 1258*0Sstevel@tonic-gate if ((pipe(fds)) < 0) { 1259*0Sstevel@tonic-gate logperror("setup_eventpipe: pipe"); 1260*0Sstevel@tonic-gate exit(1); 1261*0Sstevel@tonic-gate } 1262*0Sstevel@tonic-gate eventpipe_read = fds[0]; 1263*0Sstevel@tonic-gate eventpipe_write = fds[1]; 1264*0Sstevel@tonic-gate if (poll_add(eventpipe_read) == -1) { 1265*0Sstevel@tonic-gate exit(1); 1266*0Sstevel@tonic-gate } 1267*0Sstevel@tonic-gate 1268*0Sstevel@tonic-gate act.sa_handler = sig_handler; 1269*0Sstevel@tonic-gate act.sa_flags = SA_RESTART; 1270*0Sstevel@tonic-gate (void) sigaction(SIGALRM, &act, NULL); 1271*0Sstevel@tonic-gate 1272*0Sstevel@tonic-gate (void) sigset(SIGHUP, sig_handler); 1273*0Sstevel@tonic-gate (void) sigset(SIGUSR1, sig_handler); 1274*0Sstevel@tonic-gate (void) sigset(SIGTERM, sig_handler); 1275*0Sstevel@tonic-gate (void) sigset(SIGINT, sig_handler); 1276*0Sstevel@tonic-gate (void) sigset(SIGQUIT, sig_handler); 1277*0Sstevel@tonic-gate } 1278*0Sstevel@tonic-gate 1279*0Sstevel@tonic-gate /* 1280*0Sstevel@tonic-gate * Create a routing socket for receiving RTM_IFINFO messages. 1281*0Sstevel@tonic-gate */ 1282*0Sstevel@tonic-gate static int 1283*0Sstevel@tonic-gate setup_rtsock(int af) 1284*0Sstevel@tonic-gate { 1285*0Sstevel@tonic-gate int s; 1286*0Sstevel@tonic-gate int flags; 1287*0Sstevel@tonic-gate 1288*0Sstevel@tonic-gate s = socket(PF_ROUTE, SOCK_RAW, af); 1289*0Sstevel@tonic-gate if (s == -1) { 1290*0Sstevel@tonic-gate logperror("setup_rtsock: socket PF_ROUTE"); 1291*0Sstevel@tonic-gate exit(1); 1292*0Sstevel@tonic-gate } 1293*0Sstevel@tonic-gate if ((flags = fcntl(s, F_GETFL, 0)) < 0) { 1294*0Sstevel@tonic-gate logperror("setup_rtsock: fcntl F_GETFL"); 1295*0Sstevel@tonic-gate (void) close(s); 1296*0Sstevel@tonic-gate exit(1); 1297*0Sstevel@tonic-gate } 1298*0Sstevel@tonic-gate if ((fcntl(s, F_SETFL, flags | O_NONBLOCK)) < 0) { 1299*0Sstevel@tonic-gate logperror("setup_rtsock: fcntl F_SETFL"); 1300*0Sstevel@tonic-gate (void) close(s); 1301*0Sstevel@tonic-gate exit(1); 1302*0Sstevel@tonic-gate } 1303*0Sstevel@tonic-gate if (poll_add(s) == -1) { 1304*0Sstevel@tonic-gate (void) close(s); 1305*0Sstevel@tonic-gate exit(1); 1306*0Sstevel@tonic-gate } 1307*0Sstevel@tonic-gate return (s); 1308*0Sstevel@tonic-gate } 1309*0Sstevel@tonic-gate 1310*0Sstevel@tonic-gate /* 1311*0Sstevel@tonic-gate * Process an RTM_IFINFO message received on a routing socket. 1312*0Sstevel@tonic-gate * The return value indicates whether a full interface scan is required. 1313*0Sstevel@tonic-gate * Link up/down notifications from the NICs are reflected in the 1314*0Sstevel@tonic-gate * IFF_RUNNING flag. 1315*0Sstevel@tonic-gate * If just the state of the IFF_RUNNING interface flag has changed, a 1316*0Sstevel@tonic-gate * a full interface scan isn't required. 1317*0Sstevel@tonic-gate */ 1318*0Sstevel@tonic-gate static boolean_t 1319*0Sstevel@tonic-gate process_rtm_ifinfo(if_msghdr_t *ifm, int type) 1320*0Sstevel@tonic-gate { 1321*0Sstevel@tonic-gate struct sockaddr_dl *sdl; 1322*0Sstevel@tonic-gate struct phyint *pi; 1323*0Sstevel@tonic-gate uint64_t old_flags; 1324*0Sstevel@tonic-gate struct phyint_instance *pii; 1325*0Sstevel@tonic-gate 1326*0Sstevel@tonic-gate assert(ifm->ifm_type == RTM_IFINFO && ifm->ifm_addrs == RTA_IFP); 1327*0Sstevel@tonic-gate 1328*0Sstevel@tonic-gate /* 1329*0Sstevel@tonic-gate * Although the sockaddr_dl structure is directly after the 1330*0Sstevel@tonic-gate * if_msghdr_t structure. At the time of writing, the size of the 1331*0Sstevel@tonic-gate * if_msghdr_t structure is different on 32 and 64 bit kernels, due 1332*0Sstevel@tonic-gate * to the presence of a timeval structure, which contains longs, 1333*0Sstevel@tonic-gate * in the if_data structure. Anyway, we know where the message ends, 1334*0Sstevel@tonic-gate * so we work backwards to get the start of the sockaddr_dl structure. 1335*0Sstevel@tonic-gate */ 1336*0Sstevel@tonic-gate /*LINTED*/ 1337*0Sstevel@tonic-gate sdl = (struct sockaddr_dl *)((char *)ifm + ifm->ifm_msglen - 1338*0Sstevel@tonic-gate sizeof (struct sockaddr_dl)); 1339*0Sstevel@tonic-gate 1340*0Sstevel@tonic-gate assert(sdl->sdl_family == AF_LINK); 1341*0Sstevel@tonic-gate 1342*0Sstevel@tonic-gate /* 1343*0Sstevel@tonic-gate * The interface name is in sdl_data. 1344*0Sstevel@tonic-gate * RTM_IFINFO messages are only generated for logical interface 1345*0Sstevel@tonic-gate * zero, so there is no colon and logical interface number to 1346*0Sstevel@tonic-gate * strip from the name. The name is not null terminated, but 1347*0Sstevel@tonic-gate * there should be enough space in sdl_data to add the null. 1348*0Sstevel@tonic-gate */ 1349*0Sstevel@tonic-gate if (sdl->sdl_nlen >= sizeof (sdl->sdl_data)) { 1350*0Sstevel@tonic-gate if (debug & D_LINKNOTE) 1351*0Sstevel@tonic-gate logdebug("process_rtm_ifinfo: " 1352*0Sstevel@tonic-gate "phyint name too long\n"); 1353*0Sstevel@tonic-gate return (_B_TRUE); 1354*0Sstevel@tonic-gate } 1355*0Sstevel@tonic-gate sdl->sdl_data[sdl->sdl_nlen] = 0; 1356*0Sstevel@tonic-gate 1357*0Sstevel@tonic-gate pi = phyint_lookup(sdl->sdl_data); 1358*0Sstevel@tonic-gate if (pi == NULL) { 1359*0Sstevel@tonic-gate if (debug & D_LINKNOTE) 1360*0Sstevel@tonic-gate logdebug("process_rtm_ifinfo: phyint lookup failed" 1361*0Sstevel@tonic-gate " for %s\n", sdl->sdl_data); 1362*0Sstevel@tonic-gate return (_B_TRUE); 1363*0Sstevel@tonic-gate } 1364*0Sstevel@tonic-gate 1365*0Sstevel@tonic-gate /* 1366*0Sstevel@tonic-gate * We want to try and avoid doing a full interface scan for 1367*0Sstevel@tonic-gate * link state notifications from the NICs, as indicated 1368*0Sstevel@tonic-gate * by the state of the IFF_RUNNING flag. If just the 1369*0Sstevel@tonic-gate * IFF_RUNNING flag has changed state, the link state changes 1370*0Sstevel@tonic-gate * are processed without a full scan. 1371*0Sstevel@tonic-gate * If there is both an IPv4 and IPv6 instance associated with 1372*0Sstevel@tonic-gate * the physical interface, we will get an RTM_IFINFO message 1373*0Sstevel@tonic-gate * for each instance. If we just maintained a single copy of 1374*0Sstevel@tonic-gate * the physical interface flags, it would appear that no flags 1375*0Sstevel@tonic-gate * had changed when the second message is processed, leading us 1376*0Sstevel@tonic-gate * to believe that the message wasn't generated by a flags change, 1377*0Sstevel@tonic-gate * and that a full interface scan is required. 1378*0Sstevel@tonic-gate * To get around this problem, two additional copies of the flags 1379*0Sstevel@tonic-gate * are kept, one copy for each instance. These are only used in 1380*0Sstevel@tonic-gate * this routine. At any one time, all three copies of the flags 1381*0Sstevel@tonic-gate * should be identical except for the IFF_RUNNING flag. The 1382*0Sstevel@tonic-gate * copy of the flags in the "phyint" structure is always up to 1383*0Sstevel@tonic-gate * date. 1384*0Sstevel@tonic-gate */ 1385*0Sstevel@tonic-gate pii = (type == AF_INET) ? pi->pi_v4 : pi->pi_v6; 1386*0Sstevel@tonic-gate if (pii == NULL) { 1387*0Sstevel@tonic-gate if (debug & D_LINKNOTE) 1388*0Sstevel@tonic-gate logdebug("process_rtm_ifinfo: no instance of address " 1389*0Sstevel@tonic-gate "family %s for %s\n", AF_STR(type), pi->pi_name); 1390*0Sstevel@tonic-gate return (_B_TRUE); 1391*0Sstevel@tonic-gate } 1392*0Sstevel@tonic-gate 1393*0Sstevel@tonic-gate old_flags = pii->pii_flags; 1394*0Sstevel@tonic-gate pii->pii_flags = PHYINT_FLAGS(ifm->ifm_flags); 1395*0Sstevel@tonic-gate pi->pi_flags = pii->pii_flags; 1396*0Sstevel@tonic-gate 1397*0Sstevel@tonic-gate if (debug & D_LINKNOTE) { 1398*0Sstevel@tonic-gate logdebug("process_rtm_ifinfo: %s address family: %s, " 1399*0Sstevel@tonic-gate "old flags: %llx, new flags: %llx\n", pi->pi_name, 1400*0Sstevel@tonic-gate AF_STR(type), old_flags, pi->pi_flags); 1401*0Sstevel@tonic-gate } 1402*0Sstevel@tonic-gate 1403*0Sstevel@tonic-gate /* 1404*0Sstevel@tonic-gate * If IFF_STANDBY has changed, indicate that the interface has changed 1405*0Sstevel@tonic-gate * types. 1406*0Sstevel@tonic-gate */ 1407*0Sstevel@tonic-gate if ((old_flags ^ pii->pii_flags) & IFF_STANDBY) 1408*0Sstevel@tonic-gate phyint_newtype(pi); 1409*0Sstevel@tonic-gate 1410*0Sstevel@tonic-gate /* 1411*0Sstevel@tonic-gate * If IFF_INACTIVE has been set, then no data addresses should be 1412*0Sstevel@tonic-gate * hosted on the interface. If IFF_INACTIVE has been cleared, then 1413*0Sstevel@tonic-gate * move previously failed-over addresses back to it, provided it is 1414*0Sstevel@tonic-gate * not failed. For details, see the state diagram in mpd_probe.c. 1415*0Sstevel@tonic-gate */ 1416*0Sstevel@tonic-gate if ((old_flags ^ pii->pii_flags) & IFF_INACTIVE) { 1417*0Sstevel@tonic-gate if (pii->pii_flags & IFF_INACTIVE) { 1418*0Sstevel@tonic-gate assert(pii->pii_flags & IFF_STANDBY); 1419*0Sstevel@tonic-gate if (!pi->pi_empty) { 1420*0Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_TO_NONSTANDBY); 1421*0Sstevel@tonic-gate } 1422*0Sstevel@tonic-gate } else { 1423*0Sstevel@tonic-gate if (pi->pi_state == PI_RUNNING && !pi->pi_full) { 1424*0Sstevel@tonic-gate pi->pi_empty = 0; 1425*0Sstevel@tonic-gate (void) try_failback(pi, _B_FALSE); 1426*0Sstevel@tonic-gate } 1427*0Sstevel@tonic-gate } 1428*0Sstevel@tonic-gate } 1429*0Sstevel@tonic-gate 1430*0Sstevel@tonic-gate /* Has just the IFF_RUNNING flag changed state ? */ 1431*0Sstevel@tonic-gate if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) { 1432*0Sstevel@tonic-gate struct phyint_instance *pii_other; 1433*0Sstevel@tonic-gate /* 1434*0Sstevel@tonic-gate * It wasn't just a link state change. Update 1435*0Sstevel@tonic-gate * the other instance's copy of the flags. 1436*0Sstevel@tonic-gate */ 1437*0Sstevel@tonic-gate pii_other = phyint_inst_other(pii); 1438*0Sstevel@tonic-gate if (pii_other != NULL) 1439*0Sstevel@tonic-gate pii_other->pii_flags = pii->pii_flags; 1440*0Sstevel@tonic-gate return (_B_TRUE); 1441*0Sstevel@tonic-gate } 1442*0Sstevel@tonic-gate 1443*0Sstevel@tonic-gate return (_B_FALSE); 1444*0Sstevel@tonic-gate } 1445*0Sstevel@tonic-gate 1446*0Sstevel@tonic-gate /* 1447*0Sstevel@tonic-gate * Retrieve as many routing socket messages as possible, and try to 1448*0Sstevel@tonic-gate * empty the routing sockets. Initiate full scan of targets or interfaces 1449*0Sstevel@tonic-gate * as needed. 1450*0Sstevel@tonic-gate * We listen on separate IPv4 an IPv6 sockets so that we can accurately 1451*0Sstevel@tonic-gate * detect changes in certain flags (see "process_rtm_ifinfo()" above). 1452*0Sstevel@tonic-gate */ 1453*0Sstevel@tonic-gate static void 1454*0Sstevel@tonic-gate process_rtsock(int rtsock_v4, int rtsock_v6) 1455*0Sstevel@tonic-gate { 1456*0Sstevel@tonic-gate int nbytes; 1457*0Sstevel@tonic-gate int64_t msg[2048 / 8]; 1458*0Sstevel@tonic-gate struct rt_msghdr *rtm; 1459*0Sstevel@tonic-gate boolean_t need_if_scan = _B_FALSE; 1460*0Sstevel@tonic-gate boolean_t need_rt_scan = _B_FALSE; 1461*0Sstevel@tonic-gate boolean_t rtm_ifinfo_seen = _B_FALSE; 1462*0Sstevel@tonic-gate int type; 1463*0Sstevel@tonic-gate 1464*0Sstevel@tonic-gate /* Read as many messages as possible and try to empty the sockets */ 1465*0Sstevel@tonic-gate for (type = AF_INET; ; type = AF_INET6) { 1466*0Sstevel@tonic-gate for (;;) { 1467*0Sstevel@tonic-gate nbytes = read((type == AF_INET) ? rtsock_v4 : 1468*0Sstevel@tonic-gate rtsock_v6, msg, sizeof (msg)); 1469*0Sstevel@tonic-gate if (nbytes <= 0) { 1470*0Sstevel@tonic-gate /* No more messages */ 1471*0Sstevel@tonic-gate break; 1472*0Sstevel@tonic-gate } 1473*0Sstevel@tonic-gate rtm = (struct rt_msghdr *)msg; 1474*0Sstevel@tonic-gate if (rtm->rtm_version != RTM_VERSION) { 1475*0Sstevel@tonic-gate logerr("process_rtsock: version %d " 1476*0Sstevel@tonic-gate "not understood\n", rtm->rtm_version); 1477*0Sstevel@tonic-gate break; 1478*0Sstevel@tonic-gate } 1479*0Sstevel@tonic-gate 1480*0Sstevel@tonic-gate if (debug & D_PHYINT) { 1481*0Sstevel@tonic-gate logdebug("process_rtsock: message %d\n", 1482*0Sstevel@tonic-gate rtm->rtm_type); 1483*0Sstevel@tonic-gate } 1484*0Sstevel@tonic-gate 1485*0Sstevel@tonic-gate switch (rtm->rtm_type) { 1486*0Sstevel@tonic-gate case RTM_NEWADDR: 1487*0Sstevel@tonic-gate case RTM_DELADDR: 1488*0Sstevel@tonic-gate /* 1489*0Sstevel@tonic-gate * Some logical interface has changed, 1490*0Sstevel@tonic-gate * have to scan everything to determine 1491*0Sstevel@tonic-gate * what actually changed. 1492*0Sstevel@tonic-gate */ 1493*0Sstevel@tonic-gate need_if_scan = _B_TRUE; 1494*0Sstevel@tonic-gate break; 1495*0Sstevel@tonic-gate 1496*0Sstevel@tonic-gate case RTM_IFINFO: 1497*0Sstevel@tonic-gate rtm_ifinfo_seen = _B_TRUE; 1498*0Sstevel@tonic-gate need_if_scan |= 1499*0Sstevel@tonic-gate process_rtm_ifinfo((if_msghdr_t *)rtm, 1500*0Sstevel@tonic-gate type); 1501*0Sstevel@tonic-gate break; 1502*0Sstevel@tonic-gate 1503*0Sstevel@tonic-gate case RTM_ADD: 1504*0Sstevel@tonic-gate case RTM_DELETE: 1505*0Sstevel@tonic-gate case RTM_CHANGE: 1506*0Sstevel@tonic-gate case RTM_OLDADD: 1507*0Sstevel@tonic-gate case RTM_OLDDEL: 1508*0Sstevel@tonic-gate need_rt_scan = _B_TRUE; 1509*0Sstevel@tonic-gate break; 1510*0Sstevel@tonic-gate 1511*0Sstevel@tonic-gate default: 1512*0Sstevel@tonic-gate /* Not interesting */ 1513*0Sstevel@tonic-gate break; 1514*0Sstevel@tonic-gate } 1515*0Sstevel@tonic-gate } 1516*0Sstevel@tonic-gate if (type == AF_INET6) 1517*0Sstevel@tonic-gate break; 1518*0Sstevel@tonic-gate } 1519*0Sstevel@tonic-gate 1520*0Sstevel@tonic-gate if (need_if_scan) { 1521*0Sstevel@tonic-gate if (debug & D_LINKNOTE && rtm_ifinfo_seen) 1522*0Sstevel@tonic-gate logdebug("process_rtsock: synchronizing with kernel\n"); 1523*0Sstevel@tonic-gate initifs(); 1524*0Sstevel@tonic-gate } else if (rtm_ifinfo_seen) { 1525*0Sstevel@tonic-gate if (debug & D_LINKNOTE) 1526*0Sstevel@tonic-gate logdebug("process_rtsock: " 1527*0Sstevel@tonic-gate "link up/down notification(s) seen\n"); 1528*0Sstevel@tonic-gate process_link_state_changes(); 1529*0Sstevel@tonic-gate } 1530*0Sstevel@tonic-gate 1531*0Sstevel@tonic-gate if (need_rt_scan) 1532*0Sstevel@tonic-gate init_router_targets(); 1533*0Sstevel@tonic-gate } 1534*0Sstevel@tonic-gate 1535*0Sstevel@tonic-gate /* 1536*0Sstevel@tonic-gate * Look if the phyint instance or one of its logints have been removed from 1537*0Sstevel@tonic-gate * the kernel and take appropriate action. 1538*0Sstevel@tonic-gate * Uses {pii,li}_in_use. 1539*0Sstevel@tonic-gate */ 1540*0Sstevel@tonic-gate static void 1541*0Sstevel@tonic-gate check_if_removed(struct phyint_instance *pii) 1542*0Sstevel@tonic-gate { 1543*0Sstevel@tonic-gate struct logint *li; 1544*0Sstevel@tonic-gate struct logint *next_li; 1545*0Sstevel@tonic-gate 1546*0Sstevel@tonic-gate /* Detect phyints that have been removed from the kernel. */ 1547*0Sstevel@tonic-gate if (!pii->pii_in_use) { 1548*0Sstevel@tonic-gate logtrace("%s %s has been removed from kernel\n", 1549*0Sstevel@tonic-gate AF_STR(pii->pii_af), pii->pii_phyint->pi_name); 1550*0Sstevel@tonic-gate phyint_inst_delete(pii); 1551*0Sstevel@tonic-gate } else { 1552*0Sstevel@tonic-gate /* Detect logints that have been removed. */ 1553*0Sstevel@tonic-gate for (li = pii->pii_logint; li != NULL; li = next_li) { 1554*0Sstevel@tonic-gate next_li = li->li_next; 1555*0Sstevel@tonic-gate if (!li->li_in_use) { 1556*0Sstevel@tonic-gate logint_delete(li); 1557*0Sstevel@tonic-gate } 1558*0Sstevel@tonic-gate } 1559*0Sstevel@tonic-gate } 1560*0Sstevel@tonic-gate } 1561*0Sstevel@tonic-gate 1562*0Sstevel@tonic-gate /* 1563*0Sstevel@tonic-gate * Send down a T_OPTMGMT_REQ to ip asking for all data in the various 1564*0Sstevel@tonic-gate * tables defined by mib2.h. Parse the returned data and extract 1565*0Sstevel@tonic-gate * the 'routing' information table. Process the 'routing' table 1566*0Sstevel@tonic-gate * to get the list of known onlink routers, and update our database. 1567*0Sstevel@tonic-gate * These onlink routers will serve as our probe targets. 1568*0Sstevel@tonic-gate * Returns false, if any system calls resulted in errors, true otherwise. 1569*0Sstevel@tonic-gate */ 1570*0Sstevel@tonic-gate static boolean_t 1571*0Sstevel@tonic-gate update_router_list(int fd) 1572*0Sstevel@tonic-gate { 1573*0Sstevel@tonic-gate union { 1574*0Sstevel@tonic-gate char ubuf[1024]; 1575*0Sstevel@tonic-gate union T_primitives uprim; 1576*0Sstevel@tonic-gate } buf; 1577*0Sstevel@tonic-gate 1578*0Sstevel@tonic-gate int flags; 1579*0Sstevel@tonic-gate struct strbuf ctlbuf; 1580*0Sstevel@tonic-gate struct strbuf databuf; 1581*0Sstevel@tonic-gate struct T_optmgmt_req *tor; 1582*0Sstevel@tonic-gate struct T_optmgmt_ack *toa; 1583*0Sstevel@tonic-gate struct T_error_ack *tea; 1584*0Sstevel@tonic-gate struct opthdr *optp; 1585*0Sstevel@tonic-gate struct opthdr *req; 1586*0Sstevel@tonic-gate int status; 1587*0Sstevel@tonic-gate t_scalar_t prim; 1588*0Sstevel@tonic-gate 1589*0Sstevel@tonic-gate tor = (struct T_optmgmt_req *)&buf; 1590*0Sstevel@tonic-gate 1591*0Sstevel@tonic-gate tor->PRIM_type = T_SVR4_OPTMGMT_REQ; 1592*0Sstevel@tonic-gate tor->OPT_offset = sizeof (struct T_optmgmt_req); 1593*0Sstevel@tonic-gate tor->OPT_length = sizeof (struct opthdr); 1594*0Sstevel@tonic-gate tor->MGMT_flags = T_CURRENT; 1595*0Sstevel@tonic-gate 1596*0Sstevel@tonic-gate req = (struct opthdr *)&tor[1]; 1597*0Sstevel@tonic-gate req->level = MIB2_IP; /* any MIB2_xxx value ok here */ 1598*0Sstevel@tonic-gate req->name = 0; 1599*0Sstevel@tonic-gate req->len = 0; 1600*0Sstevel@tonic-gate 1601*0Sstevel@tonic-gate ctlbuf.buf = (char *)&buf; 1602*0Sstevel@tonic-gate ctlbuf.len = tor->OPT_length + tor->OPT_offset; 1603*0Sstevel@tonic-gate ctlbuf.maxlen = sizeof (buf); 1604*0Sstevel@tonic-gate flags = 0; 1605*0Sstevel@tonic-gate if (putmsg(fd, &ctlbuf, NULL, flags) == -1) { 1606*0Sstevel@tonic-gate logperror("update_router_list: putmsg(ctl)"); 1607*0Sstevel@tonic-gate return (_B_FALSE); 1608*0Sstevel@tonic-gate } 1609*0Sstevel@tonic-gate 1610*0Sstevel@tonic-gate /* 1611*0Sstevel@tonic-gate * The response consists of multiple T_OPTMGMT_ACK msgs, 1 msg for 1612*0Sstevel@tonic-gate * each table defined in mib2.h. Each T_OPTMGMT_ACK msg contains 1613*0Sstevel@tonic-gate * a control and data part. The control part contains a struct 1614*0Sstevel@tonic-gate * T_optmgmt_ack followed by a struct opthdr. The 'opthdr' identifies 1615*0Sstevel@tonic-gate * the level, name and length of the data in the data part. The 1616*0Sstevel@tonic-gate * data part contains the actual table data. The last message 1617*0Sstevel@tonic-gate * is an end-of-data (EOD), consisting of a T_OPTMGMT_ACK and a 1618*0Sstevel@tonic-gate * single option with zero optlen. 1619*0Sstevel@tonic-gate */ 1620*0Sstevel@tonic-gate 1621*0Sstevel@tonic-gate for (;;) { 1622*0Sstevel@tonic-gate /* 1623*0Sstevel@tonic-gate * Go around this loop once for each table. Ignore 1624*0Sstevel@tonic-gate * all tables except the routing information table. 1625*0Sstevel@tonic-gate */ 1626*0Sstevel@tonic-gate flags = 0; 1627*0Sstevel@tonic-gate status = getmsg(fd, &ctlbuf, NULL, &flags); 1628*0Sstevel@tonic-gate if (status < 0) { 1629*0Sstevel@tonic-gate if (errno == EINTR) 1630*0Sstevel@tonic-gate continue; 1631*0Sstevel@tonic-gate logperror("update_router_list: getmsg(ctl)"); 1632*0Sstevel@tonic-gate return (_B_FALSE); 1633*0Sstevel@tonic-gate } 1634*0Sstevel@tonic-gate if (ctlbuf.len < sizeof (t_scalar_t)) { 1635*0Sstevel@tonic-gate logerr("update_router_list: ctlbuf.len %d\n", 1636*0Sstevel@tonic-gate ctlbuf.len); 1637*0Sstevel@tonic-gate return (_B_FALSE); 1638*0Sstevel@tonic-gate } 1639*0Sstevel@tonic-gate 1640*0Sstevel@tonic-gate prim = buf.uprim.type; 1641*0Sstevel@tonic-gate 1642*0Sstevel@tonic-gate switch (prim) { 1643*0Sstevel@tonic-gate 1644*0Sstevel@tonic-gate case T_ERROR_ACK: 1645*0Sstevel@tonic-gate tea = &buf.uprim.error_ack; 1646*0Sstevel@tonic-gate if (ctlbuf.len < sizeof (struct T_error_ack)) { 1647*0Sstevel@tonic-gate logerr("update_router_list: T_ERROR_ACK" 1648*0Sstevel@tonic-gate " ctlbuf.len %d\n", ctlbuf.len); 1649*0Sstevel@tonic-gate return (_B_FALSE); 1650*0Sstevel@tonic-gate } 1651*0Sstevel@tonic-gate logerr("update_router_list: T_ERROR_ACK:" 1652*0Sstevel@tonic-gate " TLI_error = 0x%lx, UNIX_error = 0x%lx\n", 1653*0Sstevel@tonic-gate tea->TLI_error, tea->UNIX_error); 1654*0Sstevel@tonic-gate return (_B_FALSE); 1655*0Sstevel@tonic-gate 1656*0Sstevel@tonic-gate case T_OPTMGMT_ACK: 1657*0Sstevel@tonic-gate toa = &buf.uprim.optmgmt_ack; 1658*0Sstevel@tonic-gate optp = (struct opthdr *)&toa[1]; 1659*0Sstevel@tonic-gate if (ctlbuf.len < sizeof (struct T_optmgmt_ack)) { 1660*0Sstevel@tonic-gate logerr("update_router_list: ctlbuf.len %d\n", 1661*0Sstevel@tonic-gate ctlbuf.len); 1662*0Sstevel@tonic-gate return (_B_FALSE); 1663*0Sstevel@tonic-gate } 1664*0Sstevel@tonic-gate if (toa->MGMT_flags != T_SUCCESS) { 1665*0Sstevel@tonic-gate logerr("update_router_list: MGMT_flags 0x%lx\n", 1666*0Sstevel@tonic-gate toa->MGMT_flags); 1667*0Sstevel@tonic-gate return (_B_FALSE); 1668*0Sstevel@tonic-gate } 1669*0Sstevel@tonic-gate break; 1670*0Sstevel@tonic-gate 1671*0Sstevel@tonic-gate default: 1672*0Sstevel@tonic-gate logerr("update_router_list: unknown primitive %ld\n", 1673*0Sstevel@tonic-gate prim); 1674*0Sstevel@tonic-gate return (_B_FALSE); 1675*0Sstevel@tonic-gate } 1676*0Sstevel@tonic-gate 1677*0Sstevel@tonic-gate /* Process the T_OPGMGMT_ACK below */ 1678*0Sstevel@tonic-gate assert(prim == T_OPTMGMT_ACK); 1679*0Sstevel@tonic-gate 1680*0Sstevel@tonic-gate switch (status) { 1681*0Sstevel@tonic-gate case 0: 1682*0Sstevel@tonic-gate /* 1683*0Sstevel@tonic-gate * We have reached the end of this T_OPTMGMT_ACK 1684*0Sstevel@tonic-gate * message. If this is the last message i.e EOD, 1685*0Sstevel@tonic-gate * return, else process the next T_OPTMGMT_ACK msg. 1686*0Sstevel@tonic-gate */ 1687*0Sstevel@tonic-gate if ((ctlbuf.len == sizeof (struct T_optmgmt_ack) + 1688*0Sstevel@tonic-gate sizeof (struct opthdr)) && optp->len == 0 && 1689*0Sstevel@tonic-gate optp->name == 0 && optp->level == 0) { 1690*0Sstevel@tonic-gate /* 1691*0Sstevel@tonic-gate * This is the EOD message. Return 1692*0Sstevel@tonic-gate */ 1693*0Sstevel@tonic-gate return (_B_TRUE); 1694*0Sstevel@tonic-gate } 1695*0Sstevel@tonic-gate continue; 1696*0Sstevel@tonic-gate 1697*0Sstevel@tonic-gate case MORECTL: 1698*0Sstevel@tonic-gate case MORECTL | MOREDATA: 1699*0Sstevel@tonic-gate /* 1700*0Sstevel@tonic-gate * This should not happen. We should be able to read 1701*0Sstevel@tonic-gate * the control portion in a single getmsg. 1702*0Sstevel@tonic-gate */ 1703*0Sstevel@tonic-gate logerr("update_router_list: MORECTL\n"); 1704*0Sstevel@tonic-gate return (_B_FALSE); 1705*0Sstevel@tonic-gate 1706*0Sstevel@tonic-gate case MOREDATA: 1707*0Sstevel@tonic-gate databuf.maxlen = optp->len; 1708*0Sstevel@tonic-gate /* malloc of 0 bytes is ok */ 1709*0Sstevel@tonic-gate databuf.buf = malloc((size_t)optp->len); 1710*0Sstevel@tonic-gate if (databuf.maxlen != 0 && databuf.buf == NULL) { 1711*0Sstevel@tonic-gate logperror("update_router_list: malloc"); 1712*0Sstevel@tonic-gate return (_B_FALSE); 1713*0Sstevel@tonic-gate } 1714*0Sstevel@tonic-gate databuf.len = 0; 1715*0Sstevel@tonic-gate flags = 0; 1716*0Sstevel@tonic-gate for (;;) { 1717*0Sstevel@tonic-gate status = getmsg(fd, NULL, &databuf, &flags); 1718*0Sstevel@tonic-gate if (status >= 0) { 1719*0Sstevel@tonic-gate break; 1720*0Sstevel@tonic-gate } else if (errno == EINTR) { 1721*0Sstevel@tonic-gate continue; 1722*0Sstevel@tonic-gate } else { 1723*0Sstevel@tonic-gate logperror("update_router_list:" 1724*0Sstevel@tonic-gate " getmsg(data)"); 1725*0Sstevel@tonic-gate free(databuf.buf); 1726*0Sstevel@tonic-gate return (_B_FALSE); 1727*0Sstevel@tonic-gate } 1728*0Sstevel@tonic-gate } 1729*0Sstevel@tonic-gate 1730*0Sstevel@tonic-gate if (optp->level == MIB2_IP && 1731*0Sstevel@tonic-gate optp->name == MIB2_IP_ROUTE) { 1732*0Sstevel@tonic-gate /* LINTED */ 1733*0Sstevel@tonic-gate ire_process_v4((mib2_ipRouteEntry_t *) 1734*0Sstevel@tonic-gate databuf.buf, databuf.len); 1735*0Sstevel@tonic-gate } else if (optp->level == MIB2_IP6 && 1736*0Sstevel@tonic-gate optp->name == MIB2_IP6_ROUTE) { 1737*0Sstevel@tonic-gate /* LINTED */ 1738*0Sstevel@tonic-gate ire_process_v6((mib2_ipv6RouteEntry_t *) 1739*0Sstevel@tonic-gate databuf.buf, databuf.len); 1740*0Sstevel@tonic-gate } 1741*0Sstevel@tonic-gate free(databuf.buf); 1742*0Sstevel@tonic-gate } 1743*0Sstevel@tonic-gate } 1744*0Sstevel@tonic-gate /* NOTREACHED */ 1745*0Sstevel@tonic-gate } 1746*0Sstevel@tonic-gate 1747*0Sstevel@tonic-gate /* 1748*0Sstevel@tonic-gate * Examine the IPv4 routing table, for default routers. For each default 1749*0Sstevel@tonic-gate * router, populate the list of targets of each phyint that is on the same 1750*0Sstevel@tonic-gate * link as the default router 1751*0Sstevel@tonic-gate */ 1752*0Sstevel@tonic-gate static void 1753*0Sstevel@tonic-gate ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len) 1754*0Sstevel@tonic-gate { 1755*0Sstevel@tonic-gate mib2_ipRouteEntry_t *rp; 1756*0Sstevel@tonic-gate mib2_ipRouteEntry_t *rp1; 1757*0Sstevel@tonic-gate struct in_addr nexthop_v4; 1758*0Sstevel@tonic-gate mib2_ipRouteEntry_t *endp; 1759*0Sstevel@tonic-gate 1760*0Sstevel@tonic-gate if (len == 0) 1761*0Sstevel@tonic-gate return; 1762*0Sstevel@tonic-gate assert((len % sizeof (mib2_ipRouteEntry_t)) == 0); 1763*0Sstevel@tonic-gate 1764*0Sstevel@tonic-gate endp = buf + (len / sizeof (mib2_ipRouteEntry_t)); 1765*0Sstevel@tonic-gate 1766*0Sstevel@tonic-gate /* 1767*0Sstevel@tonic-gate * Loop thru the routing table entries. Process any IRE_DEFAULT, 1768*0Sstevel@tonic-gate * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others. 1769*0Sstevel@tonic-gate * For each such IRE_OFFSUBNET ire, get the nexthop gateway address. 1770*0Sstevel@tonic-gate * This is a potential target for probing, which we try to add 1771*0Sstevel@tonic-gate * to the list of probe targets. 1772*0Sstevel@tonic-gate */ 1773*0Sstevel@tonic-gate for (rp = buf; rp < endp; rp++) { 1774*0Sstevel@tonic-gate if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET)) 1775*0Sstevel@tonic-gate continue; 1776*0Sstevel@tonic-gate 1777*0Sstevel@tonic-gate /* Get the nexthop address. */ 1778*0Sstevel@tonic-gate nexthop_v4.s_addr = rp->ipRouteNextHop; 1779*0Sstevel@tonic-gate 1780*0Sstevel@tonic-gate /* 1781*0Sstevel@tonic-gate * Get the nexthop address. Then determine the outgoing 1782*0Sstevel@tonic-gate * interface, by examining all interface IREs, and picking the 1783*0Sstevel@tonic-gate * match. We don't look at the interface specified in the route 1784*0Sstevel@tonic-gate * because we need to add the router target on all matching 1785*0Sstevel@tonic-gate * interfaces anyway; the goal is to avoid falling back to 1786*0Sstevel@tonic-gate * multicast when some interfaces are in the same subnet but 1787*0Sstevel@tonic-gate * not in the same group. 1788*0Sstevel@tonic-gate */ 1789*0Sstevel@tonic-gate for (rp1 = buf; rp1 < endp; rp1++) { 1790*0Sstevel@tonic-gate if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE)) { 1791*0Sstevel@tonic-gate continue; 1792*0Sstevel@tonic-gate } 1793*0Sstevel@tonic-gate 1794*0Sstevel@tonic-gate /* 1795*0Sstevel@tonic-gate * Determine the interface IRE that matches the nexthop. 1796*0Sstevel@tonic-gate * i.e. (IRE addr & IRE mask) == (nexthop & IRE mask) 1797*0Sstevel@tonic-gate */ 1798*0Sstevel@tonic-gate if ((rp1->ipRouteDest & rp1->ipRouteMask) == 1799*0Sstevel@tonic-gate (nexthop_v4.s_addr & rp1->ipRouteMask)) { 1800*0Sstevel@tonic-gate /* 1801*0Sstevel@tonic-gate * We found the interface ire 1802*0Sstevel@tonic-gate */ 1803*0Sstevel@tonic-gate router_add_v4(rp1, nexthop_v4); 1804*0Sstevel@tonic-gate } 1805*0Sstevel@tonic-gate } 1806*0Sstevel@tonic-gate } 1807*0Sstevel@tonic-gate } 1808*0Sstevel@tonic-gate 1809*0Sstevel@tonic-gate void 1810*0Sstevel@tonic-gate router_add_v4(mib2_ipRouteEntry_t *rp1, struct in_addr nexthop_v4) 1811*0Sstevel@tonic-gate { 1812*0Sstevel@tonic-gate char *cp; 1813*0Sstevel@tonic-gate char ifname[LIFNAMSIZ + 1]; 1814*0Sstevel@tonic-gate struct in6_addr nexthop; 1815*0Sstevel@tonic-gate int len; 1816*0Sstevel@tonic-gate 1817*0Sstevel@tonic-gate if (debug & D_TARGET) 1818*0Sstevel@tonic-gate logdebug("router_add_v4()\n"); 1819*0Sstevel@tonic-gate 1820*0Sstevel@tonic-gate len = MIN(rp1->ipRouteIfIndex.o_length, sizeof (ifname) - 1); 1821*0Sstevel@tonic-gate (void) memcpy(ifname, rp1->ipRouteIfIndex.o_bytes, len); 1822*0Sstevel@tonic-gate ifname[len] = '\0'; 1823*0Sstevel@tonic-gate 1824*0Sstevel@tonic-gate if (ifname[0] == '\0') 1825*0Sstevel@tonic-gate return; 1826*0Sstevel@tonic-gate 1827*0Sstevel@tonic-gate cp = strchr(ifname, IF_SEPARATOR); 1828*0Sstevel@tonic-gate if (cp != NULL) 1829*0Sstevel@tonic-gate *cp = '\0'; 1830*0Sstevel@tonic-gate 1831*0Sstevel@tonic-gate IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop); 1832*0Sstevel@tonic-gate router_add_common(AF_INET, ifname, nexthop); 1833*0Sstevel@tonic-gate } 1834*0Sstevel@tonic-gate 1835*0Sstevel@tonic-gate void 1836*0Sstevel@tonic-gate router_add_common(int af, char *ifname, struct in6_addr nexthop) 1837*0Sstevel@tonic-gate { 1838*0Sstevel@tonic-gate struct phyint_instance *pii; 1839*0Sstevel@tonic-gate struct phyint *pi; 1840*0Sstevel@tonic-gate 1841*0Sstevel@tonic-gate if (debug & D_TARGET) 1842*0Sstevel@tonic-gate logdebug("router_add_common(%s %s)\n", AF_STR(af), ifname); 1843*0Sstevel@tonic-gate 1844*0Sstevel@tonic-gate /* 1845*0Sstevel@tonic-gate * Retrieve the phyint instance; bail if it's not known to us yet. 1846*0Sstevel@tonic-gate */ 1847*0Sstevel@tonic-gate pii = phyint_inst_lookup(af, ifname); 1848*0Sstevel@tonic-gate if (pii == NULL) 1849*0Sstevel@tonic-gate return; 1850*0Sstevel@tonic-gate 1851*0Sstevel@tonic-gate /* 1852*0Sstevel@tonic-gate * Don't use our own addresses as targets. 1853*0Sstevel@tonic-gate */ 1854*0Sstevel@tonic-gate if (own_address(pii->pii_af, nexthop)) 1855*0Sstevel@tonic-gate return; 1856*0Sstevel@tonic-gate 1857*0Sstevel@tonic-gate /* 1858*0Sstevel@tonic-gate * If the phyint is part a named group, then add the address to all 1859*0Sstevel@tonic-gate * members of the group; note that this is suboptimal in the IPv4 case 1860*0Sstevel@tonic-gate * as it has already been added to all matching interfaces in 1861*0Sstevel@tonic-gate * ire_process_v4(). Otherwise, add the address only to the phyint 1862*0Sstevel@tonic-gate * itself, since other phyints in the anongroup may not be on the same 1863*0Sstevel@tonic-gate * subnet. 1864*0Sstevel@tonic-gate */ 1865*0Sstevel@tonic-gate pi = pii->pii_phyint; 1866*0Sstevel@tonic-gate if (pi->pi_group == phyint_anongroup) { 1867*0Sstevel@tonic-gate target_add(pii, nexthop, _B_TRUE); 1868*0Sstevel@tonic-gate } else { 1869*0Sstevel@tonic-gate pi = pi->pi_group->pg_phyint; 1870*0Sstevel@tonic-gate for (; pi != NULL; pi = pi->pi_pgnext) 1871*0Sstevel@tonic-gate target_add(PHYINT_INSTANCE(pi, af), nexthop, _B_TRUE); 1872*0Sstevel@tonic-gate } 1873*0Sstevel@tonic-gate } 1874*0Sstevel@tonic-gate 1875*0Sstevel@tonic-gate /* 1876*0Sstevel@tonic-gate * Examine the IPv6 routing table, for default routers. For each default 1877*0Sstevel@tonic-gate * router, populate the list of targets of each phyint that is on the same 1878*0Sstevel@tonic-gate * link as the default router 1879*0Sstevel@tonic-gate */ 1880*0Sstevel@tonic-gate static void 1881*0Sstevel@tonic-gate ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len) 1882*0Sstevel@tonic-gate { 1883*0Sstevel@tonic-gate mib2_ipv6RouteEntry_t *rp; 1884*0Sstevel@tonic-gate mib2_ipv6RouteEntry_t *endp; 1885*0Sstevel@tonic-gate struct in6_addr nexthop_v6; 1886*0Sstevel@tonic-gate 1887*0Sstevel@tonic-gate if (debug & D_TARGET) 1888*0Sstevel@tonic-gate logdebug("ire_process_v6(len %d)\n", len); 1889*0Sstevel@tonic-gate 1890*0Sstevel@tonic-gate if (len == 0) 1891*0Sstevel@tonic-gate return; 1892*0Sstevel@tonic-gate 1893*0Sstevel@tonic-gate assert((len % sizeof (mib2_ipv6RouteEntry_t)) == 0); 1894*0Sstevel@tonic-gate endp = buf + (len / sizeof (mib2_ipv6RouteEntry_t)); 1895*0Sstevel@tonic-gate 1896*0Sstevel@tonic-gate /* 1897*0Sstevel@tonic-gate * Loop thru the routing table entries. Process any IRE_DEFAULT, 1898*0Sstevel@tonic-gate * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others. 1899*0Sstevel@tonic-gate * For each such IRE_OFFSUBNET ire, get the nexthop gateway address. 1900*0Sstevel@tonic-gate * This is a potential target for probing, which we try to add 1901*0Sstevel@tonic-gate * to the list of probe targets. 1902*0Sstevel@tonic-gate */ 1903*0Sstevel@tonic-gate for (rp = buf; rp < endp; rp++) { 1904*0Sstevel@tonic-gate if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET)) 1905*0Sstevel@tonic-gate continue; 1906*0Sstevel@tonic-gate 1907*0Sstevel@tonic-gate /* 1908*0Sstevel@tonic-gate * We have the outgoing interface in ipv6RouteIfIndex 1909*0Sstevel@tonic-gate * if ipv6RouteIfindex.o_length is non-zero. The outgoing 1910*0Sstevel@tonic-gate * interface must be present for link-local addresses. Since 1911*0Sstevel@tonic-gate * we use only link-local addreses for probing, we don't 1912*0Sstevel@tonic-gate * consider the case when the outgoing interface is not 1913*0Sstevel@tonic-gate * known and we need to scan interface ires 1914*0Sstevel@tonic-gate */ 1915*0Sstevel@tonic-gate nexthop_v6 = rp->ipv6RouteNextHop; 1916*0Sstevel@tonic-gate if (rp->ipv6RouteIfIndex.o_length != 0) { 1917*0Sstevel@tonic-gate /* 1918*0Sstevel@tonic-gate * We already have the outgoing interface 1919*0Sstevel@tonic-gate * in ipv6RouteIfIndex. 1920*0Sstevel@tonic-gate */ 1921*0Sstevel@tonic-gate router_add_v6(rp, nexthop_v6); 1922*0Sstevel@tonic-gate } 1923*0Sstevel@tonic-gate } 1924*0Sstevel@tonic-gate } 1925*0Sstevel@tonic-gate 1926*0Sstevel@tonic-gate 1927*0Sstevel@tonic-gate void 1928*0Sstevel@tonic-gate router_add_v6(mib2_ipv6RouteEntry_t *rp1, struct in6_addr nexthop_v6) 1929*0Sstevel@tonic-gate { 1930*0Sstevel@tonic-gate char ifname[LIFNAMSIZ + 1]; 1931*0Sstevel@tonic-gate char *cp; 1932*0Sstevel@tonic-gate int len; 1933*0Sstevel@tonic-gate 1934*0Sstevel@tonic-gate if (debug & D_TARGET) 1935*0Sstevel@tonic-gate logdebug("router_add_v6()\n"); 1936*0Sstevel@tonic-gate 1937*0Sstevel@tonic-gate len = MIN(rp1->ipv6RouteIfIndex.o_length, sizeof (ifname) - 1); 1938*0Sstevel@tonic-gate (void) memcpy(ifname, rp1->ipv6RouteIfIndex.o_bytes, len); 1939*0Sstevel@tonic-gate ifname[len] = '\0'; 1940*0Sstevel@tonic-gate 1941*0Sstevel@tonic-gate if (ifname[0] == '\0') 1942*0Sstevel@tonic-gate return; 1943*0Sstevel@tonic-gate 1944*0Sstevel@tonic-gate cp = strchr(ifname, IF_SEPARATOR); 1945*0Sstevel@tonic-gate if (cp != NULL) 1946*0Sstevel@tonic-gate *cp = '\0'; 1947*0Sstevel@tonic-gate 1948*0Sstevel@tonic-gate router_add_common(AF_INET6, ifname, nexthop_v6); 1949*0Sstevel@tonic-gate } 1950*0Sstevel@tonic-gate 1951*0Sstevel@tonic-gate 1952*0Sstevel@tonic-gate 1953*0Sstevel@tonic-gate /* 1954*0Sstevel@tonic-gate * Build a list of target routers, by scanning the routing tables. 1955*0Sstevel@tonic-gate * It is assumed that interface routes exist, to reach the routers. 1956*0Sstevel@tonic-gate */ 1957*0Sstevel@tonic-gate static void 1958*0Sstevel@tonic-gate init_router_targets(void) 1959*0Sstevel@tonic-gate { 1960*0Sstevel@tonic-gate struct target *tg; 1961*0Sstevel@tonic-gate struct target *next_tg; 1962*0Sstevel@tonic-gate struct phyint_instance *pii; 1963*0Sstevel@tonic-gate struct phyint *pi; 1964*0Sstevel@tonic-gate 1965*0Sstevel@tonic-gate if (force_mcast) 1966*0Sstevel@tonic-gate return; 1967*0Sstevel@tonic-gate 1968*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 1969*0Sstevel@tonic-gate pi = pii->pii_phyint; 1970*0Sstevel@tonic-gate /* 1971*0Sstevel@tonic-gate * Exclude ptp and host targets. Set tg_in_use to false, 1972*0Sstevel@tonic-gate * only for router targets. 1973*0Sstevel@tonic-gate */ 1974*0Sstevel@tonic-gate if (!pii->pii_targets_are_routers || 1975*0Sstevel@tonic-gate (pi->pi_flags & IFF_POINTOPOINT)) 1976*0Sstevel@tonic-gate continue; 1977*0Sstevel@tonic-gate 1978*0Sstevel@tonic-gate for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 1979*0Sstevel@tonic-gate tg->tg_in_use = 0; 1980*0Sstevel@tonic-gate } 1981*0Sstevel@tonic-gate 1982*0Sstevel@tonic-gate if (mibfd < 0) { 1983*0Sstevel@tonic-gate mibfd = open("/dev/ip", O_RDWR); 1984*0Sstevel@tonic-gate if (mibfd < 0) { 1985*0Sstevel@tonic-gate logperror("mibopen: ip open"); 1986*0Sstevel@tonic-gate exit(1); 1987*0Sstevel@tonic-gate } 1988*0Sstevel@tonic-gate } 1989*0Sstevel@tonic-gate 1990*0Sstevel@tonic-gate if (!update_router_list(mibfd)) { 1991*0Sstevel@tonic-gate (void) close(mibfd); 1992*0Sstevel@tonic-gate mibfd = -1; 1993*0Sstevel@tonic-gate } 1994*0Sstevel@tonic-gate 1995*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 1996*0Sstevel@tonic-gate if (!pii->pii_targets_are_routers || 1997*0Sstevel@tonic-gate (pi->pi_flags & IFF_POINTOPOINT)) 1998*0Sstevel@tonic-gate continue; 1999*0Sstevel@tonic-gate 2000*0Sstevel@tonic-gate for (tg = pii->pii_targets; tg != NULL; tg = next_tg) { 2001*0Sstevel@tonic-gate next_tg = tg->tg_next; 2002*0Sstevel@tonic-gate if (!tg->tg_in_use) { 2003*0Sstevel@tonic-gate target_delete(tg); 2004*0Sstevel@tonic-gate } 2005*0Sstevel@tonic-gate } 2006*0Sstevel@tonic-gate } 2007*0Sstevel@tonic-gate } 2008*0Sstevel@tonic-gate 2009*0Sstevel@tonic-gate /* 2010*0Sstevel@tonic-gate * Attempt to assign host targets to any interfaces that do not currently 2011*0Sstevel@tonic-gate * have probe targets by sharing targets with other interfaces in the group. 2012*0Sstevel@tonic-gate */ 2013*0Sstevel@tonic-gate static void 2014*0Sstevel@tonic-gate init_host_targets(void) 2015*0Sstevel@tonic-gate { 2016*0Sstevel@tonic-gate struct phyint_instance *pii; 2017*0Sstevel@tonic-gate struct phyint_group *pg; 2018*0Sstevel@tonic-gate 2019*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 2020*0Sstevel@tonic-gate pg = pii->pii_phyint->pi_group; 2021*0Sstevel@tonic-gate if (pg != phyint_anongroup && pii->pii_targets == NULL) 2022*0Sstevel@tonic-gate dup_host_targets(pii); 2023*0Sstevel@tonic-gate } 2024*0Sstevel@tonic-gate } 2025*0Sstevel@tonic-gate 2026*0Sstevel@tonic-gate /* 2027*0Sstevel@tonic-gate * Duplicate host targets from other phyints of the group to 2028*0Sstevel@tonic-gate * the phyint instance 'desired_pii'. 2029*0Sstevel@tonic-gate */ 2030*0Sstevel@tonic-gate static void 2031*0Sstevel@tonic-gate dup_host_targets(struct phyint_instance *desired_pii) 2032*0Sstevel@tonic-gate { 2033*0Sstevel@tonic-gate int af; 2034*0Sstevel@tonic-gate struct phyint *pi; 2035*0Sstevel@tonic-gate struct phyint_instance *pii; 2036*0Sstevel@tonic-gate struct target *tg; 2037*0Sstevel@tonic-gate 2038*0Sstevel@tonic-gate assert(desired_pii->pii_phyint->pi_group != phyint_anongroup); 2039*0Sstevel@tonic-gate 2040*0Sstevel@tonic-gate af = desired_pii->pii_af; 2041*0Sstevel@tonic-gate 2042*0Sstevel@tonic-gate /* 2043*0Sstevel@tonic-gate * For every phyint in the same group as desired_pii, check if 2044*0Sstevel@tonic-gate * it has any host targets. If so add them to desired_pii. 2045*0Sstevel@tonic-gate */ 2046*0Sstevel@tonic-gate for (pi = desired_pii->pii_phyint; pi != NULL; pi = pi->pi_pgnext) { 2047*0Sstevel@tonic-gate pii = PHYINT_INSTANCE(pi, af); 2048*0Sstevel@tonic-gate /* 2049*0Sstevel@tonic-gate * We know that we don't have targets on this phyint instance 2050*0Sstevel@tonic-gate * since we have been called. But we still check for 2051*0Sstevel@tonic-gate * pii_targets_are_routers because another phyint instance 2052*0Sstevel@tonic-gate * could have router targets, since IFF_NOFAILOVER addresses 2053*0Sstevel@tonic-gate * on different phyint instances may belong to different 2054*0Sstevel@tonic-gate * subnets. 2055*0Sstevel@tonic-gate */ 2056*0Sstevel@tonic-gate if ((pii == NULL) || (pii == desired_pii) || 2057*0Sstevel@tonic-gate pii->pii_targets_are_routers) 2058*0Sstevel@tonic-gate continue; 2059*0Sstevel@tonic-gate for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 2060*0Sstevel@tonic-gate target_create(desired_pii, tg->tg_address, _B_FALSE); 2061*0Sstevel@tonic-gate } 2062*0Sstevel@tonic-gate } 2063*0Sstevel@tonic-gate } 2064*0Sstevel@tonic-gate 2065*0Sstevel@tonic-gate static void 2066*0Sstevel@tonic-gate usage(char *cmd) 2067*0Sstevel@tonic-gate { 2068*0Sstevel@tonic-gate (void) fprintf(stderr, "usage: %s\n", cmd); 2069*0Sstevel@tonic-gate } 2070*0Sstevel@tonic-gate 2071*0Sstevel@tonic-gate 2072*0Sstevel@tonic-gate #define MPATHD_DEFAULT_FILE "/etc/default/mpathd" 2073*0Sstevel@tonic-gate 2074*0Sstevel@tonic-gate /* Get an option from the /etc/default/mpathd file */ 2075*0Sstevel@tonic-gate static char * 2076*0Sstevel@tonic-gate getdefault(char *name) 2077*0Sstevel@tonic-gate { 2078*0Sstevel@tonic-gate char namebuf[BUFSIZ]; 2079*0Sstevel@tonic-gate char *value = NULL; 2080*0Sstevel@tonic-gate 2081*0Sstevel@tonic-gate if (defopen(MPATHD_DEFAULT_FILE) == 0) { 2082*0Sstevel@tonic-gate char *cp; 2083*0Sstevel@tonic-gate int flags; 2084*0Sstevel@tonic-gate 2085*0Sstevel@tonic-gate /* 2086*0Sstevel@tonic-gate * ignore case 2087*0Sstevel@tonic-gate */ 2088*0Sstevel@tonic-gate flags = defcntl(DC_GETFLAGS, 0); 2089*0Sstevel@tonic-gate TURNOFF(flags, DC_CASE); 2090*0Sstevel@tonic-gate (void) defcntl(DC_SETFLAGS, flags); 2091*0Sstevel@tonic-gate 2092*0Sstevel@tonic-gate /* Add "=" to the name */ 2093*0Sstevel@tonic-gate (void) strncpy(namebuf, name, sizeof (namebuf) - 2); 2094*0Sstevel@tonic-gate (void) strncat(namebuf, "=", 2); 2095*0Sstevel@tonic-gate 2096*0Sstevel@tonic-gate if ((cp = defread(namebuf)) != NULL) 2097*0Sstevel@tonic-gate value = strdup(cp); 2098*0Sstevel@tonic-gate 2099*0Sstevel@tonic-gate /* close */ 2100*0Sstevel@tonic-gate (void) defopen((char *)NULL); 2101*0Sstevel@tonic-gate } 2102*0Sstevel@tonic-gate return (value); 2103*0Sstevel@tonic-gate } 2104*0Sstevel@tonic-gate 2105*0Sstevel@tonic-gate 2106*0Sstevel@tonic-gate /* 2107*0Sstevel@tonic-gate * Command line options below 2108*0Sstevel@tonic-gate */ 2109*0Sstevel@tonic-gate boolean_t failback_enabled = _B_TRUE; /* failback enabled/disabled */ 2110*0Sstevel@tonic-gate boolean_t track_all_phyints = _B_FALSE; /* option to track all NICs */ 2111*0Sstevel@tonic-gate static boolean_t adopt = _B_FALSE; 2112*0Sstevel@tonic-gate static boolean_t foreground = _B_FALSE; 2113*0Sstevel@tonic-gate 2114*0Sstevel@tonic-gate int 2115*0Sstevel@tonic-gate main(int argc, char *argv[]) 2116*0Sstevel@tonic-gate { 2117*0Sstevel@tonic-gate int i; 2118*0Sstevel@tonic-gate int c; 2119*0Sstevel@tonic-gate struct phyint_instance *pii; 2120*0Sstevel@tonic-gate char *value; 2121*0Sstevel@tonic-gate 2122*0Sstevel@tonic-gate argv0 = argv; /* Saved for re-exec on SIGHUP */ 2123*0Sstevel@tonic-gate srandom(gethostid()); /* Initialize the random number generator */ 2124*0Sstevel@tonic-gate 2125*0Sstevel@tonic-gate /* 2126*0Sstevel@tonic-gate * NOTE: The messages output by in.mpathd are not suitable for 2127*0Sstevel@tonic-gate * translation, so we do not call textdomain(). 2128*0Sstevel@tonic-gate */ 2129*0Sstevel@tonic-gate (void) setlocale(LC_ALL, ""); 2130*0Sstevel@tonic-gate 2131*0Sstevel@tonic-gate /* 2132*0Sstevel@tonic-gate * Get the user specified value of 'failure detection time' 2133*0Sstevel@tonic-gate * from /etc/default/mpathd 2134*0Sstevel@tonic-gate */ 2135*0Sstevel@tonic-gate value = getdefault("FAILURE_DETECTION_TIME"); 2136*0Sstevel@tonic-gate if (value != NULL) { 2137*0Sstevel@tonic-gate user_failure_detection_time = 2138*0Sstevel@tonic-gate (int)strtol((char *)value, NULL, 0); 2139*0Sstevel@tonic-gate 2140*0Sstevel@tonic-gate if (user_failure_detection_time <= 0) { 2141*0Sstevel@tonic-gate user_failure_detection_time = FAILURE_DETECTION_TIME; 2142*0Sstevel@tonic-gate logerr("Invalid failure detection time %s, assuming " 2143*0Sstevel@tonic-gate "default %d\n", value, user_failure_detection_time); 2144*0Sstevel@tonic-gate 2145*0Sstevel@tonic-gate } else if (user_failure_detection_time < 2146*0Sstevel@tonic-gate MIN_FAILURE_DETECTION_TIME) { 2147*0Sstevel@tonic-gate user_failure_detection_time = 2148*0Sstevel@tonic-gate MIN_FAILURE_DETECTION_TIME; 2149*0Sstevel@tonic-gate logerr("Too small failure detection time of %s, " 2150*0Sstevel@tonic-gate "assuming minimum %d\n", value, 2151*0Sstevel@tonic-gate user_failure_detection_time); 2152*0Sstevel@tonic-gate } 2153*0Sstevel@tonic-gate free(value); 2154*0Sstevel@tonic-gate } else { 2155*0Sstevel@tonic-gate /* User has not specified the parameter, Use default value */ 2156*0Sstevel@tonic-gate user_failure_detection_time = FAILURE_DETECTION_TIME; 2157*0Sstevel@tonic-gate } 2158*0Sstevel@tonic-gate 2159*0Sstevel@tonic-gate /* 2160*0Sstevel@tonic-gate * This gives the frequency at which probes will be sent. 2161*0Sstevel@tonic-gate * When fdt ms elapses, we should be able to determine 2162*0Sstevel@tonic-gate * whether 5 consecutive probes have failed or not. 2163*0Sstevel@tonic-gate * 1 probe will be sent in every user_probe_interval ms, 2164*0Sstevel@tonic-gate * randomly anytime in the (0.5 - 1.0) 2nd half of every 2165*0Sstevel@tonic-gate * user_probe_interval. Thus when we send out probe 'n' we 2166*0Sstevel@tonic-gate * can be sure that probe 'n - 2' is lost, if we have not 2167*0Sstevel@tonic-gate * got the ack. (since the probe interval is > crtt). But 2168*0Sstevel@tonic-gate * probe 'n - 1' may be a valid unacked probe, since the 2169*0Sstevel@tonic-gate * time between 2 successive probes could be as small as 2170*0Sstevel@tonic-gate * 0.5 * user_probe_interval. Hence the NUM_PROBE_FAILS + 2 2171*0Sstevel@tonic-gate */ 2172*0Sstevel@tonic-gate user_probe_interval = user_failure_detection_time / 2173*0Sstevel@tonic-gate (NUM_PROBE_FAILS + 2); 2174*0Sstevel@tonic-gate 2175*0Sstevel@tonic-gate /* 2176*0Sstevel@tonic-gate * Get the user specified value of failback_enabled from 2177*0Sstevel@tonic-gate * /etc/default/mpathd 2178*0Sstevel@tonic-gate */ 2179*0Sstevel@tonic-gate value = getdefault("FAILBACK"); 2180*0Sstevel@tonic-gate if (value != NULL) { 2181*0Sstevel@tonic-gate if (strncasecmp(value, "yes", 3) == 0) 2182*0Sstevel@tonic-gate failback_enabled = _B_TRUE; 2183*0Sstevel@tonic-gate else if (strncasecmp(value, "no", 2) == 0) 2184*0Sstevel@tonic-gate failback_enabled = _B_FALSE; 2185*0Sstevel@tonic-gate else 2186*0Sstevel@tonic-gate logerr("Invalid value for FAILBACK %s\n", value); 2187*0Sstevel@tonic-gate free(value); 2188*0Sstevel@tonic-gate } else { 2189*0Sstevel@tonic-gate failback_enabled = _B_TRUE; 2190*0Sstevel@tonic-gate } 2191*0Sstevel@tonic-gate 2192*0Sstevel@tonic-gate /* 2193*0Sstevel@tonic-gate * Get the user specified value of track_all_phyints from 2194*0Sstevel@tonic-gate * /etc/default/mpathd. The sense is reversed in 2195*0Sstevel@tonic-gate * TRACK_INTERFACES_ONLY_WITH_GROUPS. 2196*0Sstevel@tonic-gate */ 2197*0Sstevel@tonic-gate value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS"); 2198*0Sstevel@tonic-gate if (value != NULL) { 2199*0Sstevel@tonic-gate if (strncasecmp(value, "yes", 3) == 0) 2200*0Sstevel@tonic-gate track_all_phyints = _B_FALSE; 2201*0Sstevel@tonic-gate else if (strncasecmp(value, "no", 2) == 0) 2202*0Sstevel@tonic-gate track_all_phyints = _B_TRUE; 2203*0Sstevel@tonic-gate else 2204*0Sstevel@tonic-gate logerr("Invalid value for " 2205*0Sstevel@tonic-gate "TRACK_INTERFACES_ONLY_WITH_GROUPS %s\n", value); 2206*0Sstevel@tonic-gate free(value); 2207*0Sstevel@tonic-gate } else { 2208*0Sstevel@tonic-gate track_all_phyints = _B_FALSE; 2209*0Sstevel@tonic-gate } 2210*0Sstevel@tonic-gate 2211*0Sstevel@tonic-gate while ((c = getopt(argc, argv, "adD:ml")) != EOF) { 2212*0Sstevel@tonic-gate switch (c) { 2213*0Sstevel@tonic-gate case 'a': 2214*0Sstevel@tonic-gate adopt = _B_TRUE; 2215*0Sstevel@tonic-gate break; 2216*0Sstevel@tonic-gate case 'm': 2217*0Sstevel@tonic-gate force_mcast = _B_TRUE; 2218*0Sstevel@tonic-gate break; 2219*0Sstevel@tonic-gate case 'd': 2220*0Sstevel@tonic-gate debug = D_ALL; 2221*0Sstevel@tonic-gate foreground = _B_TRUE; 2222*0Sstevel@tonic-gate break; 2223*0Sstevel@tonic-gate case 'D': 2224*0Sstevel@tonic-gate i = (int)strtol(optarg, NULL, 0); 2225*0Sstevel@tonic-gate if (i == 0) { 2226*0Sstevel@tonic-gate (void) fprintf(stderr, "Bad debug flags: %s\n", 2227*0Sstevel@tonic-gate optarg); 2228*0Sstevel@tonic-gate exit(1); 2229*0Sstevel@tonic-gate } 2230*0Sstevel@tonic-gate debug |= i; 2231*0Sstevel@tonic-gate foreground = _B_TRUE; 2232*0Sstevel@tonic-gate break; 2233*0Sstevel@tonic-gate case 'l': 2234*0Sstevel@tonic-gate /* 2235*0Sstevel@tonic-gate * Turn off link state notification handling. 2236*0Sstevel@tonic-gate * Undocumented command line flag, for debugging 2237*0Sstevel@tonic-gate * purposes. 2238*0Sstevel@tonic-gate */ 2239*0Sstevel@tonic-gate handle_link_notifications = _B_FALSE; 2240*0Sstevel@tonic-gate break; 2241*0Sstevel@tonic-gate default: 2242*0Sstevel@tonic-gate usage(argv[0]); 2243*0Sstevel@tonic-gate exit(1); 2244*0Sstevel@tonic-gate } 2245*0Sstevel@tonic-gate } 2246*0Sstevel@tonic-gate 2247*0Sstevel@tonic-gate /* 2248*0Sstevel@tonic-gate * The sockets for the loopback command interface should be listening 2249*0Sstevel@tonic-gate * before we fork and exit in daemonize(). This way, whoever started us 2250*0Sstevel@tonic-gate * can use the loopback interface as soon as they get a zero exit 2251*0Sstevel@tonic-gate * status. 2252*0Sstevel@tonic-gate */ 2253*0Sstevel@tonic-gate lsock_v4 = setup_listener(AF_INET); 2254*0Sstevel@tonic-gate lsock_v6 = setup_listener(AF_INET6); 2255*0Sstevel@tonic-gate 2256*0Sstevel@tonic-gate if (lsock_v4 < 0 && lsock_v6 < 0) { 2257*0Sstevel@tonic-gate logerr("main: setup_listener failed for both IPv4 and IPv6\n"); 2258*0Sstevel@tonic-gate exit(1); 2259*0Sstevel@tonic-gate } 2260*0Sstevel@tonic-gate 2261*0Sstevel@tonic-gate if (!foreground) { 2262*0Sstevel@tonic-gate if (!daemonize()) { 2263*0Sstevel@tonic-gate logerr("cannot daemonize\n"); 2264*0Sstevel@tonic-gate exit(EXIT_FAILURE); 2265*0Sstevel@tonic-gate } 2266*0Sstevel@tonic-gate initlog(); 2267*0Sstevel@tonic-gate } 2268*0Sstevel@tonic-gate 2269*0Sstevel@tonic-gate /* 2270*0Sstevel@tonic-gate * Initializations: 2271*0Sstevel@tonic-gate * 1. Create ifsock* sockets. These are used for performing SIOC* 2272*0Sstevel@tonic-gate * ioctls. We have 2 sockets 1 each for IPv4 and IPv6. 2273*0Sstevel@tonic-gate * 2. Initialize a pipe for handling/recording signal events. 2274*0Sstevel@tonic-gate * 3. Create the routing sockets, used for listening 2275*0Sstevel@tonic-gate * to routing / interface changes. 2276*0Sstevel@tonic-gate * 4. phyint_init() - Initialize physical interface state 2277*0Sstevel@tonic-gate * (in mpd_tables.c). Must be done before creating interfaces, 2278*0Sstevel@tonic-gate * which timer_init() does indirectly. 2279*0Sstevel@tonic-gate * 5. timer_init() - Initialize timer related stuff 2280*0Sstevel@tonic-gate * 6. initifs() - Initialize our database of all known interfaces 2281*0Sstevel@tonic-gate * 7. init_router_targets() - Initialize our database of all known 2282*0Sstevel@tonic-gate * router targets. 2283*0Sstevel@tonic-gate */ 2284*0Sstevel@tonic-gate ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0); 2285*0Sstevel@tonic-gate if (ifsock_v4 < 0) { 2286*0Sstevel@tonic-gate logperror("main: IPv4 socket open"); 2287*0Sstevel@tonic-gate exit(1); 2288*0Sstevel@tonic-gate } 2289*0Sstevel@tonic-gate 2290*0Sstevel@tonic-gate ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0); 2291*0Sstevel@tonic-gate if (ifsock_v6 < 0) { 2292*0Sstevel@tonic-gate logperror("main: IPv6 socket open"); 2293*0Sstevel@tonic-gate exit(1); 2294*0Sstevel@tonic-gate } 2295*0Sstevel@tonic-gate 2296*0Sstevel@tonic-gate setup_eventpipe(); 2297*0Sstevel@tonic-gate 2298*0Sstevel@tonic-gate rtsock_v4 = setup_rtsock(AF_INET); 2299*0Sstevel@tonic-gate rtsock_v6 = setup_rtsock(AF_INET6); 2300*0Sstevel@tonic-gate 2301*0Sstevel@tonic-gate if (phyint_init() == -1) { 2302*0Sstevel@tonic-gate logerr("cannot initialize physical interface structures"); 2303*0Sstevel@tonic-gate exit(1); 2304*0Sstevel@tonic-gate } 2305*0Sstevel@tonic-gate 2306*0Sstevel@tonic-gate timer_init(); 2307*0Sstevel@tonic-gate 2308*0Sstevel@tonic-gate initifs(); 2309*0Sstevel@tonic-gate 2310*0Sstevel@tonic-gate /* 2311*0Sstevel@tonic-gate * If we're operating in "adopt" mode and no interfaces need to be 2312*0Sstevel@tonic-gate * tracked, shut down (ifconfig(1M) will restart us on demand if 2313*0Sstevel@tonic-gate * interfaces are subsequently put into multipathing groups). 2314*0Sstevel@tonic-gate */ 2315*0Sstevel@tonic-gate if (adopt && phyint_instances == NULL) 2316*0Sstevel@tonic-gate exit(0); 2317*0Sstevel@tonic-gate 2318*0Sstevel@tonic-gate /* 2319*0Sstevel@tonic-gate * Main body. Keep listening for activity on any of the sockets 2320*0Sstevel@tonic-gate * that we are monitoring and take appropriate action as necessary. 2321*0Sstevel@tonic-gate * signals are also handled synchronously. 2322*0Sstevel@tonic-gate */ 2323*0Sstevel@tonic-gate for (;;) { 2324*0Sstevel@tonic-gate if (poll(pollfds, pollfd_num, -1) < 0) { 2325*0Sstevel@tonic-gate if (errno == EINTR) 2326*0Sstevel@tonic-gate continue; 2327*0Sstevel@tonic-gate logperror("main: poll"); 2328*0Sstevel@tonic-gate exit(1); 2329*0Sstevel@tonic-gate } 2330*0Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 2331*0Sstevel@tonic-gate if ((pollfds[i].fd == -1) || 2332*0Sstevel@tonic-gate !(pollfds[i].revents & POLLIN)) 2333*0Sstevel@tonic-gate continue; 2334*0Sstevel@tonic-gate if (pollfds[i].fd == eventpipe_read) { 2335*0Sstevel@tonic-gate in_signal(eventpipe_read); 2336*0Sstevel@tonic-gate break; 2337*0Sstevel@tonic-gate } 2338*0Sstevel@tonic-gate if (pollfds[i].fd == rtsock_v4 || 2339*0Sstevel@tonic-gate pollfds[i].fd == rtsock_v6) { 2340*0Sstevel@tonic-gate process_rtsock(rtsock_v4, rtsock_v6); 2341*0Sstevel@tonic-gate break; 2342*0Sstevel@tonic-gate } 2343*0Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; 2344*0Sstevel@tonic-gate pii = pii->pii_next) { 2345*0Sstevel@tonic-gate if (pollfds[i].fd == pii->pii_probe_sock) { 2346*0Sstevel@tonic-gate if (pii->pii_af == AF_INET) 2347*0Sstevel@tonic-gate in_data(pii); 2348*0Sstevel@tonic-gate else 2349*0Sstevel@tonic-gate in6_data(pii); 2350*0Sstevel@tonic-gate break; 2351*0Sstevel@tonic-gate } 2352*0Sstevel@tonic-gate } 2353*0Sstevel@tonic-gate if (pollfds[i].fd == lsock_v4) 2354*0Sstevel@tonic-gate loopback_cmd(lsock_v4, AF_INET); 2355*0Sstevel@tonic-gate else if (pollfds[i].fd == lsock_v6) 2356*0Sstevel@tonic-gate loopback_cmd(lsock_v6, AF_INET6); 2357*0Sstevel@tonic-gate } 2358*0Sstevel@tonic-gate if (full_scan_required) { 2359*0Sstevel@tonic-gate initifs(); 2360*0Sstevel@tonic-gate full_scan_required = _B_FALSE; 2361*0Sstevel@tonic-gate } 2362*0Sstevel@tonic-gate } 2363*0Sstevel@tonic-gate /* NOTREACHED */ 2364*0Sstevel@tonic-gate return (EXIT_SUCCESS); 2365*0Sstevel@tonic-gate } 2366*0Sstevel@tonic-gate 2367*0Sstevel@tonic-gate static int 2368*0Sstevel@tonic-gate setup_listener(int af) 2369*0Sstevel@tonic-gate { 2370*0Sstevel@tonic-gate int sock; 2371*0Sstevel@tonic-gate int on; 2372*0Sstevel@tonic-gate int len; 2373*0Sstevel@tonic-gate int ret; 2374*0Sstevel@tonic-gate struct sockaddr_storage laddr; 2375*0Sstevel@tonic-gate struct sockaddr_in *sin; 2376*0Sstevel@tonic-gate struct sockaddr_in6 *sin6; 2377*0Sstevel@tonic-gate struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; 2378*0Sstevel@tonic-gate 2379*0Sstevel@tonic-gate assert(af == AF_INET || af == AF_INET6); 2380*0Sstevel@tonic-gate 2381*0Sstevel@tonic-gate sock = socket(af, SOCK_STREAM, 0); 2382*0Sstevel@tonic-gate if (sock < 0) { 2383*0Sstevel@tonic-gate logperror("setup_listener: socket"); 2384*0Sstevel@tonic-gate exit(1); 2385*0Sstevel@tonic-gate } 2386*0Sstevel@tonic-gate 2387*0Sstevel@tonic-gate on = 1; 2388*0Sstevel@tonic-gate if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&on, 2389*0Sstevel@tonic-gate sizeof (on)) < 0) { 2390*0Sstevel@tonic-gate logperror("setup_listener: setsockopt (SO_REUSEADDR)"); 2391*0Sstevel@tonic-gate exit(1); 2392*0Sstevel@tonic-gate } 2393*0Sstevel@tonic-gate 2394*0Sstevel@tonic-gate bzero(&laddr, sizeof (laddr)); 2395*0Sstevel@tonic-gate laddr.ss_family = af; 2396*0Sstevel@tonic-gate 2397*0Sstevel@tonic-gate if (af == AF_INET) { 2398*0Sstevel@tonic-gate sin = (struct sockaddr_in *)&laddr; 2399*0Sstevel@tonic-gate sin->sin_port = htons(MPATHD_PORT); 2400*0Sstevel@tonic-gate sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2401*0Sstevel@tonic-gate len = sizeof (struct sockaddr_in); 2402*0Sstevel@tonic-gate } else { 2403*0Sstevel@tonic-gate sin6 = (struct sockaddr_in6 *)&laddr; 2404*0Sstevel@tonic-gate sin6->sin6_port = htons(MPATHD_PORT); 2405*0Sstevel@tonic-gate sin6->sin6_addr = loopback_addr; 2406*0Sstevel@tonic-gate len = sizeof (struct sockaddr_in6); 2407*0Sstevel@tonic-gate } 2408*0Sstevel@tonic-gate 2409*0Sstevel@tonic-gate ret = bind(sock, (struct sockaddr *)&laddr, len); 2410*0Sstevel@tonic-gate if (ret < 0) { 2411*0Sstevel@tonic-gate if (errno == EADDRINUSE) { 2412*0Sstevel@tonic-gate /* 2413*0Sstevel@tonic-gate * Another instance of mpathd may be already active. 2414*0Sstevel@tonic-gate */ 2415*0Sstevel@tonic-gate logerr("main: is another instance of in.mpathd " 2416*0Sstevel@tonic-gate "already active?\n"); 2417*0Sstevel@tonic-gate exit(1); 2418*0Sstevel@tonic-gate } else { 2419*0Sstevel@tonic-gate (void) close(sock); 2420*0Sstevel@tonic-gate return (-1); 2421*0Sstevel@tonic-gate } 2422*0Sstevel@tonic-gate } 2423*0Sstevel@tonic-gate if (listen(sock, 30) < 0) { 2424*0Sstevel@tonic-gate logperror("main: listen"); 2425*0Sstevel@tonic-gate exit(1); 2426*0Sstevel@tonic-gate } 2427*0Sstevel@tonic-gate if (poll_add(sock) == -1) { 2428*0Sstevel@tonic-gate (void) close(sock); 2429*0Sstevel@tonic-gate exit(1); 2430*0Sstevel@tonic-gate } 2431*0Sstevel@tonic-gate 2432*0Sstevel@tonic-gate return (sock); 2433*0Sstevel@tonic-gate } 2434*0Sstevel@tonic-gate 2435*0Sstevel@tonic-gate /* 2436*0Sstevel@tonic-gate * Table of commands and their expected size; used by loopback_cmd(). 2437*0Sstevel@tonic-gate */ 2438*0Sstevel@tonic-gate static struct { 2439*0Sstevel@tonic-gate const char *name; 2440*0Sstevel@tonic-gate unsigned int size; 2441*0Sstevel@tonic-gate } commands[] = { 2442*0Sstevel@tonic-gate { "MI_PING", sizeof (uint32_t) }, 2443*0Sstevel@tonic-gate { "MI_OFFLINE", sizeof (mi_offline_t) }, 2444*0Sstevel@tonic-gate { "MI_UNDO_OFFLINE", sizeof (mi_undo_offline_t) }, 2445*0Sstevel@tonic-gate { "MI_SETOINDEX", sizeof (mi_setoindex_t) }, 2446*0Sstevel@tonic-gate { "MI_QUERY", sizeof (mi_query_t) } 2447*0Sstevel@tonic-gate }; 2448*0Sstevel@tonic-gate 2449*0Sstevel@tonic-gate /* 2450*0Sstevel@tonic-gate * Commands received over the loopback interface come here. Currently 2451*0Sstevel@tonic-gate * the agents that send commands are ifconfig, if_mpadm and the RCM IPMP 2452*0Sstevel@tonic-gate * module. ifconfig only makes a connection, and closes it to check if 2453*0Sstevel@tonic-gate * in.mpathd is running. 2454*0Sstevel@tonic-gate * if_mpadm sends commands in the format specified by the mpathd_interface 2455*0Sstevel@tonic-gate * structure. 2456*0Sstevel@tonic-gate */ 2457*0Sstevel@tonic-gate static void 2458*0Sstevel@tonic-gate loopback_cmd(int sock, int family) 2459*0Sstevel@tonic-gate { 2460*0Sstevel@tonic-gate int newfd; 2461*0Sstevel@tonic-gate ssize_t len; 2462*0Sstevel@tonic-gate struct sockaddr_storage peer; 2463*0Sstevel@tonic-gate struct sockaddr_in *peer_sin; 2464*0Sstevel@tonic-gate struct sockaddr_in6 *peer_sin6; 2465*0Sstevel@tonic-gate socklen_t peerlen; 2466*0Sstevel@tonic-gate union mi_commands mpi; 2467*0Sstevel@tonic-gate struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; 2468*0Sstevel@tonic-gate char abuf[INET6_ADDRSTRLEN]; 2469*0Sstevel@tonic-gate uint_t cmd; 2470*0Sstevel@tonic-gate int retval; 2471*0Sstevel@tonic-gate 2472*0Sstevel@tonic-gate peerlen = sizeof (peer); 2473*0Sstevel@tonic-gate newfd = accept(sock, (struct sockaddr *)&peer, &peerlen); 2474*0Sstevel@tonic-gate if (newfd < 0) { 2475*0Sstevel@tonic-gate logperror("loopback_cmd: accept"); 2476*0Sstevel@tonic-gate return; 2477*0Sstevel@tonic-gate } 2478*0Sstevel@tonic-gate 2479*0Sstevel@tonic-gate switch (family) { 2480*0Sstevel@tonic-gate case AF_INET: 2481*0Sstevel@tonic-gate /* 2482*0Sstevel@tonic-gate * Validate the address and port to make sure that 2483*0Sstevel@tonic-gate * non privileged processes don't connect and start 2484*0Sstevel@tonic-gate * talking to us. 2485*0Sstevel@tonic-gate */ 2486*0Sstevel@tonic-gate if (peerlen != sizeof (struct sockaddr_in)) { 2487*0Sstevel@tonic-gate logerr("loopback_cmd: AF_INET peerlen %d\n", peerlen); 2488*0Sstevel@tonic-gate (void) close(newfd); 2489*0Sstevel@tonic-gate return; 2490*0Sstevel@tonic-gate } 2491*0Sstevel@tonic-gate peer_sin = (struct sockaddr_in *)&peer; 2492*0Sstevel@tonic-gate if ((ntohs(peer_sin->sin_port) >= IPPORT_RESERVED) || 2493*0Sstevel@tonic-gate (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK)) { 2494*0Sstevel@tonic-gate (void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr, 2495*0Sstevel@tonic-gate abuf, sizeof (abuf)); 2496*0Sstevel@tonic-gate logerr("Attempt to connect from addr %s port %d\n", 2497*0Sstevel@tonic-gate abuf, ntohs(peer_sin->sin_port)); 2498*0Sstevel@tonic-gate (void) close(newfd); 2499*0Sstevel@tonic-gate return; 2500*0Sstevel@tonic-gate } 2501*0Sstevel@tonic-gate break; 2502*0Sstevel@tonic-gate 2503*0Sstevel@tonic-gate case AF_INET6: 2504*0Sstevel@tonic-gate if (peerlen != sizeof (struct sockaddr_in6)) { 2505*0Sstevel@tonic-gate logerr("loopback_cmd: AF_INET6 peerlen %d\n", peerlen); 2506*0Sstevel@tonic-gate (void) close(newfd); 2507*0Sstevel@tonic-gate return; 2508*0Sstevel@tonic-gate } 2509*0Sstevel@tonic-gate /* 2510*0Sstevel@tonic-gate * Validate the address and port to make sure that 2511*0Sstevel@tonic-gate * non privileged processes don't connect and start 2512*0Sstevel@tonic-gate * talking to us. 2513*0Sstevel@tonic-gate */ 2514*0Sstevel@tonic-gate peer_sin6 = (struct sockaddr_in6 *)&peer; 2515*0Sstevel@tonic-gate if ((ntohs(peer_sin6->sin6_port) >= IPPORT_RESERVED) || 2516*0Sstevel@tonic-gate (!IN6_ARE_ADDR_EQUAL(&peer_sin6->sin6_addr, 2517*0Sstevel@tonic-gate &loopback_addr))) { 2518*0Sstevel@tonic-gate (void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf, 2519*0Sstevel@tonic-gate sizeof (abuf)); 2520*0Sstevel@tonic-gate logerr("Attempt to connect from addr %s port %d\n", 2521*0Sstevel@tonic-gate abuf, ntohs(peer_sin6->sin6_port)); 2522*0Sstevel@tonic-gate (void) close(newfd); 2523*0Sstevel@tonic-gate return; 2524*0Sstevel@tonic-gate } 2525*0Sstevel@tonic-gate 2526*0Sstevel@tonic-gate default: 2527*0Sstevel@tonic-gate logdebug("loopback_cmd: family %d\n", family); 2528*0Sstevel@tonic-gate (void) close(newfd); 2529*0Sstevel@tonic-gate return; 2530*0Sstevel@tonic-gate } 2531*0Sstevel@tonic-gate 2532*0Sstevel@tonic-gate /* 2533*0Sstevel@tonic-gate * The sizeof the 'mpi' buffer corresponds to the maximum size of 2534*0Sstevel@tonic-gate * all supported commands 2535*0Sstevel@tonic-gate */ 2536*0Sstevel@tonic-gate len = read(newfd, &mpi, sizeof (mpi)); 2537*0Sstevel@tonic-gate 2538*0Sstevel@tonic-gate /* 2539*0Sstevel@tonic-gate * ifconfig does not send any data. Just tests to see if mpathd 2540*0Sstevel@tonic-gate * is already running. 2541*0Sstevel@tonic-gate */ 2542*0Sstevel@tonic-gate if (len <= 0) { 2543*0Sstevel@tonic-gate (void) close(newfd); 2544*0Sstevel@tonic-gate return; 2545*0Sstevel@tonic-gate } 2546*0Sstevel@tonic-gate 2547*0Sstevel@tonic-gate /* 2548*0Sstevel@tonic-gate * In theory, we can receive any sized message for a stream socket, 2549*0Sstevel@tonic-gate * but we don't expect that to happen for a small message over a 2550*0Sstevel@tonic-gate * loopback connection. 2551*0Sstevel@tonic-gate */ 2552*0Sstevel@tonic-gate if (len < sizeof (uint32_t)) { 2553*0Sstevel@tonic-gate logerr("loopback_cmd: bad command format or read returns " 2554*0Sstevel@tonic-gate "partial data %d\n", len); 2555*0Sstevel@tonic-gate } 2556*0Sstevel@tonic-gate 2557*0Sstevel@tonic-gate cmd = mpi.mi_command; 2558*0Sstevel@tonic-gate if (cmd >= MI_NCMD) { 2559*0Sstevel@tonic-gate logerr("loopback_cmd: unknown command id `%d'\n", cmd); 2560*0Sstevel@tonic-gate (void) close(newfd); 2561*0Sstevel@tonic-gate return; 2562*0Sstevel@tonic-gate } 2563*0Sstevel@tonic-gate 2564*0Sstevel@tonic-gate if (len < commands[cmd].size) { 2565*0Sstevel@tonic-gate logerr("loopback_cmd: short %s command (expected %d, got %d)\n", 2566*0Sstevel@tonic-gate commands[cmd].name, commands[cmd].size, len); 2567*0Sstevel@tonic-gate (void) close(newfd); 2568*0Sstevel@tonic-gate return; 2569*0Sstevel@tonic-gate } 2570*0Sstevel@tonic-gate 2571*0Sstevel@tonic-gate retval = process_cmd(newfd, &mpi); 2572*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) { 2573*0Sstevel@tonic-gate logerr("failed processing %s: %s\n", commands[cmd].name, 2574*0Sstevel@tonic-gate ipmp_errmsg(retval)); 2575*0Sstevel@tonic-gate } 2576*0Sstevel@tonic-gate (void) close(newfd); 2577*0Sstevel@tonic-gate } 2578*0Sstevel@tonic-gate 2579*0Sstevel@tonic-gate extern int global_errno; /* set by failover() or failback() */ 2580*0Sstevel@tonic-gate 2581*0Sstevel@tonic-gate /* 2582*0Sstevel@tonic-gate * Process the offline, undo offline and set original index commands, 2583*0Sstevel@tonic-gate * received from if_mpadm(1M) 2584*0Sstevel@tonic-gate */ 2585*0Sstevel@tonic-gate static unsigned int 2586*0Sstevel@tonic-gate process_cmd(int newfd, union mi_commands *mpi) 2587*0Sstevel@tonic-gate { 2588*0Sstevel@tonic-gate uint_t nif = 0; 2589*0Sstevel@tonic-gate uint32_t cmd; 2590*0Sstevel@tonic-gate struct phyint *pi; 2591*0Sstevel@tonic-gate struct phyint *pi2; 2592*0Sstevel@tonic-gate struct phyint_group *pg; 2593*0Sstevel@tonic-gate boolean_t success; 2594*0Sstevel@tonic-gate int error; 2595*0Sstevel@tonic-gate struct mi_offline *mio; 2596*0Sstevel@tonic-gate struct mi_undo_offline *miu; 2597*0Sstevel@tonic-gate struct lifreq lifr; 2598*0Sstevel@tonic-gate int ifsock; 2599*0Sstevel@tonic-gate struct mi_setoindex *mis; 2600*0Sstevel@tonic-gate 2601*0Sstevel@tonic-gate cmd = mpi->mi_command; 2602*0Sstevel@tonic-gate 2603*0Sstevel@tonic-gate switch (cmd) { 2604*0Sstevel@tonic-gate case MI_OFFLINE: 2605*0Sstevel@tonic-gate mio = &mpi->mi_ocmd; 2606*0Sstevel@tonic-gate /* 2607*0Sstevel@tonic-gate * Lookup the interface that needs to be offlined. 2608*0Sstevel@tonic-gate * If it does not exist, return a suitable error. 2609*0Sstevel@tonic-gate */ 2610*0Sstevel@tonic-gate pi = phyint_lookup(mio->mio_ifname); 2611*0Sstevel@tonic-gate if (pi == NULL) 2612*0Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, EINVAL)); 2613*0Sstevel@tonic-gate 2614*0Sstevel@tonic-gate /* 2615*0Sstevel@tonic-gate * Verify that the minimum redundancy requirements are met. 2616*0Sstevel@tonic-gate * The multipathing group must have at least the specified 2617*0Sstevel@tonic-gate * number of functional interfaces after offlining the 2618*0Sstevel@tonic-gate * requested interface. Otherwise return a suitable error. 2619*0Sstevel@tonic-gate */ 2620*0Sstevel@tonic-gate pg = pi->pi_group; 2621*0Sstevel@tonic-gate nif = 0; 2622*0Sstevel@tonic-gate if (pg != phyint_anongroup) { 2623*0Sstevel@tonic-gate for (nif = 0, pi2 = pg->pg_phyint; pi2 != NULL; 2624*0Sstevel@tonic-gate pi2 = pi2->pi_pgnext) { 2625*0Sstevel@tonic-gate if ((pi2->pi_state == PI_RUNNING) || 2626*0Sstevel@tonic-gate (pg->pg_groupfailed && 2627*0Sstevel@tonic-gate !(pi2->pi_flags & IFF_OFFLINE))) 2628*0Sstevel@tonic-gate nif++; 2629*0Sstevel@tonic-gate } 2630*0Sstevel@tonic-gate } 2631*0Sstevel@tonic-gate if (nif < mio->mio_min_redundancy) 2632*0Sstevel@tonic-gate return (send_result(newfd, IPMP_EMINRED, 0)); 2633*0Sstevel@tonic-gate 2634*0Sstevel@tonic-gate /* 2635*0Sstevel@tonic-gate * The order of operation is to set IFF_OFFLINE, followed by 2636*0Sstevel@tonic-gate * failover. Setting IFF_OFFLINE ensures that no new ipif's 2637*0Sstevel@tonic-gate * can be created. Subsequent failover moves everything on 2638*0Sstevel@tonic-gate * the OFFLINE interface to some other functional interface. 2639*0Sstevel@tonic-gate */ 2640*0Sstevel@tonic-gate success = change_lif_flags(pi, IFF_OFFLINE, _B_TRUE); 2641*0Sstevel@tonic-gate if (success) { 2642*0Sstevel@tonic-gate if (!pi->pi_empty) { 2643*0Sstevel@tonic-gate error = try_failover(pi, FAILOVER_NORMAL); 2644*0Sstevel@tonic-gate if (error != 0) { 2645*0Sstevel@tonic-gate if (!change_lif_flags(pi, IFF_OFFLINE, 2646*0Sstevel@tonic-gate _B_FALSE)) { 2647*0Sstevel@tonic-gate logerr("process_cmd: couldn't" 2648*0Sstevel@tonic-gate " clear OFFLINE flag on" 2649*0Sstevel@tonic-gate " %s\n", pi->pi_name); 2650*0Sstevel@tonic-gate /* 2651*0Sstevel@tonic-gate * Offline interfaces should 2652*0Sstevel@tonic-gate * not be probed. 2653*0Sstevel@tonic-gate */ 2654*0Sstevel@tonic-gate stop_probing(pi); 2655*0Sstevel@tonic-gate } 2656*0Sstevel@tonic-gate return (send_result(newfd, error, 2657*0Sstevel@tonic-gate global_errno)); 2658*0Sstevel@tonic-gate } 2659*0Sstevel@tonic-gate } 2660*0Sstevel@tonic-gate } else { 2661*0Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, errno)); 2662*0Sstevel@tonic-gate } 2663*0Sstevel@tonic-gate 2664*0Sstevel@tonic-gate /* 2665*0Sstevel@tonic-gate * The interface is now Offline, so stop probing it. 2666*0Sstevel@tonic-gate * Note that if_mpadm(1M) will down the test addresses, 2667*0Sstevel@tonic-gate * after receiving a success reply from us. The routing 2668*0Sstevel@tonic-gate * socket message will then make us close the socket used 2669*0Sstevel@tonic-gate * for sending probes. But it is more logical that an 2670*0Sstevel@tonic-gate * offlined interface must not be probed, even if it has 2671*0Sstevel@tonic-gate * test addresses. 2672*0Sstevel@tonic-gate */ 2673*0Sstevel@tonic-gate stop_probing(pi); 2674*0Sstevel@tonic-gate return (send_result(newfd, IPMP_SUCCESS, 0)); 2675*0Sstevel@tonic-gate 2676*0Sstevel@tonic-gate case MI_UNDO_OFFLINE: 2677*0Sstevel@tonic-gate miu = &mpi->mi_ucmd; 2678*0Sstevel@tonic-gate /* 2679*0Sstevel@tonic-gate * Undo the offline command. As usual lookup the interface. 2680*0Sstevel@tonic-gate * Send an error if it does not exist. 2681*0Sstevel@tonic-gate */ 2682*0Sstevel@tonic-gate pi = phyint_lookup(miu->miu_ifname); 2683*0Sstevel@tonic-gate if (pi == NULL) 2684*0Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, EINVAL)); 2685*0Sstevel@tonic-gate 2686*0Sstevel@tonic-gate /* 2687*0Sstevel@tonic-gate * Inverse of the offline operation. Do a failback, and then 2688*0Sstevel@tonic-gate * clear the IFF_OFFLINE flag. 2689*0Sstevel@tonic-gate */ 2690*0Sstevel@tonic-gate error = do_failback(pi, _B_TRUE); 2691*0Sstevel@tonic-gate if (error == IPMP_EFBPARTIAL) 2692*0Sstevel@tonic-gate return (send_result(newfd, IPMP_EFBPARTIAL, 0)); 2693*0Sstevel@tonic-gate error = do_failback(pi, _B_FALSE); 2694*0Sstevel@tonic-gate 2695*0Sstevel@tonic-gate switch (error) { 2696*0Sstevel@tonic-gate case IPMP_SUCCESS: 2697*0Sstevel@tonic-gate if (!change_lif_flags(pi, IFF_OFFLINE, _B_FALSE)) { 2698*0Sstevel@tonic-gate logdebug("undo error %X\n", global_errno); 2699*0Sstevel@tonic-gate error = IPMP_FAILURE; 2700*0Sstevel@tonic-gate break; 2701*0Sstevel@tonic-gate } 2702*0Sstevel@tonic-gate /* FALLTHROUGH */ 2703*0Sstevel@tonic-gate 2704*0Sstevel@tonic-gate case IPMP_EFBPARTIAL: 2705*0Sstevel@tonic-gate /* 2706*0Sstevel@tonic-gate * Reset the state of the interface based on the 2707*0Sstevel@tonic-gate * current link state; if this phyint subsequently 2708*0Sstevel@tonic-gate * acquires a test address, the state will be changed 2709*0Sstevel@tonic-gate * again later as a result of the probes. 2710*0Sstevel@tonic-gate */ 2711*0Sstevel@tonic-gate if (LINK_UP(pi)) 2712*0Sstevel@tonic-gate phyint_chstate(pi, PI_RUNNING); 2713*0Sstevel@tonic-gate else 2714*0Sstevel@tonic-gate phyint_chstate(pi, PI_FAILED); 2715*0Sstevel@tonic-gate break; 2716*0Sstevel@tonic-gate 2717*0Sstevel@tonic-gate case IPMP_FAILURE: 2718*0Sstevel@tonic-gate break; 2719*0Sstevel@tonic-gate 2720*0Sstevel@tonic-gate default: 2721*0Sstevel@tonic-gate logdebug("do_failback: unexpected return value\n"); 2722*0Sstevel@tonic-gate break; 2723*0Sstevel@tonic-gate } 2724*0Sstevel@tonic-gate return (send_result(newfd, error, global_errno)); 2725*0Sstevel@tonic-gate 2726*0Sstevel@tonic-gate case MI_SETOINDEX: 2727*0Sstevel@tonic-gate mis = &mpi->mi_scmd; 2728*0Sstevel@tonic-gate 2729*0Sstevel@tonic-gate /* Get the socket for doing ioctls */ 2730*0Sstevel@tonic-gate ifsock = (mis->mis_iftype == AF_INET) ? ifsock_v4 : ifsock_v6; 2731*0Sstevel@tonic-gate 2732*0Sstevel@tonic-gate /* 2733*0Sstevel@tonic-gate * Get index of new original interface. 2734*0Sstevel@tonic-gate * The index is returned in lifr.lifr_index. 2735*0Sstevel@tonic-gate */ 2736*0Sstevel@tonic-gate (void) strlcpy(lifr.lifr_name, mis->mis_new_pifname, 2737*0Sstevel@tonic-gate sizeof (lifr.lifr_name)); 2738*0Sstevel@tonic-gate 2739*0Sstevel@tonic-gate if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) 2740*0Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, errno)); 2741*0Sstevel@tonic-gate 2742*0Sstevel@tonic-gate /* 2743*0Sstevel@tonic-gate * Set new original interface index. 2744*0Sstevel@tonic-gate * The new index was put into lifr.lifr_index by the 2745*0Sstevel@tonic-gate * SIOCGLIFINDEX ioctl. 2746*0Sstevel@tonic-gate */ 2747*0Sstevel@tonic-gate (void) strlcpy(lifr.lifr_name, mis->mis_lifname, 2748*0Sstevel@tonic-gate sizeof (lifr.lifr_name)); 2749*0Sstevel@tonic-gate 2750*0Sstevel@tonic-gate if (ioctl(ifsock, SIOCSLIFOINDEX, (char *)&lifr) < 0) 2751*0Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, errno)); 2752*0Sstevel@tonic-gate 2753*0Sstevel@tonic-gate return (send_result(newfd, IPMP_SUCCESS, 0)); 2754*0Sstevel@tonic-gate 2755*0Sstevel@tonic-gate case MI_QUERY: 2756*0Sstevel@tonic-gate return (process_query(newfd, &mpi->mi_qcmd)); 2757*0Sstevel@tonic-gate 2758*0Sstevel@tonic-gate default: 2759*0Sstevel@tonic-gate break; 2760*0Sstevel@tonic-gate } 2761*0Sstevel@tonic-gate 2762*0Sstevel@tonic-gate return (send_result(newfd, IPMP_EPROTO, 0)); 2763*0Sstevel@tonic-gate } 2764*0Sstevel@tonic-gate 2765*0Sstevel@tonic-gate /* 2766*0Sstevel@tonic-gate * Process the query request pointed to by `miq' and send a reply on file 2767*0Sstevel@tonic-gate * descriptor `fd'. Returns an IPMP error code. 2768*0Sstevel@tonic-gate */ 2769*0Sstevel@tonic-gate static unsigned int 2770*0Sstevel@tonic-gate process_query(int fd, mi_query_t *miq) 2771*0Sstevel@tonic-gate { 2772*0Sstevel@tonic-gate ipmp_groupinfo_t *grinfop; 2773*0Sstevel@tonic-gate ipmp_groupinfolist_t *grlp; 2774*0Sstevel@tonic-gate ipmp_grouplist_t *grlistp; 2775*0Sstevel@tonic-gate ipmp_ifinfo_t *ifinfop; 2776*0Sstevel@tonic-gate ipmp_ifinfolist_t *iflp; 2777*0Sstevel@tonic-gate ipmp_snap_t *snap; 2778*0Sstevel@tonic-gate unsigned int retval; 2779*0Sstevel@tonic-gate 2780*0Sstevel@tonic-gate switch (miq->miq_inforeq) { 2781*0Sstevel@tonic-gate case IPMP_GROUPLIST: 2782*0Sstevel@tonic-gate retval = getgrouplist(&grlistp); 2783*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2784*0Sstevel@tonic-gate return (send_result(fd, retval, errno)); 2785*0Sstevel@tonic-gate 2786*0Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 2787*0Sstevel@tonic-gate if (retval == IPMP_SUCCESS) 2788*0Sstevel@tonic-gate retval = send_grouplist(fd, grlistp); 2789*0Sstevel@tonic-gate 2790*0Sstevel@tonic-gate ipmp_freegrouplist(grlistp); 2791*0Sstevel@tonic-gate return (retval); 2792*0Sstevel@tonic-gate 2793*0Sstevel@tonic-gate case IPMP_GROUPINFO: 2794*0Sstevel@tonic-gate miq->miq_grname[LIFGRNAMSIZ - 1] = '\0'; 2795*0Sstevel@tonic-gate retval = getgroupinfo(miq->miq_ifname, &grinfop); 2796*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2797*0Sstevel@tonic-gate return (send_result(fd, retval, errno)); 2798*0Sstevel@tonic-gate 2799*0Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 2800*0Sstevel@tonic-gate if (retval == IPMP_SUCCESS) 2801*0Sstevel@tonic-gate retval = send_groupinfo(fd, grinfop); 2802*0Sstevel@tonic-gate 2803*0Sstevel@tonic-gate ipmp_freegroupinfo(grinfop); 2804*0Sstevel@tonic-gate return (retval); 2805*0Sstevel@tonic-gate 2806*0Sstevel@tonic-gate case IPMP_IFINFO: 2807*0Sstevel@tonic-gate miq->miq_ifname[LIFNAMSIZ - 1] = '\0'; 2808*0Sstevel@tonic-gate retval = getifinfo(miq->miq_ifname, &ifinfop); 2809*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2810*0Sstevel@tonic-gate return (send_result(fd, retval, errno)); 2811*0Sstevel@tonic-gate 2812*0Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 2813*0Sstevel@tonic-gate if (retval == IPMP_SUCCESS) 2814*0Sstevel@tonic-gate retval = send_ifinfo(fd, ifinfop); 2815*0Sstevel@tonic-gate 2816*0Sstevel@tonic-gate ipmp_freeifinfo(ifinfop); 2817*0Sstevel@tonic-gate return (retval); 2818*0Sstevel@tonic-gate 2819*0Sstevel@tonic-gate case IPMP_SNAP: 2820*0Sstevel@tonic-gate retval = getsnap(&snap); 2821*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2822*0Sstevel@tonic-gate return (send_result(fd, retval, errno)); 2823*0Sstevel@tonic-gate 2824*0Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 2825*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2826*0Sstevel@tonic-gate goto out; 2827*0Sstevel@tonic-gate 2828*0Sstevel@tonic-gate retval = ipmp_writetlv(fd, IPMP_SNAP, sizeof (*snap), snap); 2829*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2830*0Sstevel@tonic-gate goto out; 2831*0Sstevel@tonic-gate 2832*0Sstevel@tonic-gate retval = send_grouplist(fd, snap->sn_grlistp); 2833*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2834*0Sstevel@tonic-gate goto out; 2835*0Sstevel@tonic-gate 2836*0Sstevel@tonic-gate iflp = snap->sn_ifinfolistp; 2837*0Sstevel@tonic-gate for (; iflp != NULL; iflp = iflp->ifl_next) { 2838*0Sstevel@tonic-gate retval = send_ifinfo(fd, iflp->ifl_ifinfop); 2839*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2840*0Sstevel@tonic-gate goto out; 2841*0Sstevel@tonic-gate } 2842*0Sstevel@tonic-gate 2843*0Sstevel@tonic-gate grlp = snap->sn_grinfolistp; 2844*0Sstevel@tonic-gate for (; grlp != NULL; grlp = grlp->grl_next) { 2845*0Sstevel@tonic-gate retval = send_groupinfo(fd, grlp->grl_grinfop); 2846*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2847*0Sstevel@tonic-gate goto out; 2848*0Sstevel@tonic-gate } 2849*0Sstevel@tonic-gate out: 2850*0Sstevel@tonic-gate ipmp_snap_free(snap); 2851*0Sstevel@tonic-gate return (retval); 2852*0Sstevel@tonic-gate 2853*0Sstevel@tonic-gate default: 2854*0Sstevel@tonic-gate break; 2855*0Sstevel@tonic-gate 2856*0Sstevel@tonic-gate } 2857*0Sstevel@tonic-gate return (send_result(fd, IPMP_EPROTO, 0)); 2858*0Sstevel@tonic-gate } 2859*0Sstevel@tonic-gate 2860*0Sstevel@tonic-gate /* 2861*0Sstevel@tonic-gate * Send the group information pointed to by `grinfop' on file descriptor `fd'. 2862*0Sstevel@tonic-gate * Returns an IPMP error code. 2863*0Sstevel@tonic-gate */ 2864*0Sstevel@tonic-gate static unsigned int 2865*0Sstevel@tonic-gate send_groupinfo(int fd, ipmp_groupinfo_t *grinfop) 2866*0Sstevel@tonic-gate { 2867*0Sstevel@tonic-gate ipmp_iflist_t *iflistp = grinfop->gr_iflistp; 2868*0Sstevel@tonic-gate unsigned int retval; 2869*0Sstevel@tonic-gate 2870*0Sstevel@tonic-gate retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (*grinfop), grinfop); 2871*0Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 2872*0Sstevel@tonic-gate return (retval); 2873*0Sstevel@tonic-gate 2874*0Sstevel@tonic-gate return (ipmp_writetlv(fd, IPMP_IFLIST, 2875*0Sstevel@tonic-gate IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp)); 2876*0Sstevel@tonic-gate } 2877*0Sstevel@tonic-gate 2878*0Sstevel@tonic-gate /* 2879*0Sstevel@tonic-gate * Send the interface information pointed to by `ifinfop' on file descriptor 2880*0Sstevel@tonic-gate * `fd'. Returns an IPMP error code. 2881*0Sstevel@tonic-gate */ 2882*0Sstevel@tonic-gate static unsigned int 2883*0Sstevel@tonic-gate send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop) 2884*0Sstevel@tonic-gate { 2885*0Sstevel@tonic-gate return (ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop)); 2886*0Sstevel@tonic-gate } 2887*0Sstevel@tonic-gate 2888*0Sstevel@tonic-gate /* 2889*0Sstevel@tonic-gate * Send the group list pointed to by `grlistp' on file descriptor `fd'. 2890*0Sstevel@tonic-gate * Returns an IPMP error code. 2891*0Sstevel@tonic-gate */ 2892*0Sstevel@tonic-gate static unsigned int 2893*0Sstevel@tonic-gate send_grouplist(int fd, ipmp_grouplist_t *grlistp) 2894*0Sstevel@tonic-gate { 2895*0Sstevel@tonic-gate return (ipmp_writetlv(fd, IPMP_GROUPLIST, 2896*0Sstevel@tonic-gate IPMP_GROUPLIST_SIZE(grlistp->gl_ngroup), grlistp)); 2897*0Sstevel@tonic-gate } 2898*0Sstevel@tonic-gate 2899*0Sstevel@tonic-gate /* 2900*0Sstevel@tonic-gate * Initialize an mi_result_t structure using `error' and `syserror' and 2901*0Sstevel@tonic-gate * send it on file descriptor `fd'. Returns an IPMP error code. 2902*0Sstevel@tonic-gate */ 2903*0Sstevel@tonic-gate static unsigned int 2904*0Sstevel@tonic-gate send_result(int fd, unsigned int error, int syserror) 2905*0Sstevel@tonic-gate { 2906*0Sstevel@tonic-gate mi_result_t me; 2907*0Sstevel@tonic-gate 2908*0Sstevel@tonic-gate me.me_mpathd_error = error; 2909*0Sstevel@tonic-gate if (error == IPMP_FAILURE) 2910*0Sstevel@tonic-gate me.me_sys_error = syserror; 2911*0Sstevel@tonic-gate else 2912*0Sstevel@tonic-gate me.me_sys_error = 0; 2913*0Sstevel@tonic-gate 2914*0Sstevel@tonic-gate return (ipmp_write(fd, &me, sizeof (me))); 2915*0Sstevel@tonic-gate } 2916*0Sstevel@tonic-gate 2917*0Sstevel@tonic-gate /* 2918*0Sstevel@tonic-gate * Daemonize the process. 2919*0Sstevel@tonic-gate */ 2920*0Sstevel@tonic-gate static boolean_t 2921*0Sstevel@tonic-gate daemonize(void) 2922*0Sstevel@tonic-gate { 2923*0Sstevel@tonic-gate switch (fork()) { 2924*0Sstevel@tonic-gate case -1: 2925*0Sstevel@tonic-gate return (_B_FALSE); 2926*0Sstevel@tonic-gate 2927*0Sstevel@tonic-gate case 0: 2928*0Sstevel@tonic-gate /* 2929*0Sstevel@tonic-gate * Lose our controlling terminal, and become both a session 2930*0Sstevel@tonic-gate * leader and a process group leader. 2931*0Sstevel@tonic-gate */ 2932*0Sstevel@tonic-gate if (setsid() == -1) 2933*0Sstevel@tonic-gate return (_B_FALSE); 2934*0Sstevel@tonic-gate 2935*0Sstevel@tonic-gate /* 2936*0Sstevel@tonic-gate * Under POSIX, a session leader can accidentally (through 2937*0Sstevel@tonic-gate * open(2)) acquire a controlling terminal if it does not 2938*0Sstevel@tonic-gate * have one. Just to be safe, fork() again so we are not a 2939*0Sstevel@tonic-gate * session leader. 2940*0Sstevel@tonic-gate */ 2941*0Sstevel@tonic-gate switch (fork()) { 2942*0Sstevel@tonic-gate case -1: 2943*0Sstevel@tonic-gate return (_B_FALSE); 2944*0Sstevel@tonic-gate 2945*0Sstevel@tonic-gate case 0: 2946*0Sstevel@tonic-gate (void) chdir("/"); 2947*0Sstevel@tonic-gate (void) umask(022); 2948*0Sstevel@tonic-gate (void) fdwalk(closefunc, NULL); 2949*0Sstevel@tonic-gate break; 2950*0Sstevel@tonic-gate 2951*0Sstevel@tonic-gate default: 2952*0Sstevel@tonic-gate _exit(EXIT_SUCCESS); 2953*0Sstevel@tonic-gate } 2954*0Sstevel@tonic-gate break; 2955*0Sstevel@tonic-gate 2956*0Sstevel@tonic-gate default: 2957*0Sstevel@tonic-gate _exit(EXIT_SUCCESS); 2958*0Sstevel@tonic-gate } 2959*0Sstevel@tonic-gate 2960*0Sstevel@tonic-gate return (_B_TRUE); 2961*0Sstevel@tonic-gate } 2962*0Sstevel@tonic-gate 2963*0Sstevel@tonic-gate /* 2964*0Sstevel@tonic-gate * The parent has created some fds before forking on purpose, keep them open. 2965*0Sstevel@tonic-gate */ 2966*0Sstevel@tonic-gate static int 2967*0Sstevel@tonic-gate closefunc(void *not_used, int fd) 2968*0Sstevel@tonic-gate /* ARGSUSED */ 2969*0Sstevel@tonic-gate { 2970*0Sstevel@tonic-gate if (fd != lsock_v4 && fd != lsock_v6) 2971*0Sstevel@tonic-gate (void) close(fd); 2972*0Sstevel@tonic-gate return (0); 2973*0Sstevel@tonic-gate } 2974*0Sstevel@tonic-gate 2975*0Sstevel@tonic-gate /* LOGGER */ 2976*0Sstevel@tonic-gate 2977*0Sstevel@tonic-gate #include <syslog.h> 2978*0Sstevel@tonic-gate 2979*0Sstevel@tonic-gate /* 2980*0Sstevel@tonic-gate * Logging routines. All routines log to syslog, unless the daemon is 2981*0Sstevel@tonic-gate * running in the foreground, in which case the logging goes to stderr. 2982*0Sstevel@tonic-gate * 2983*0Sstevel@tonic-gate * The following routines are available: 2984*0Sstevel@tonic-gate * 2985*0Sstevel@tonic-gate * logdebug(): A printf-like function for outputting debug messages 2986*0Sstevel@tonic-gate * (messages at LOG_DEBUG) that are only of use to developers. 2987*0Sstevel@tonic-gate * 2988*0Sstevel@tonic-gate * logtrace(): A printf-like function for outputting tracing messages 2989*0Sstevel@tonic-gate * (messages at LOG_INFO) from the daemon. This is typically used 2990*0Sstevel@tonic-gate * to log the receipt of interesting network-related conditions. 2991*0Sstevel@tonic-gate * 2992*0Sstevel@tonic-gate * logerr(): A printf-like function for outputting error messages 2993*0Sstevel@tonic-gate * (messages at LOG_ERR) from the daemon. 2994*0Sstevel@tonic-gate * 2995*0Sstevel@tonic-gate * logperror*(): A set of functions used to output error messages 2996*0Sstevel@tonic-gate * (messages at LOG_ERR); these automatically append strerror(errno) 2997*0Sstevel@tonic-gate * and a newline to the message passed to them. 2998*0Sstevel@tonic-gate * 2999*0Sstevel@tonic-gate * NOTE: since the logging functions write to syslog, the messages passed 3000*0Sstevel@tonic-gate * to them are not eligible for localization. Thus, gettext() must 3001*0Sstevel@tonic-gate * *not* be used. 3002*0Sstevel@tonic-gate */ 3003*0Sstevel@tonic-gate 3004*0Sstevel@tonic-gate static int logging = 0; 3005*0Sstevel@tonic-gate 3006*0Sstevel@tonic-gate static void 3007*0Sstevel@tonic-gate initlog(void) 3008*0Sstevel@tonic-gate { 3009*0Sstevel@tonic-gate logging++; 3010*0Sstevel@tonic-gate openlog("in.mpathd", LOG_PID | LOG_CONS, LOG_DAEMON); 3011*0Sstevel@tonic-gate } 3012*0Sstevel@tonic-gate 3013*0Sstevel@tonic-gate /* PRINTFLIKE1 */ 3014*0Sstevel@tonic-gate void 3015*0Sstevel@tonic-gate logerr(char *fmt, ...) 3016*0Sstevel@tonic-gate { 3017*0Sstevel@tonic-gate va_list ap; 3018*0Sstevel@tonic-gate 3019*0Sstevel@tonic-gate va_start(ap, fmt); 3020*0Sstevel@tonic-gate 3021*0Sstevel@tonic-gate if (logging) 3022*0Sstevel@tonic-gate vsyslog(LOG_ERR, fmt, ap); 3023*0Sstevel@tonic-gate else 3024*0Sstevel@tonic-gate (void) vfprintf(stderr, fmt, ap); 3025*0Sstevel@tonic-gate va_end(ap); 3026*0Sstevel@tonic-gate } 3027*0Sstevel@tonic-gate 3028*0Sstevel@tonic-gate /* PRINTFLIKE1 */ 3029*0Sstevel@tonic-gate void 3030*0Sstevel@tonic-gate logtrace(char *fmt, ...) 3031*0Sstevel@tonic-gate { 3032*0Sstevel@tonic-gate va_list ap; 3033*0Sstevel@tonic-gate 3034*0Sstevel@tonic-gate va_start(ap, fmt); 3035*0Sstevel@tonic-gate 3036*0Sstevel@tonic-gate if (logging) 3037*0Sstevel@tonic-gate vsyslog(LOG_INFO, fmt, ap); 3038*0Sstevel@tonic-gate else 3039*0Sstevel@tonic-gate (void) vfprintf(stderr, fmt, ap); 3040*0Sstevel@tonic-gate va_end(ap); 3041*0Sstevel@tonic-gate } 3042*0Sstevel@tonic-gate 3043*0Sstevel@tonic-gate /* PRINTFLIKE1 */ 3044*0Sstevel@tonic-gate void 3045*0Sstevel@tonic-gate logdebug(char *fmt, ...) 3046*0Sstevel@tonic-gate { 3047*0Sstevel@tonic-gate va_list ap; 3048*0Sstevel@tonic-gate 3049*0Sstevel@tonic-gate va_start(ap, fmt); 3050*0Sstevel@tonic-gate 3051*0Sstevel@tonic-gate if (logging) 3052*0Sstevel@tonic-gate vsyslog(LOG_DEBUG, fmt, ap); 3053*0Sstevel@tonic-gate else 3054*0Sstevel@tonic-gate (void) vfprintf(stderr, fmt, ap); 3055*0Sstevel@tonic-gate va_end(ap); 3056*0Sstevel@tonic-gate } 3057*0Sstevel@tonic-gate 3058*0Sstevel@tonic-gate /* PRINTFLIKE1 */ 3059*0Sstevel@tonic-gate void 3060*0Sstevel@tonic-gate logperror(char *str) 3061*0Sstevel@tonic-gate { 3062*0Sstevel@tonic-gate if (logging) 3063*0Sstevel@tonic-gate syslog(LOG_ERR, "%s: %m\n", str); 3064*0Sstevel@tonic-gate else 3065*0Sstevel@tonic-gate (void) fprintf(stderr, "%s: %s\n", str, strerror(errno)); 3066*0Sstevel@tonic-gate } 3067*0Sstevel@tonic-gate 3068*0Sstevel@tonic-gate void 3069*0Sstevel@tonic-gate logperror_pii(struct phyint_instance *pii, char *str) 3070*0Sstevel@tonic-gate { 3071*0Sstevel@tonic-gate if (logging) { 3072*0Sstevel@tonic-gate syslog(LOG_ERR, "%s (%s %s): %m\n", 3073*0Sstevel@tonic-gate str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name); 3074*0Sstevel@tonic-gate } else { 3075*0Sstevel@tonic-gate (void) fprintf(stderr, "%s (%s %s): %s\n", 3076*0Sstevel@tonic-gate str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name, 3077*0Sstevel@tonic-gate strerror(errno)); 3078*0Sstevel@tonic-gate } 3079*0Sstevel@tonic-gate } 3080*0Sstevel@tonic-gate 3081*0Sstevel@tonic-gate void 3082*0Sstevel@tonic-gate logperror_li(struct logint *li, char *str) 3083*0Sstevel@tonic-gate { 3084*0Sstevel@tonic-gate struct phyint_instance *pii = li->li_phyint_inst; 3085*0Sstevel@tonic-gate 3086*0Sstevel@tonic-gate if (logging) { 3087*0Sstevel@tonic-gate syslog(LOG_ERR, "%s (%s %s): %m\n", 3088*0Sstevel@tonic-gate str, AF_STR(pii->pii_af), li->li_name); 3089*0Sstevel@tonic-gate } else { 3090*0Sstevel@tonic-gate (void) fprintf(stderr, "%s (%s %s): %s\n", 3091*0Sstevel@tonic-gate str, AF_STR(pii->pii_af), li->li_name, 3092*0Sstevel@tonic-gate strerror(errno)); 3093*0Sstevel@tonic-gate } 3094*0Sstevel@tonic-gate } 3095*0Sstevel@tonic-gate 3096*0Sstevel@tonic-gate void 3097*0Sstevel@tonic-gate close_probe_socket(struct phyint_instance *pii, boolean_t polled) 3098*0Sstevel@tonic-gate { 3099*0Sstevel@tonic-gate if (polled) 3100*0Sstevel@tonic-gate (void) poll_remove(pii->pii_probe_sock); 3101*0Sstevel@tonic-gate (void) close(pii->pii_probe_sock); 3102*0Sstevel@tonic-gate pii->pii_probe_sock = -1; 3103*0Sstevel@tonic-gate pii->pii_basetime_inited = 0; 3104*0Sstevel@tonic-gate } 3105