10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52074Smeem * Common Development and Distribution License (the "License"). 62074Smeem * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 222074Smeem * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 270Sstevel@tonic-gate 280Sstevel@tonic-gate #include "mpd_defs.h" 290Sstevel@tonic-gate #include "mpd_tables.h" 300Sstevel@tonic-gate 310Sstevel@tonic-gate int debug = 0; /* Debug flag */ 320Sstevel@tonic-gate static int pollfd_num = 0; /* Num. of poll descriptors */ 330Sstevel@tonic-gate static struct pollfd *pollfds = NULL; /* Array of poll descriptors */ 340Sstevel@tonic-gate 350Sstevel@tonic-gate /* All times below in ms */ 360Sstevel@tonic-gate int user_failure_detection_time; /* user specified failure detection */ 370Sstevel@tonic-gate /* time (fdt) */ 380Sstevel@tonic-gate int user_probe_interval; /* derived from user specified fdt */ 390Sstevel@tonic-gate 400Sstevel@tonic-gate static int rtsock_v4; /* AF_INET routing socket */ 410Sstevel@tonic-gate static int rtsock_v6; /* AF_INET6 routing socket */ 420Sstevel@tonic-gate int ifsock_v4 = -1; /* IPv4 socket for ioctls */ 430Sstevel@tonic-gate int ifsock_v6 = -1; /* IPv6 socket for ioctls */ 440Sstevel@tonic-gate static int lsock_v4; /* Listen socket to detect mpathd */ 450Sstevel@tonic-gate static int lsock_v6; /* Listen socket to detect mpathd */ 460Sstevel@tonic-gate static int mibfd = -1; /* fd to get mib info */ 470Sstevel@tonic-gate static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */ 480Sstevel@tonic-gate 490Sstevel@tonic-gate boolean_t full_scan_required = _B_FALSE; 500Sstevel@tonic-gate static uint_t last_initifs_time; /* Time when initifs was last run */ 510Sstevel@tonic-gate static char **argv0; /* Saved for re-exec on SIGHUP */ 520Sstevel@tonic-gate boolean_t handle_link_notifications = _B_TRUE; 530Sstevel@tonic-gate 540Sstevel@tonic-gate static void initlog(void); 550Sstevel@tonic-gate static void run_timeouts(void); 560Sstevel@tonic-gate static void initifs(void); 570Sstevel@tonic-gate static void check_if_removed(struct phyint_instance *pii); 580Sstevel@tonic-gate static void select_test_ifs(void); 590Sstevel@tonic-gate static void ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len); 600Sstevel@tonic-gate static void ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len); 610Sstevel@tonic-gate static void router_add_v4(mib2_ipRouteEntry_t *rp1, 620Sstevel@tonic-gate struct in_addr nexthop_v4); 630Sstevel@tonic-gate static void router_add_v6(mib2_ipv6RouteEntry_t *rp1, 640Sstevel@tonic-gate struct in6_addr nexthop_v6); 650Sstevel@tonic-gate static void router_add_common(int af, char *ifname, 660Sstevel@tonic-gate struct in6_addr nexthop); 670Sstevel@tonic-gate static void init_router_targets(); 680Sstevel@tonic-gate static void cleanup(void); 690Sstevel@tonic-gate static int setup_listener(int af); 700Sstevel@tonic-gate static void check_config(void); 710Sstevel@tonic-gate static void check_addr_unique(int af, char *name); 720Sstevel@tonic-gate static void init_host_targets(void); 730Sstevel@tonic-gate static void dup_host_targets(struct phyint_instance *desired_pii); 740Sstevel@tonic-gate static void loopback_cmd(int sock, int family); 750Sstevel@tonic-gate static int poll_remove(int fd); 760Sstevel@tonic-gate static boolean_t daemonize(void); 770Sstevel@tonic-gate static int closefunc(void *, int); 780Sstevel@tonic-gate static unsigned int process_cmd(int newfd, union mi_commands *mpi); 790Sstevel@tonic-gate static unsigned int process_query(int fd, mi_query_t *miq); 800Sstevel@tonic-gate static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop); 810Sstevel@tonic-gate static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp); 820Sstevel@tonic-gate static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop); 830Sstevel@tonic-gate static unsigned int send_result(int fd, unsigned int error, int syserror); 840Sstevel@tonic-gate 85*2250Srk129064 struct local_addr *laddr_list = NULL; 86*2250Srk129064 870Sstevel@tonic-gate /* 880Sstevel@tonic-gate * Return the current time in milliseconds (from an arbitrary reference) 890Sstevel@tonic-gate * truncated to fit into an int. Truncation is ok since we are interested 900Sstevel@tonic-gate * only in differences and not the absolute values. 910Sstevel@tonic-gate */ 920Sstevel@tonic-gate uint_t 930Sstevel@tonic-gate getcurrenttime(void) 940Sstevel@tonic-gate { 950Sstevel@tonic-gate uint_t cur_time; /* In ms */ 960Sstevel@tonic-gate 970Sstevel@tonic-gate /* 980Sstevel@tonic-gate * Use of a non-user-adjustable source of time is 990Sstevel@tonic-gate * required. However millisecond precision is sufficient. 1000Sstevel@tonic-gate * divide by 10^6 1010Sstevel@tonic-gate */ 1020Sstevel@tonic-gate cur_time = (uint_t)(gethrtime() / 1000000LL); 1030Sstevel@tonic-gate return (cur_time); 1040Sstevel@tonic-gate } 1050Sstevel@tonic-gate 1060Sstevel@tonic-gate /* 1070Sstevel@tonic-gate * Add fd to the set being polled. Returns 0 if ok; -1 if failed. 1080Sstevel@tonic-gate */ 1090Sstevel@tonic-gate int 1100Sstevel@tonic-gate poll_add(int fd) 1110Sstevel@tonic-gate { 1120Sstevel@tonic-gate int i; 1130Sstevel@tonic-gate int new_num; 1140Sstevel@tonic-gate struct pollfd *newfds; 1150Sstevel@tonic-gate retry: 1160Sstevel@tonic-gate /* Check if already present */ 1170Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 1180Sstevel@tonic-gate if (pollfds[i].fd == fd) 1190Sstevel@tonic-gate return (0); 1200Sstevel@tonic-gate } 1210Sstevel@tonic-gate /* Check for empty spot already present */ 1220Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 1230Sstevel@tonic-gate if (pollfds[i].fd == -1) { 1240Sstevel@tonic-gate pollfds[i].fd = fd; 1250Sstevel@tonic-gate return (0); 1260Sstevel@tonic-gate } 1270Sstevel@tonic-gate } 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate /* Allocate space for 32 more fds and initialize to -1 */ 1300Sstevel@tonic-gate new_num = pollfd_num + 32; 1310Sstevel@tonic-gate newfds = realloc(pollfds, new_num * sizeof (struct pollfd)); 1320Sstevel@tonic-gate if (newfds == NULL) { 1330Sstevel@tonic-gate logperror("poll_add: realloc"); 1340Sstevel@tonic-gate return (-1); 1350Sstevel@tonic-gate } 1360Sstevel@tonic-gate for (i = pollfd_num; i < new_num; i++) { 1370Sstevel@tonic-gate newfds[i].fd = -1; 1380Sstevel@tonic-gate newfds[i].events = POLLIN; 1390Sstevel@tonic-gate } 1400Sstevel@tonic-gate pollfd_num = new_num; 1410Sstevel@tonic-gate pollfds = newfds; 1420Sstevel@tonic-gate goto retry; 1430Sstevel@tonic-gate } 1440Sstevel@tonic-gate 1450Sstevel@tonic-gate /* 1460Sstevel@tonic-gate * Remove fd from the set being polled. Returns 0 if ok; -1 if failed. 1470Sstevel@tonic-gate */ 1480Sstevel@tonic-gate static int 1490Sstevel@tonic-gate poll_remove(int fd) 1500Sstevel@tonic-gate { 1510Sstevel@tonic-gate int i; 1520Sstevel@tonic-gate 1530Sstevel@tonic-gate /* Check if already present */ 1540Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 1550Sstevel@tonic-gate if (pollfds[i].fd == fd) { 1560Sstevel@tonic-gate pollfds[i].fd = -1; 1570Sstevel@tonic-gate return (0); 1580Sstevel@tonic-gate } 1590Sstevel@tonic-gate } 1600Sstevel@tonic-gate return (-1); 1610Sstevel@tonic-gate } 1620Sstevel@tonic-gate 1630Sstevel@tonic-gate /* 1640Sstevel@tonic-gate * Extract information about the phyint instance. If the phyint instance still 1650Sstevel@tonic-gate * exists in the kernel then set pii_in_use, else clear it. check_if_removed() 1660Sstevel@tonic-gate * will use it to detect phyint instances that don't exist any longer and 1670Sstevel@tonic-gate * remove them, from our database of phyint instances. 1680Sstevel@tonic-gate * Return value: 1690Sstevel@tonic-gate * returns true if the phyint instance exists in the kernel, 1700Sstevel@tonic-gate * returns false otherwise 1710Sstevel@tonic-gate */ 1720Sstevel@tonic-gate static boolean_t 1730Sstevel@tonic-gate pii_process(int af, char *name, struct phyint_instance **pii_p) 1740Sstevel@tonic-gate { 1750Sstevel@tonic-gate int err; 1760Sstevel@tonic-gate struct phyint_instance *pii; 1770Sstevel@tonic-gate struct phyint_instance *pii_other; 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate if (debug & D_PHYINT) 1800Sstevel@tonic-gate logdebug("pii_process(%s %s)\n", AF_STR(af), name); 1810Sstevel@tonic-gate 1820Sstevel@tonic-gate pii = phyint_inst_lookup(af, name); 1830Sstevel@tonic-gate if (pii == NULL) { 1840Sstevel@tonic-gate /* 1850Sstevel@tonic-gate * Phyint instance does not exist in our tables, 1860Sstevel@tonic-gate * create new phyint instance 1870Sstevel@tonic-gate */ 1880Sstevel@tonic-gate pii = phyint_inst_init_from_k(af, name); 1890Sstevel@tonic-gate } else { 1900Sstevel@tonic-gate /* Phyint exists in our tables */ 1910Sstevel@tonic-gate err = phyint_inst_update_from_k(pii); 1920Sstevel@tonic-gate 1930Sstevel@tonic-gate switch (err) { 1940Sstevel@tonic-gate case PI_IOCTL_ERROR: 1950Sstevel@tonic-gate /* Some ioctl error. don't change anything */ 1960Sstevel@tonic-gate pii->pii_in_use = 1; 1970Sstevel@tonic-gate break; 1980Sstevel@tonic-gate 1990Sstevel@tonic-gate case PI_GROUP_CHANGED: 2000Sstevel@tonic-gate /* 2010Sstevel@tonic-gate * The phyint has changed group. 2020Sstevel@tonic-gate */ 2030Sstevel@tonic-gate restore_phyint(pii->pii_phyint); 2040Sstevel@tonic-gate /* FALLTHRU */ 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate case PI_IFINDEX_CHANGED: 2070Sstevel@tonic-gate /* 2080Sstevel@tonic-gate * Interface index has changed. Delete and 2090Sstevel@tonic-gate * recreate the phyint as it is quite likely 2100Sstevel@tonic-gate * the interface has been unplumbed and replumbed. 2110Sstevel@tonic-gate */ 2120Sstevel@tonic-gate pii_other = phyint_inst_other(pii); 2130Sstevel@tonic-gate if (pii_other != NULL) 2140Sstevel@tonic-gate phyint_inst_delete(pii_other); 2150Sstevel@tonic-gate phyint_inst_delete(pii); 2160Sstevel@tonic-gate pii = phyint_inst_init_from_k(af, name); 2170Sstevel@tonic-gate break; 2180Sstevel@tonic-gate 2190Sstevel@tonic-gate case PI_DELETED: 2200Sstevel@tonic-gate /* Phyint instance has disappeared from kernel */ 2210Sstevel@tonic-gate pii->pii_in_use = 0; 2220Sstevel@tonic-gate break; 2230Sstevel@tonic-gate 2240Sstevel@tonic-gate case PI_OK: 2250Sstevel@tonic-gate /* Phyint instance exists and is fine */ 2260Sstevel@tonic-gate pii->pii_in_use = 1; 2270Sstevel@tonic-gate break; 2280Sstevel@tonic-gate 2290Sstevel@tonic-gate default: 2300Sstevel@tonic-gate /* Unknown status */ 2310Sstevel@tonic-gate logerr("pii_process: Unknown status %d\n", err); 2320Sstevel@tonic-gate break; 2330Sstevel@tonic-gate } 2340Sstevel@tonic-gate } 2350Sstevel@tonic-gate 2360Sstevel@tonic-gate *pii_p = pii; 2370Sstevel@tonic-gate if (pii != NULL) 2380Sstevel@tonic-gate return (pii->pii_in_use ? _B_TRUE : _B_FALSE); 2390Sstevel@tonic-gate else 2400Sstevel@tonic-gate return (_B_FALSE); 2410Sstevel@tonic-gate } 2420Sstevel@tonic-gate 2430Sstevel@tonic-gate /* 2440Sstevel@tonic-gate * This phyint is leaving the group. Try to restore the phyint to its 2450Sstevel@tonic-gate * initial state. Return the addresses that belong to other group members, 2460Sstevel@tonic-gate * to the group, and take back any addresses owned by this phyint 2470Sstevel@tonic-gate */ 2480Sstevel@tonic-gate void 2490Sstevel@tonic-gate restore_phyint(struct phyint *pi) 2500Sstevel@tonic-gate { 2510Sstevel@tonic-gate if (pi->pi_group == phyint_anongroup) 2520Sstevel@tonic-gate return; 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate /* 2550Sstevel@tonic-gate * Move everthing to some other member in the group. 2560Sstevel@tonic-gate * The phyint has changed group in the kernel. But we 2570Sstevel@tonic-gate * have yet to do it in our tables. 2580Sstevel@tonic-gate */ 2590Sstevel@tonic-gate if (!pi->pi_empty) 2600Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_TO_ANY); 2610Sstevel@tonic-gate /* 2620Sstevel@tonic-gate * Move all addresses owned by 'pi' back to pi, from each 2630Sstevel@tonic-gate * of the other members of the group 2640Sstevel@tonic-gate */ 2650Sstevel@tonic-gate (void) try_failback(pi, _B_FALSE); 2660Sstevel@tonic-gate } 2670Sstevel@tonic-gate 2680Sstevel@tonic-gate /* 2690Sstevel@tonic-gate * Scan all interfaces to detect changes as well as new and deleted interfaces 2700Sstevel@tonic-gate */ 2710Sstevel@tonic-gate static void 2720Sstevel@tonic-gate initifs() 2730Sstevel@tonic-gate { 2740Sstevel@tonic-gate int n; 2750Sstevel@tonic-gate int af; 2760Sstevel@tonic-gate char *cp; 2770Sstevel@tonic-gate char *buf; 2780Sstevel@tonic-gate int numifs; 2790Sstevel@tonic-gate struct lifnum lifn; 2800Sstevel@tonic-gate struct lifconf lifc; 2810Sstevel@tonic-gate struct lifreq *lifr; 2820Sstevel@tonic-gate struct logint *li; 2830Sstevel@tonic-gate struct phyint_instance *pii; 2840Sstevel@tonic-gate struct phyint_instance *next_pii; 2850Sstevel@tonic-gate char pi_name[LIFNAMSIZ + 1]; 2860Sstevel@tonic-gate boolean_t exists; 2870Sstevel@tonic-gate struct phyint *pi; 288*2250Srk129064 struct local_addr *next; 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate if (debug & D_PHYINT) 2910Sstevel@tonic-gate logdebug("initifs: Scanning interfaces\n"); 2920Sstevel@tonic-gate 2930Sstevel@tonic-gate last_initifs_time = getcurrenttime(); 2940Sstevel@tonic-gate 2950Sstevel@tonic-gate /* 296*2250Srk129064 * Free the laddr_list before collecting the local addresses. 297*2250Srk129064 */ 298*2250Srk129064 while (laddr_list != NULL) { 299*2250Srk129064 next = laddr_list->next; 300*2250Srk129064 free(laddr_list); 301*2250Srk129064 laddr_list = next; 302*2250Srk129064 } 303*2250Srk129064 304*2250Srk129064 /* 3050Sstevel@tonic-gate * Mark the interfaces so that we can find phyints and logints 3060Sstevel@tonic-gate * which have disappeared from the kernel. pii_process() and 3070Sstevel@tonic-gate * logint_init_from_k() will set {pii,li}_in_use when they find 3080Sstevel@tonic-gate * the interface in the kernel. Also, clear dupaddr bit on probe 3090Sstevel@tonic-gate * logint. check_addr_unique() will set the dupaddr bit on the 3100Sstevel@tonic-gate * probe logint, if the testaddress is not unique. 3110Sstevel@tonic-gate */ 3120Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 3130Sstevel@tonic-gate pii->pii_in_use = 0; 3140Sstevel@tonic-gate for (li = pii->pii_logint; li != NULL; li = li->li_next) { 3150Sstevel@tonic-gate li->li_in_use = 0; 3160Sstevel@tonic-gate if (pii->pii_probe_logint == li) 3170Sstevel@tonic-gate li->li_dupaddr = 0; 3180Sstevel@tonic-gate } 3190Sstevel@tonic-gate } 3200Sstevel@tonic-gate 3210Sstevel@tonic-gate lifn.lifn_family = AF_UNSPEC; 322*2250Srk129064 lifn.lifn_flags = LIFC_ALLZONES; 3230Sstevel@tonic-gate if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) { 3240Sstevel@tonic-gate logperror("initifs: ioctl (get interface numbers)"); 3250Sstevel@tonic-gate return; 3260Sstevel@tonic-gate } 3270Sstevel@tonic-gate numifs = lifn.lifn_count; 3280Sstevel@tonic-gate 3290Sstevel@tonic-gate buf = (char *)calloc(numifs, sizeof (struct lifreq)); 3300Sstevel@tonic-gate if (buf == NULL) { 3310Sstevel@tonic-gate logperror("initifs: calloc"); 3320Sstevel@tonic-gate return; 3330Sstevel@tonic-gate } 3340Sstevel@tonic-gate 3350Sstevel@tonic-gate lifc.lifc_family = AF_UNSPEC; 336*2250Srk129064 lifc.lifc_flags = LIFC_ALLZONES; 3370Sstevel@tonic-gate lifc.lifc_len = numifs * sizeof (struct lifreq); 3380Sstevel@tonic-gate lifc.lifc_buf = buf; 3390Sstevel@tonic-gate 3400Sstevel@tonic-gate if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) { 3410Sstevel@tonic-gate /* 3420Sstevel@tonic-gate * EINVAL is commonly encountered, when things change 3430Sstevel@tonic-gate * underneath us rapidly, (eg. at boot, when new interfaces 3440Sstevel@tonic-gate * are plumbed successively) and the kernel finds the buffer 3450Sstevel@tonic-gate * size we passed as too small. We will retry again 3460Sstevel@tonic-gate * when we see the next routing socket msg, or at worst after 3470Sstevel@tonic-gate * IF_SCAN_INTERVAL ms. 3480Sstevel@tonic-gate */ 3490Sstevel@tonic-gate if (errno != EINVAL) { 3500Sstevel@tonic-gate logperror("initifs: ioctl" 3510Sstevel@tonic-gate " (get interface configuration)"); 3520Sstevel@tonic-gate } 3530Sstevel@tonic-gate free(buf); 3540Sstevel@tonic-gate return; 3550Sstevel@tonic-gate } 3560Sstevel@tonic-gate 3570Sstevel@tonic-gate lifr = (struct lifreq *)lifc.lifc_req; 3580Sstevel@tonic-gate 3590Sstevel@tonic-gate /* 3600Sstevel@tonic-gate * For each lifreq returned by SIOGGLIFCONF, call pii_process() 3610Sstevel@tonic-gate * and get the state of the corresponding phyint_instance. If it is 3620Sstevel@tonic-gate * successful, then call logint_init_from_k() to get the state of the 3630Sstevel@tonic-gate * logint. 3640Sstevel@tonic-gate */ 3650Sstevel@tonic-gate for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifr++) { 366*2250Srk129064 int sockfd; 367*2250Srk129064 struct local_addr *taddr; 368*2250Srk129064 struct sockaddr_in *sin; 369*2250Srk129064 struct sockaddr_in6 *sin6; 370*2250Srk129064 struct lifreq lifreq; 371*2250Srk129064 3720Sstevel@tonic-gate af = lifr->lifr_addr.ss_family; 3730Sstevel@tonic-gate 3740Sstevel@tonic-gate /* 375*2250Srk129064 * Collect all local addresses. 376*2250Srk129064 */ 377*2250Srk129064 sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 378*2250Srk129064 (void) memset(&lifreq, 0, sizeof (lifreq)); 379*2250Srk129064 (void) strlcpy(lifreq.lifr_name, lifr->lifr_name, 380*2250Srk129064 sizeof (lifreq.lifr_name)); 381*2250Srk129064 382*2250Srk129064 if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) { 383*2250Srk129064 if (errno != ENXIO) 384*2250Srk129064 logperror("initifs: ioctl (SIOCGLIFFLAGS)"); 385*2250Srk129064 continue; 386*2250Srk129064 } 387*2250Srk129064 388*2250Srk129064 /* 389*2250Srk129064 * Add the interface address to laddr_list. 390*2250Srk129064 * Another node might have the same IP address which is up. 391*2250Srk129064 * In that case, it is appropriate to use the address as a 392*2250Srk129064 * target, even though it is also configured (but not up) on 393*2250Srk129064 * the local system. 394*2250Srk129064 * Hence,the interface address is not added to laddr_list 395*2250Srk129064 * unless it is IFF_UP. 396*2250Srk129064 */ 397*2250Srk129064 if (lifreq.lifr_flags & IFF_UP) { 398*2250Srk129064 taddr = malloc(sizeof (struct local_addr)); 399*2250Srk129064 if (taddr == NULL) { 400*2250Srk129064 logperror("initifs: malloc"); 401*2250Srk129064 continue; 402*2250Srk129064 } 403*2250Srk129064 if (af == AF_INET) { 404*2250Srk129064 sin = (struct sockaddr_in *)&lifr->lifr_addr; 405*2250Srk129064 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, 406*2250Srk129064 &taddr->addr); 407*2250Srk129064 } else { 408*2250Srk129064 sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr; 409*2250Srk129064 taddr->addr = sin6->sin6_addr; 410*2250Srk129064 } 411*2250Srk129064 taddr->next = laddr_list; 412*2250Srk129064 laddr_list = taddr; 413*2250Srk129064 } 414*2250Srk129064 415*2250Srk129064 /* 4160Sstevel@tonic-gate * Need to pass a phyint name to pii_process. Insert the 4170Sstevel@tonic-gate * null where the ':' IF_SEPARATOR is found in the logical 4180Sstevel@tonic-gate * name. 4190Sstevel@tonic-gate */ 420*2250Srk129064 (void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name)); 4210Sstevel@tonic-gate if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL) 4220Sstevel@tonic-gate *cp = '\0'; 4230Sstevel@tonic-gate 4240Sstevel@tonic-gate exists = pii_process(af, pi_name, &pii); 4250Sstevel@tonic-gate if (exists) { 4260Sstevel@tonic-gate /* The phyint is fine. So process the logint */ 4270Sstevel@tonic-gate logint_init_from_k(pii, lifr->lifr_name); 4280Sstevel@tonic-gate } 4290Sstevel@tonic-gate check_addr_unique(af, lifr->lifr_name); 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate free(buf); 4330Sstevel@tonic-gate 4340Sstevel@tonic-gate /* 4350Sstevel@tonic-gate * If the test address is now unique, and if it was not unique 4360Sstevel@tonic-gate * previously, clear the li_dupaddrmsg_printed flag and log a 4370Sstevel@tonic-gate * recovery message 4380Sstevel@tonic-gate */ 4390Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 4400Sstevel@tonic-gate struct logint *li; 4410Sstevel@tonic-gate char abuf[INET6_ADDRSTRLEN]; 4420Sstevel@tonic-gate 4430Sstevel@tonic-gate li = pii->pii_probe_logint; 4440Sstevel@tonic-gate if ((li != NULL) && !li->li_dupaddr && 4450Sstevel@tonic-gate li->li_dupaddrmsg_printed) { 4460Sstevel@tonic-gate logerr("Test address %s is unique; enabling probe-" 4470Sstevel@tonic-gate "based failure detection\n", 4480Sstevel@tonic-gate pr_addr(pii->pii_af, li->li_addr, abuf, 4490Sstevel@tonic-gate sizeof (abuf))); 4500Sstevel@tonic-gate li->li_dupaddrmsg_printed = 0; 4510Sstevel@tonic-gate } 4520Sstevel@tonic-gate } 4530Sstevel@tonic-gate 4540Sstevel@tonic-gate /* 4550Sstevel@tonic-gate * Scan for phyints and logints that have disappeared from the 4560Sstevel@tonic-gate * kernel, and delete them. 4570Sstevel@tonic-gate */ 4580Sstevel@tonic-gate pii = phyint_instances; 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate while (pii != NULL) { 4610Sstevel@tonic-gate next_pii = pii->pii_next; 4620Sstevel@tonic-gate check_if_removed(pii); 4630Sstevel@tonic-gate pii = next_pii; 4640Sstevel@tonic-gate } 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate /* 4670Sstevel@tonic-gate * Select a test address for sending probes on each phyint instance 4680Sstevel@tonic-gate */ 4690Sstevel@tonic-gate select_test_ifs(); 4700Sstevel@tonic-gate 4710Sstevel@tonic-gate /* 4720Sstevel@tonic-gate * Handle link up/down notifications from the NICs. 4730Sstevel@tonic-gate */ 4740Sstevel@tonic-gate process_link_state_changes(); 4750Sstevel@tonic-gate 4760Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 4770Sstevel@tonic-gate /* 4780Sstevel@tonic-gate * If this is a case of group failure, we don't have much 4790Sstevel@tonic-gate * to do until the group recovers again. 4800Sstevel@tonic-gate */ 4810Sstevel@tonic-gate if (GROUP_FAILED(pi->pi_group)) 4820Sstevel@tonic-gate continue; 4830Sstevel@tonic-gate 4840Sstevel@tonic-gate /* 4850Sstevel@tonic-gate * Try/Retry any pending failovers / failbacks, that did not 4860Sstevel@tonic-gate * not complete, or that could not be initiated previously. 4870Sstevel@tonic-gate * This implements the 3 invariants described in the big block 4880Sstevel@tonic-gate * comment at the beginning of probe.c 4890Sstevel@tonic-gate */ 4900Sstevel@tonic-gate if (pi->pi_flags & IFF_INACTIVE) { 491704Sethindra if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY)) 4920Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_TO_NONSTANDBY); 4930Sstevel@tonic-gate } else { 4940Sstevel@tonic-gate struct phyint_instance *pii; 4950Sstevel@tonic-gate 4960Sstevel@tonic-gate pii = pi->pi_v4; 4970Sstevel@tonic-gate if (LINK_UP(pi) && !PROBE_CAPABLE(pii)) 4980Sstevel@tonic-gate pii = pi->pi_v6; 4990Sstevel@tonic-gate if (LINK_UP(pi) && !PROBE_CAPABLE(pii)) 5000Sstevel@tonic-gate continue; 5010Sstevel@tonic-gate /* 5020Sstevel@tonic-gate * It is possible that the phyint has started 5030Sstevel@tonic-gate * receiving packets, after it has been marked 5040Sstevel@tonic-gate * PI_FAILED. Don't initiate failover, if the 5050Sstevel@tonic-gate * phyint has started recovering. failure_state() 5060Sstevel@tonic-gate * captures this check. A similar logic is used 5070Sstevel@tonic-gate * for failback/repair case. 5080Sstevel@tonic-gate */ 5090Sstevel@tonic-gate if (pi->pi_state == PI_FAILED && !pi->pi_empty && 5100Sstevel@tonic-gate (failure_state(pii) == PHYINT_FAILURE)) { 5110Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_NORMAL); 5120Sstevel@tonic-gate } else if (pi->pi_state == PI_RUNNING && !pi->pi_full) { 5130Sstevel@tonic-gate if (try_failback(pi, _B_FALSE) != 5140Sstevel@tonic-gate IPMP_FAILURE) { 5150Sstevel@tonic-gate (void) change_lif_flags(pi, IFF_FAILED, 5160Sstevel@tonic-gate _B_FALSE); 5170Sstevel@tonic-gate /* Per state diagram */ 5180Sstevel@tonic-gate pi->pi_empty = 0; 5190Sstevel@tonic-gate } 5200Sstevel@tonic-gate } 5210Sstevel@tonic-gate } 5220Sstevel@tonic-gate } 5230Sstevel@tonic-gate } 5240Sstevel@tonic-gate 5250Sstevel@tonic-gate /* 5260Sstevel@tonic-gate * Check that test/probe addresses are always unique. link-locals and 5270Sstevel@tonic-gate * ptp unnumbered may not be unique, and bind to such an (IFF_NOFAILOVER) 5280Sstevel@tonic-gate * address can produce unexpected results. Log an error and alert the user. 5290Sstevel@tonic-gate */ 5300Sstevel@tonic-gate static void 5310Sstevel@tonic-gate check_addr_unique(int af, char *name) 5320Sstevel@tonic-gate { 5330Sstevel@tonic-gate struct lifreq lifr; 5340Sstevel@tonic-gate struct phyint *pi; 5350Sstevel@tonic-gate struct in6_addr addr; 5360Sstevel@tonic-gate struct phyint_instance *pii; 5370Sstevel@tonic-gate struct sockaddr_in *sin; 5380Sstevel@tonic-gate struct sockaddr_in6 *sin6; 5390Sstevel@tonic-gate int ifsock; 5400Sstevel@tonic-gate char abuf[INET6_ADDRSTRLEN]; 5410Sstevel@tonic-gate 5420Sstevel@tonic-gate /* Get the socket for doing ioctls */ 5430Sstevel@tonic-gate ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6; 5440Sstevel@tonic-gate 5450Sstevel@tonic-gate (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); 5460Sstevel@tonic-gate lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; 5470Sstevel@tonic-gate /* 5480Sstevel@tonic-gate * Get the address corresponding to 'name'. We cannot 5490Sstevel@tonic-gate * do a logint lookup in our tables, because, not all logints 5500Sstevel@tonic-gate * in the system are tracked by mpathd. (eg. things not in a group) 5510Sstevel@tonic-gate */ 5520Sstevel@tonic-gate if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) { 5530Sstevel@tonic-gate if (errno == ENXIO) { 5540Sstevel@tonic-gate /* Interface has vanished */ 5550Sstevel@tonic-gate return; 5560Sstevel@tonic-gate } else { 5570Sstevel@tonic-gate logperror("ioctl (get addr)"); 5580Sstevel@tonic-gate return; 5590Sstevel@tonic-gate } 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate if (af == AF_INET) { 5630Sstevel@tonic-gate sin = (struct sockaddr_in *)&lifr.lifr_addr; 5640Sstevel@tonic-gate IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &addr); 5650Sstevel@tonic-gate } else { 5660Sstevel@tonic-gate sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; 5670Sstevel@tonic-gate addr = sin6->sin6_addr; 5680Sstevel@tonic-gate } 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate /* 5710Sstevel@tonic-gate * Does the address 'addr' match any known test address ? If so 5720Sstevel@tonic-gate * it is a duplicate, unless we are looking at the same logint 5730Sstevel@tonic-gate */ 5740Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 5750Sstevel@tonic-gate pii = PHYINT_INSTANCE(pi, af); 5760Sstevel@tonic-gate if (pii == NULL || pii->pii_probe_logint == NULL) 5770Sstevel@tonic-gate continue; 5780Sstevel@tonic-gate 5790Sstevel@tonic-gate if (!IN6_ARE_ADDR_EQUAL(&addr, 5800Sstevel@tonic-gate &pii->pii_probe_logint->li_addr)) { 5810Sstevel@tonic-gate continue; 5820Sstevel@tonic-gate } 5830Sstevel@tonic-gate 5840Sstevel@tonic-gate if (strncmp(pii->pii_probe_logint->li_name, name, 5850Sstevel@tonic-gate sizeof (pii->pii_probe_logint->li_name)) == 0) { 5860Sstevel@tonic-gate continue; 5870Sstevel@tonic-gate } 5880Sstevel@tonic-gate 5890Sstevel@tonic-gate /* 5900Sstevel@tonic-gate * This test address is not unique. Set the dupaddr bit 5910Sstevel@tonic-gate */ 5920Sstevel@tonic-gate pii->pii_probe_logint->li_dupaddr = 1; 5930Sstevel@tonic-gate 5940Sstevel@tonic-gate /* 5950Sstevel@tonic-gate * Log an error message if not already logged 5960Sstevel@tonic-gate */ 5970Sstevel@tonic-gate if (pii->pii_probe_logint->li_dupaddrmsg_printed) 5980Sstevel@tonic-gate continue; 5990Sstevel@tonic-gate 6000Sstevel@tonic-gate logerr("Test address %s is not unique; disabling " 6010Sstevel@tonic-gate "probe-based failure detection\n", 6020Sstevel@tonic-gate pr_addr(af, addr, abuf, sizeof (abuf))); 6030Sstevel@tonic-gate 6040Sstevel@tonic-gate pii->pii_probe_logint->li_dupaddrmsg_printed = 1; 6050Sstevel@tonic-gate } 6060Sstevel@tonic-gate } 6070Sstevel@tonic-gate 6080Sstevel@tonic-gate /* 6090Sstevel@tonic-gate * Stop probing an interface. Called when an interface is offlined. 6100Sstevel@tonic-gate * The probe socket is closed on each interface instance, and the 6110Sstevel@tonic-gate * interface state set to PI_OFFLINE. 6120Sstevel@tonic-gate */ 6130Sstevel@tonic-gate static void 6140Sstevel@tonic-gate stop_probing(struct phyint *pi) 6150Sstevel@tonic-gate { 6160Sstevel@tonic-gate struct phyint_instance *pii; 6170Sstevel@tonic-gate 6180Sstevel@tonic-gate pii = pi->pi_v4; 6190Sstevel@tonic-gate if (pii != NULL) { 6200Sstevel@tonic-gate if (pii->pii_probe_sock != -1) 6210Sstevel@tonic-gate close_probe_socket(pii, _B_TRUE); 6220Sstevel@tonic-gate pii->pii_probe_logint = NULL; 6230Sstevel@tonic-gate } 6240Sstevel@tonic-gate 6250Sstevel@tonic-gate pii = pi->pi_v6; 6260Sstevel@tonic-gate if (pii != NULL) { 6270Sstevel@tonic-gate if (pii->pii_probe_sock != -1) 6280Sstevel@tonic-gate close_probe_socket(pii, _B_TRUE); 6290Sstevel@tonic-gate pii->pii_probe_logint = NULL; 6300Sstevel@tonic-gate } 6310Sstevel@tonic-gate 6320Sstevel@tonic-gate phyint_chstate(pi, PI_OFFLINE); 6330Sstevel@tonic-gate } 6340Sstevel@tonic-gate 6352074Smeem enum { BAD_TESTFLAGS, OK_TESTFLAGS, BEST_TESTFLAGS }; 6362074Smeem 6370Sstevel@tonic-gate /* 6382074Smeem * Rate the provided test flags. By definition, IFF_NOFAILOVER must be set. 6392074Smeem * IFF_UP must also be set so that the associated address can be used as a 6402074Smeem * source address. Further, we must be able to exchange packets with local 6412074Smeem * destinations, so IFF_NOXMIT and IFF_NOLOCAL must be clear. For historical 6422074Smeem * reasons, we have a proclivity for IFF_DEPRECATED IPv4 test addresses. 6432074Smeem */ 6442074Smeem static int 6452074Smeem rate_testflags(uint64_t flags) 6462074Smeem { 6472074Smeem if ((flags & (IFF_NOFAILOVER | IFF_UP)) != (IFF_NOFAILOVER | IFF_UP)) 6482074Smeem return (BAD_TESTFLAGS); 6492074Smeem 6502074Smeem if ((flags & (IFF_NOXMIT | IFF_NOLOCAL)) != 0) 6512074Smeem return (BAD_TESTFLAGS); 6522074Smeem 6532074Smeem if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_DEPRECATED) 6542074Smeem return (BEST_TESTFLAGS); 6552074Smeem 6562074Smeem if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_IPV6) 6572074Smeem return (BEST_TESTFLAGS); 6582074Smeem 6592074Smeem return (OK_TESTFLAGS); 6602074Smeem } 6612074Smeem 6622074Smeem /* 6632074Smeem * Attempt to select a test address for each phyint instance. 6642074Smeem * Call phyint_inst_sockinit() to complete the initializations. 6650Sstevel@tonic-gate */ 6660Sstevel@tonic-gate static void 6670Sstevel@tonic-gate select_test_ifs(void) 6680Sstevel@tonic-gate { 6690Sstevel@tonic-gate struct phyint *pi; 6700Sstevel@tonic-gate struct phyint_instance *pii; 6710Sstevel@tonic-gate struct phyint_instance *next_pii; 6722074Smeem struct logint *li; 6732074Smeem struct logint *probe_logint; 6742074Smeem boolean_t target_scan_reqd = _B_FALSE; 6752074Smeem struct target *tg; 6762074Smeem int rating; 6770Sstevel@tonic-gate 6780Sstevel@tonic-gate if (debug & D_PHYINT) 6790Sstevel@tonic-gate logdebug("select_test_ifs\n"); 6800Sstevel@tonic-gate 6810Sstevel@tonic-gate /* 6820Sstevel@tonic-gate * For each phyint instance, do the test address selection 6830Sstevel@tonic-gate */ 6840Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = next_pii) { 6850Sstevel@tonic-gate next_pii = pii->pii_next; 6862074Smeem probe_logint = NULL; 6872074Smeem 6880Sstevel@tonic-gate /* 6890Sstevel@tonic-gate * An interface that is offline, should not be probed. 6900Sstevel@tonic-gate * Offline interfaces should always in PI_OFFLINE state, 6910Sstevel@tonic-gate * unless some other entity has set the offline flag. 6920Sstevel@tonic-gate */ 6930Sstevel@tonic-gate if (pii->pii_phyint->pi_flags & IFF_OFFLINE) { 6940Sstevel@tonic-gate if (pii->pii_phyint->pi_state != PI_OFFLINE) { 6950Sstevel@tonic-gate logerr("shouldn't be probing offline" 6960Sstevel@tonic-gate " interface %s (state is: %u)." 6970Sstevel@tonic-gate " Stopping probes.\n", 6980Sstevel@tonic-gate pii->pii_phyint->pi_name, 6990Sstevel@tonic-gate pii->pii_phyint->pi_state); 7000Sstevel@tonic-gate stop_probing(pii->pii_phyint); 7010Sstevel@tonic-gate } 7020Sstevel@tonic-gate continue; 7030Sstevel@tonic-gate } 7040Sstevel@tonic-gate 7052074Smeem li = pii->pii_probe_logint; 7062074Smeem if (li != NULL) { 7070Sstevel@tonic-gate /* 7082074Smeem * We've already got a test address; only proceed 7092074Smeem * if it's suboptimal. 7100Sstevel@tonic-gate */ 7112074Smeem if (rate_testflags(li->li_flags) == BEST_TESTFLAGS) 7122074Smeem continue; 7130Sstevel@tonic-gate } 7140Sstevel@tonic-gate 7150Sstevel@tonic-gate /* 7160Sstevel@tonic-gate * Walk the logints of this phyint instance, and select 7170Sstevel@tonic-gate * the best available test address 7180Sstevel@tonic-gate */ 7190Sstevel@tonic-gate for (li = pii->pii_logint; li != NULL; li = li->li_next) { 7200Sstevel@tonic-gate /* 7210Sstevel@tonic-gate * Skip any IPv6 logints that are not link-local, 7220Sstevel@tonic-gate * since we should always have a link-local address 7230Sstevel@tonic-gate * anyway and in6_data() expects link-local replies. 7240Sstevel@tonic-gate */ 7250Sstevel@tonic-gate if (pii->pii_af == AF_INET6 && 7260Sstevel@tonic-gate !IN6_IS_ADDR_LINKLOCAL(&li->li_addr)) 7270Sstevel@tonic-gate continue; 7280Sstevel@tonic-gate 7292074Smeem /* 7302074Smeem * Rate the testflags. If we've found an optimal 7312074Smeem * match, then break out; otherwise, record the most 7322074Smeem * recent OK one. 7332074Smeem */ 7342074Smeem rating = rate_testflags(li->li_flags); 7352074Smeem if (rating == BAD_TESTFLAGS) 7362074Smeem continue; 7372074Smeem 7382074Smeem probe_logint = li; 7392074Smeem if (rating == BEST_TESTFLAGS) 7402074Smeem break; 7410Sstevel@tonic-gate } 7420Sstevel@tonic-gate 7430Sstevel@tonic-gate /* 7442074Smeem * If the probe logint has changed, ditch the old one. 7450Sstevel@tonic-gate */ 7462074Smeem if (pii->pii_probe_logint != NULL && 7472074Smeem pii->pii_probe_logint != probe_logint) { 7480Sstevel@tonic-gate if (pii->pii_probe_sock != -1) 7490Sstevel@tonic-gate close_probe_socket(pii, _B_TRUE); 7500Sstevel@tonic-gate pii->pii_probe_logint = NULL; 7510Sstevel@tonic-gate } 7520Sstevel@tonic-gate 7532074Smeem if (probe_logint == NULL) { 7540Sstevel@tonic-gate /* 7550Sstevel@tonic-gate * We don't have a test address. Don't print an 7560Sstevel@tonic-gate * error message immediately. check_config() will 7570Sstevel@tonic-gate * take care of it. Zero out the probe stats array 7580Sstevel@tonic-gate * since it is no longer relevant. Optimize by 7590Sstevel@tonic-gate * checking if it is already zeroed out. 7600Sstevel@tonic-gate */ 7610Sstevel@tonic-gate int pr_ndx; 7620Sstevel@tonic-gate 7630Sstevel@tonic-gate pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next); 7640Sstevel@tonic-gate if (pii->pii_probes[pr_ndx].pr_status != PR_UNUSED) { 7650Sstevel@tonic-gate clear_pii_probe_stats(pii); 7660Sstevel@tonic-gate reset_crtt_all(pii->pii_phyint); 7670Sstevel@tonic-gate } 7680Sstevel@tonic-gate continue; 7692074Smeem } else if (probe_logint == pii->pii_probe_logint) { 7700Sstevel@tonic-gate /* 7710Sstevel@tonic-gate * If we didn't find any new test addr, go to the 7720Sstevel@tonic-gate * next phyint. 7730Sstevel@tonic-gate */ 7740Sstevel@tonic-gate continue; 7750Sstevel@tonic-gate } 7760Sstevel@tonic-gate 7770Sstevel@tonic-gate /* 7780Sstevel@tonic-gate * The phyint is either being assigned a new testaddr 7790Sstevel@tonic-gate * or is being assigned a testaddr for the 1st time. 7800Sstevel@tonic-gate * Need to initialize the phyint socket 7810Sstevel@tonic-gate */ 7822074Smeem pii->pii_probe_logint = probe_logint; 7830Sstevel@tonic-gate if (!phyint_inst_sockinit(pii)) { 7840Sstevel@tonic-gate if (debug & D_PHYINT) { 7850Sstevel@tonic-gate logdebug("select_test_ifs: " 7860Sstevel@tonic-gate "phyint_sockinit failed\n"); 7870Sstevel@tonic-gate } 7880Sstevel@tonic-gate phyint_inst_delete(pii); 7890Sstevel@tonic-gate continue; 7900Sstevel@tonic-gate } 7910Sstevel@tonic-gate 7920Sstevel@tonic-gate /* 7930Sstevel@tonic-gate * This phyint instance is now enabled for probes; this 7940Sstevel@tonic-gate * impacts our state machine in two ways: 7950Sstevel@tonic-gate * 7960Sstevel@tonic-gate * 1. If we're probe *capable* as well (i.e., we have 7970Sstevel@tonic-gate * probe targets) and the interface is in PI_NOTARGETS, 7980Sstevel@tonic-gate * then transition to PI_RUNNING. 7990Sstevel@tonic-gate * 8000Sstevel@tonic-gate * 2. If we're not probe capable, and the other phyint 8010Sstevel@tonic-gate * instance is also not probe capable, and we were in 8020Sstevel@tonic-gate * PI_RUNNING, then transition to PI_NOTARGETS. 8030Sstevel@tonic-gate * 8040Sstevel@tonic-gate * Also see the state diagram in mpd_probe.c. 8050Sstevel@tonic-gate */ 8060Sstevel@tonic-gate if (PROBE_CAPABLE(pii)) { 8070Sstevel@tonic-gate if (pii->pii_phyint->pi_state == PI_NOTARGETS) 8080Sstevel@tonic-gate phyint_chstate(pii->pii_phyint, PI_RUNNING); 8090Sstevel@tonic-gate } else if (!PROBE_CAPABLE(phyint_inst_other(pii))) { 8100Sstevel@tonic-gate if (pii->pii_phyint->pi_state == PI_RUNNING) 8110Sstevel@tonic-gate phyint_chstate(pii->pii_phyint, PI_NOTARGETS); 8120Sstevel@tonic-gate } 8130Sstevel@tonic-gate 8140Sstevel@tonic-gate if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) { 8150Sstevel@tonic-gate tg = pii->pii_targets; 8160Sstevel@tonic-gate if (tg != NULL) 8170Sstevel@tonic-gate target_delete(tg); 8180Sstevel@tonic-gate assert(pii->pii_targets == NULL); 8190Sstevel@tonic-gate assert(pii->pii_target_next == NULL); 8200Sstevel@tonic-gate assert(pii->pii_ntargets == 0); 8212074Smeem target_create(pii, probe_logint->li_dstaddr, 8220Sstevel@tonic-gate _B_TRUE); 8230Sstevel@tonic-gate } 8240Sstevel@tonic-gate 8250Sstevel@tonic-gate /* 8260Sstevel@tonic-gate * If no targets are currently known for this phyint 8270Sstevel@tonic-gate * we need to call init_router_targets. Since 8280Sstevel@tonic-gate * init_router_targets() initializes the list of targets 8290Sstevel@tonic-gate * for all phyints it is done below the loop. 8300Sstevel@tonic-gate */ 8310Sstevel@tonic-gate if (pii->pii_targets == NULL) 8320Sstevel@tonic-gate target_scan_reqd = _B_TRUE; 8330Sstevel@tonic-gate 8340Sstevel@tonic-gate /* 8350Sstevel@tonic-gate * Start the probe timer for this instance. 8360Sstevel@tonic-gate */ 8370Sstevel@tonic-gate if (!pii->pii_basetime_inited && pii->pii_probe_sock != -1) { 8380Sstevel@tonic-gate start_timer(pii); 8390Sstevel@tonic-gate pii->pii_basetime_inited = 1; 8400Sstevel@tonic-gate } 8410Sstevel@tonic-gate } 8420Sstevel@tonic-gate 8430Sstevel@tonic-gate /* 8440Sstevel@tonic-gate * Check the interface list for any interfaces that are marked 8450Sstevel@tonic-gate * PI_FAILED but no longer enabled to send probes, and call 8460Sstevel@tonic-gate * phyint_check_for_repair() to see if the link now indicates that the 8470Sstevel@tonic-gate * interface should be repaired. Also see the state diagram in 8480Sstevel@tonic-gate * mpd_probe.c. 8490Sstevel@tonic-gate */ 8500Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 8510Sstevel@tonic-gate if (pi->pi_state == PI_FAILED && 8520Sstevel@tonic-gate !PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) { 8530Sstevel@tonic-gate phyint_check_for_repair(pi); 8540Sstevel@tonic-gate } 8550Sstevel@tonic-gate } 8560Sstevel@tonic-gate 8570Sstevel@tonic-gate /* 8580Sstevel@tonic-gate * Try to populate the target list. init_router_targets populates 8590Sstevel@tonic-gate * the target list from the routing table. If our target list is 8600Sstevel@tonic-gate * still empty, init_host_targets adds host targets based on the 8610Sstevel@tonic-gate * host target list of other phyints in the group. 8620Sstevel@tonic-gate */ 8630Sstevel@tonic-gate if (target_scan_reqd) { 8640Sstevel@tonic-gate init_router_targets(); 8650Sstevel@tonic-gate init_host_targets(); 8660Sstevel@tonic-gate } 8670Sstevel@tonic-gate } 8680Sstevel@tonic-gate 8690Sstevel@tonic-gate /* 8700Sstevel@tonic-gate * Check phyint group configuration, to detect any inconsistencies, 8710Sstevel@tonic-gate * and log an error message. This is called from runtimeouts every 8720Sstevel@tonic-gate * 20 secs. But the error message is displayed once. If the 8730Sstevel@tonic-gate * consistency is resolved by the admin, a recovery message is displayed 8740Sstevel@tonic-gate * once. 8750Sstevel@tonic-gate */ 8760Sstevel@tonic-gate static void 8770Sstevel@tonic-gate check_config(void) 8780Sstevel@tonic-gate { 8790Sstevel@tonic-gate struct phyint_group *pg; 8800Sstevel@tonic-gate struct phyint *pi; 8810Sstevel@tonic-gate boolean_t v4_in_group; 8820Sstevel@tonic-gate boolean_t v6_in_group; 8830Sstevel@tonic-gate 8840Sstevel@tonic-gate /* 8850Sstevel@tonic-gate * All phyints of a group must be homogenous to ensure that 8860Sstevel@tonic-gate * failover or failback can be done. If any phyint in a group 8870Sstevel@tonic-gate * has IPv4 plumbed, check that all phyints have IPv4 plumbed. 8880Sstevel@tonic-gate * Do a similar check for IPv6. 8890Sstevel@tonic-gate */ 8900Sstevel@tonic-gate for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) { 8910Sstevel@tonic-gate if (pg == phyint_anongroup) 8920Sstevel@tonic-gate continue; 8930Sstevel@tonic-gate 8940Sstevel@tonic-gate v4_in_group = _B_FALSE; 8950Sstevel@tonic-gate v6_in_group = _B_FALSE; 8960Sstevel@tonic-gate /* 8970Sstevel@tonic-gate * 1st pass. Determine if at least 1 phyint in the group 8980Sstevel@tonic-gate * has IPv4 plumbed and if so set v4_in_group to true. 8990Sstevel@tonic-gate * Repeat similarly for IPv6. 9000Sstevel@tonic-gate */ 9010Sstevel@tonic-gate for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 9020Sstevel@tonic-gate if (pi->pi_v4 != NULL) 9030Sstevel@tonic-gate v4_in_group = _B_TRUE; 9040Sstevel@tonic-gate if (pi->pi_v6 != NULL) 9050Sstevel@tonic-gate v6_in_group = _B_TRUE; 9060Sstevel@tonic-gate } 9070Sstevel@tonic-gate 9080Sstevel@tonic-gate /* 9090Sstevel@tonic-gate * 2nd pass. If v4_in_group is true, check that phyint 9100Sstevel@tonic-gate * has IPv4 plumbed. Repeat similarly for IPv6. Print 9110Sstevel@tonic-gate * out a message the 1st time only. 9120Sstevel@tonic-gate */ 9130Sstevel@tonic-gate for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) { 9140Sstevel@tonic-gate if (pi->pi_flags & IFF_OFFLINE) 9150Sstevel@tonic-gate continue; 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) { 9180Sstevel@tonic-gate if (!pi->pi_cfgmsg_printed) { 9190Sstevel@tonic-gate logerr("NIC %s of group %s is" 9200Sstevel@tonic-gate " not plumbed for IPv4 and may" 9210Sstevel@tonic-gate " affect failover capability\n", 9220Sstevel@tonic-gate pi->pi_name, 9230Sstevel@tonic-gate pi->pi_group->pg_name); 9240Sstevel@tonic-gate pi->pi_cfgmsg_printed = 1; 9250Sstevel@tonic-gate } 9260Sstevel@tonic-gate } else if (v6_in_group == _B_TRUE && 9270Sstevel@tonic-gate pi->pi_v6 == NULL) { 9280Sstevel@tonic-gate if (!pi->pi_cfgmsg_printed) { 9290Sstevel@tonic-gate logerr("NIC %s of group %s is" 9300Sstevel@tonic-gate " not plumbed for IPv6 and may" 9310Sstevel@tonic-gate " affect failover capability\n", 9320Sstevel@tonic-gate pi->pi_name, 9330Sstevel@tonic-gate pi->pi_group->pg_name); 9340Sstevel@tonic-gate pi->pi_cfgmsg_printed = 1; 9350Sstevel@tonic-gate } 9360Sstevel@tonic-gate } else { 9370Sstevel@tonic-gate /* 9380Sstevel@tonic-gate * The phyint matches the group configuration, 9390Sstevel@tonic-gate * if we have reached this point. If it was 9400Sstevel@tonic-gate * improperly configured earlier, log an 9410Sstevel@tonic-gate * error recovery message 9420Sstevel@tonic-gate */ 9430Sstevel@tonic-gate if (pi->pi_cfgmsg_printed) { 9440Sstevel@tonic-gate logerr("NIC %s is now consistent with " 9450Sstevel@tonic-gate "group %s and failover capability " 9460Sstevel@tonic-gate "is restored\n", pi->pi_name, 9470Sstevel@tonic-gate pi->pi_group->pg_name); 9480Sstevel@tonic-gate pi->pi_cfgmsg_printed = 0; 9490Sstevel@tonic-gate } 9500Sstevel@tonic-gate } 9510Sstevel@tonic-gate 9520Sstevel@tonic-gate } 9530Sstevel@tonic-gate } 9540Sstevel@tonic-gate 9550Sstevel@tonic-gate /* 9560Sstevel@tonic-gate * In order to perform probe-based failure detection, a phyint must 9570Sstevel@tonic-gate * have at least 1 test/probe address for sending and receiving probes 9580Sstevel@tonic-gate * (either on IPv4 or IPv6 instance or both). If no test address has 9590Sstevel@tonic-gate * been configured, notify the administrator, but continue on since we 9600Sstevel@tonic-gate * can still perform load spreading, along with "link up/down" based 9610Sstevel@tonic-gate * failure detection. 9620Sstevel@tonic-gate */ 9630Sstevel@tonic-gate for (pi = phyints; pi != NULL; pi = pi->pi_next) { 9640Sstevel@tonic-gate if (pi->pi_flags & IFF_OFFLINE) 9650Sstevel@tonic-gate continue; 9660Sstevel@tonic-gate 9670Sstevel@tonic-gate if ((pi->pi_v4 == NULL || 9680Sstevel@tonic-gate pi->pi_v4->pii_probe_logint == NULL) && 9690Sstevel@tonic-gate (pi->pi_v6 == NULL || 9700Sstevel@tonic-gate pi->pi_v6->pii_probe_logint == NULL)) { 9710Sstevel@tonic-gate if (!pi->pi_taddrmsg_printed) { 9720Sstevel@tonic-gate logerr("No test address configured on " 9730Sstevel@tonic-gate "interface %s; disabling probe-based " 9740Sstevel@tonic-gate "failure detection on it\n", pi->pi_name); 9750Sstevel@tonic-gate pi->pi_taddrmsg_printed = 1; 9760Sstevel@tonic-gate } 9770Sstevel@tonic-gate } else if (pi->pi_taddrmsg_printed) { 9780Sstevel@tonic-gate logerr("Test address now configured on interface %s; " 9790Sstevel@tonic-gate "enabling probe-based failure detection on it\n", 9800Sstevel@tonic-gate pi->pi_name); 9810Sstevel@tonic-gate pi->pi_taddrmsg_printed = 0; 9820Sstevel@tonic-gate } 9830Sstevel@tonic-gate 9840Sstevel@tonic-gate } 9850Sstevel@tonic-gate } 9860Sstevel@tonic-gate 9870Sstevel@tonic-gate /* 9880Sstevel@tonic-gate * Timer mechanism using relative time (in milliseconds) from the 9890Sstevel@tonic-gate * previous timer event. Timers exceeding TIMER_INFINITY milliseconds 9900Sstevel@tonic-gate * will fire after TIMER_INFINITY milliseconds. 9910Sstevel@tonic-gate * Unsigned arithmetic note: We assume a 32-bit circular sequence space for 9920Sstevel@tonic-gate * time values. Hence 2 consecutive timer events cannot be spaced farther 9930Sstevel@tonic-gate * than 0x7fffffff. We call this TIMER_INFINITY, and it is the maximum value 9940Sstevel@tonic-gate * that can be passed for the delay parameter of timer_schedule() 9950Sstevel@tonic-gate */ 9960Sstevel@tonic-gate static uint_t timer_next; /* Currently scheduled timeout */ 9970Sstevel@tonic-gate static boolean_t timer_active = _B_FALSE; /* SIGALRM has not yet occurred */ 9980Sstevel@tonic-gate 9990Sstevel@tonic-gate static void 10000Sstevel@tonic-gate timer_init(void) 10010Sstevel@tonic-gate { 10020Sstevel@tonic-gate timer_next = getcurrenttime() + TIMER_INFINITY; 10030Sstevel@tonic-gate /* 10040Sstevel@tonic-gate * The call to run_timeouts() will get the timer started 10050Sstevel@tonic-gate * Since there are no phyints at this point, the timer will 10060Sstevel@tonic-gate * be set for IF_SCAN_INTERVAL ms. 10070Sstevel@tonic-gate */ 10080Sstevel@tonic-gate run_timeouts(); 10090Sstevel@tonic-gate } 10100Sstevel@tonic-gate 10110Sstevel@tonic-gate /* 10120Sstevel@tonic-gate * Make sure the next SIGALRM occurs delay milliseconds from the current 10130Sstevel@tonic-gate * time if not earlier. We are interested only in time differences. 10140Sstevel@tonic-gate */ 10150Sstevel@tonic-gate void 10160Sstevel@tonic-gate timer_schedule(uint_t delay) 10170Sstevel@tonic-gate { 10180Sstevel@tonic-gate uint_t now; 10190Sstevel@tonic-gate struct itimerval itimerval; 10200Sstevel@tonic-gate 10210Sstevel@tonic-gate if (debug & D_TIMER) 10220Sstevel@tonic-gate logdebug("timer_schedule(%u)\n", delay); 10230Sstevel@tonic-gate 10240Sstevel@tonic-gate assert(delay <= TIMER_INFINITY); 10250Sstevel@tonic-gate 10260Sstevel@tonic-gate now = getcurrenttime(); 10270Sstevel@tonic-gate if (delay == 0) { 10280Sstevel@tonic-gate /* Minimum allowed delay */ 10290Sstevel@tonic-gate delay = 1; 10300Sstevel@tonic-gate } 10310Sstevel@tonic-gate /* Will this timer occur before the currently scheduled SIGALRM? */ 10320Sstevel@tonic-gate if (timer_active && TIME_GE(now + delay, timer_next)) { 10330Sstevel@tonic-gate if (debug & D_TIMER) { 10340Sstevel@tonic-gate logdebug("timer_schedule(%u) - no action: " 10350Sstevel@tonic-gate "now %u next %u\n", delay, now, timer_next); 10360Sstevel@tonic-gate } 10370Sstevel@tonic-gate return; 10380Sstevel@tonic-gate } 10390Sstevel@tonic-gate timer_next = now + delay; 10400Sstevel@tonic-gate 10410Sstevel@tonic-gate itimerval.it_value.tv_sec = delay / 1000; 10420Sstevel@tonic-gate itimerval.it_value.tv_usec = (delay % 1000) * 1000; 10430Sstevel@tonic-gate itimerval.it_interval.tv_sec = 0; 10440Sstevel@tonic-gate itimerval.it_interval.tv_usec = 0; 10450Sstevel@tonic-gate if (debug & D_TIMER) { 10460Sstevel@tonic-gate logdebug("timer_schedule(%u): sec %ld usec %ld\n", 10470Sstevel@tonic-gate delay, itimerval.it_value.tv_sec, 10480Sstevel@tonic-gate itimerval.it_value.tv_usec); 10490Sstevel@tonic-gate } 10500Sstevel@tonic-gate timer_active = _B_TRUE; 10510Sstevel@tonic-gate if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0) { 10520Sstevel@tonic-gate logperror("timer_schedule: setitimer"); 10530Sstevel@tonic-gate exit(2); 10540Sstevel@tonic-gate } 10550Sstevel@tonic-gate } 10560Sstevel@tonic-gate 10570Sstevel@tonic-gate /* 10580Sstevel@tonic-gate * Timer has fired. Determine when the next timer event will occur by asking 10590Sstevel@tonic-gate * all the timer routines. Should not be called from a timer routine. 10600Sstevel@tonic-gate */ 10610Sstevel@tonic-gate static void 10620Sstevel@tonic-gate run_timeouts(void) 10630Sstevel@tonic-gate { 10640Sstevel@tonic-gate uint_t next; 10650Sstevel@tonic-gate uint_t next_event_time; 10660Sstevel@tonic-gate struct phyint_instance *pii; 10670Sstevel@tonic-gate struct phyint_instance *next_pii; 10680Sstevel@tonic-gate static boolean_t timeout_running; 10690Sstevel@tonic-gate 10700Sstevel@tonic-gate /* assert that recursive timeouts don't happen. */ 10710Sstevel@tonic-gate assert(!timeout_running); 10720Sstevel@tonic-gate 10730Sstevel@tonic-gate timeout_running = _B_TRUE; 10740Sstevel@tonic-gate 10750Sstevel@tonic-gate if (debug & D_TIMER) 10760Sstevel@tonic-gate logdebug("run_timeouts()\n"); 10770Sstevel@tonic-gate 10780Sstevel@tonic-gate next = TIMER_INFINITY; 10790Sstevel@tonic-gate 10800Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = next_pii) { 10810Sstevel@tonic-gate next_pii = pii->pii_next; 10820Sstevel@tonic-gate next_event_time = phyint_inst_timer(pii); 10830Sstevel@tonic-gate if (next_event_time != TIMER_INFINITY && next_event_time < next) 10840Sstevel@tonic-gate next = next_event_time; 10850Sstevel@tonic-gate 10860Sstevel@tonic-gate if (debug & D_TIMER) { 10870Sstevel@tonic-gate logdebug("run_timeouts(%s %s): next scheduled for" 10880Sstevel@tonic-gate " this phyint inst %u, next scheduled global" 10890Sstevel@tonic-gate " %u ms\n", 10900Sstevel@tonic-gate AF_STR(pii->pii_af), pii->pii_phyint->pi_name, 10910Sstevel@tonic-gate next_event_time, next); 10920Sstevel@tonic-gate } 10930Sstevel@tonic-gate } 10940Sstevel@tonic-gate 10950Sstevel@tonic-gate /* 10960Sstevel@tonic-gate * Make sure initifs() is called at least once every 10970Sstevel@tonic-gate * IF_SCAN_INTERVAL, to make sure that we are in sync 10980Sstevel@tonic-gate * with the kernel, in case we have missed any routing 10990Sstevel@tonic-gate * socket messages. 11000Sstevel@tonic-gate */ 11010Sstevel@tonic-gate if (next > IF_SCAN_INTERVAL) 11020Sstevel@tonic-gate next = IF_SCAN_INTERVAL; 11030Sstevel@tonic-gate 11040Sstevel@tonic-gate if ((getcurrenttime() - last_initifs_time) > IF_SCAN_INTERVAL) { 11050Sstevel@tonic-gate initifs(); 11060Sstevel@tonic-gate check_config(); 11070Sstevel@tonic-gate } 11080Sstevel@tonic-gate 11090Sstevel@tonic-gate if (debug & D_TIMER) 11100Sstevel@tonic-gate logdebug("run_timeouts: %u ms\n", next); 11110Sstevel@tonic-gate 11120Sstevel@tonic-gate timer_schedule(next); 11130Sstevel@tonic-gate timeout_running = _B_FALSE; 11140Sstevel@tonic-gate } 11150Sstevel@tonic-gate 11160Sstevel@tonic-gate static int eventpipe_read = -1; /* Used for synchronous signal delivery */ 11170Sstevel@tonic-gate static int eventpipe_write = -1; 11180Sstevel@tonic-gate static boolean_t cleanup_started = _B_FALSE; 11190Sstevel@tonic-gate /* Don't write to eventpipe if in cleanup */ 11200Sstevel@tonic-gate /* 11210Sstevel@tonic-gate * Ensure that signals are processed synchronously with the rest of 11220Sstevel@tonic-gate * the code by just writing a one character signal number on the pipe. 11230Sstevel@tonic-gate * The poll loop will pick this up and process the signal event. 11240Sstevel@tonic-gate */ 11250Sstevel@tonic-gate static void 11260Sstevel@tonic-gate sig_handler(int signo) 11270Sstevel@tonic-gate { 11280Sstevel@tonic-gate uchar_t buf = (uchar_t)signo; 11290Sstevel@tonic-gate 11300Sstevel@tonic-gate /* 11310Sstevel@tonic-gate * Don't write to pipe if cleanup has already begun. cleanup() 11320Sstevel@tonic-gate * might have closed the pipe already 11330Sstevel@tonic-gate */ 11340Sstevel@tonic-gate if (cleanup_started) 11350Sstevel@tonic-gate return; 11360Sstevel@tonic-gate 11370Sstevel@tonic-gate if (eventpipe_write == -1) { 11380Sstevel@tonic-gate logerr("sig_handler: no pipe found\n"); 11390Sstevel@tonic-gate return; 11400Sstevel@tonic-gate } 11410Sstevel@tonic-gate if (write(eventpipe_write, &buf, sizeof (buf)) < 0) 11420Sstevel@tonic-gate logperror("sig_handler: write"); 11430Sstevel@tonic-gate } 11440Sstevel@tonic-gate 11450Sstevel@tonic-gate extern struct probes_missed probes_missed; 11460Sstevel@tonic-gate 11470Sstevel@tonic-gate /* 11480Sstevel@tonic-gate * Pick up a signal "byte" from the pipe and process it. 11490Sstevel@tonic-gate */ 11500Sstevel@tonic-gate static void 11510Sstevel@tonic-gate in_signal(int fd) 11520Sstevel@tonic-gate { 11530Sstevel@tonic-gate uchar_t buf; 11540Sstevel@tonic-gate uint64_t sent, acked, lost, unacked, unknown; 11550Sstevel@tonic-gate struct phyint_instance *pii; 11560Sstevel@tonic-gate int pr_ndx; 11570Sstevel@tonic-gate 11580Sstevel@tonic-gate switch (read(fd, &buf, sizeof (buf))) { 11590Sstevel@tonic-gate case -1: 11600Sstevel@tonic-gate logperror("in_signal: read"); 11610Sstevel@tonic-gate exit(1); 11620Sstevel@tonic-gate /* NOTREACHED */ 11630Sstevel@tonic-gate case 1: 11640Sstevel@tonic-gate break; 11650Sstevel@tonic-gate case 0: 11660Sstevel@tonic-gate logerr("in_signal: read end of file\n"); 11670Sstevel@tonic-gate exit(1); 11680Sstevel@tonic-gate /* NOTREACHED */ 11690Sstevel@tonic-gate default: 11700Sstevel@tonic-gate logerr("in_signal: read > 1\n"); 11710Sstevel@tonic-gate exit(1); 11720Sstevel@tonic-gate } 11730Sstevel@tonic-gate 11740Sstevel@tonic-gate if (debug & D_TIMER) 11750Sstevel@tonic-gate logdebug("in_signal() got %d\n", buf); 11760Sstevel@tonic-gate 11770Sstevel@tonic-gate switch (buf) { 11780Sstevel@tonic-gate case SIGALRM: 11790Sstevel@tonic-gate if (debug & D_TIMER) { 11800Sstevel@tonic-gate uint_t now = getcurrenttime(); 11810Sstevel@tonic-gate 11820Sstevel@tonic-gate logdebug("in_signal(SIGALRM) delta %u\n", 11830Sstevel@tonic-gate now - timer_next); 11840Sstevel@tonic-gate } 11850Sstevel@tonic-gate timer_active = _B_FALSE; 11860Sstevel@tonic-gate run_timeouts(); 11870Sstevel@tonic-gate break; 11880Sstevel@tonic-gate case SIGUSR1: 11890Sstevel@tonic-gate logdebug("Printing configuration:\n"); 11900Sstevel@tonic-gate /* Print out the internal tables */ 11910Sstevel@tonic-gate phyint_inst_print_all(); 11920Sstevel@tonic-gate 11930Sstevel@tonic-gate /* 11940Sstevel@tonic-gate * Print out the accumulated statistics about missed 11950Sstevel@tonic-gate * probes (happens due to scheduling delay). 11960Sstevel@tonic-gate */ 11970Sstevel@tonic-gate logerr("Missed sending total of %d probes spread over" 11980Sstevel@tonic-gate " %d occurrences\n", probes_missed.pm_nprobes, 11990Sstevel@tonic-gate probes_missed.pm_ntimes); 12000Sstevel@tonic-gate 12010Sstevel@tonic-gate /* 12020Sstevel@tonic-gate * Print out the accumulated statistics about probes 12030Sstevel@tonic-gate * that were sent. 12040Sstevel@tonic-gate */ 12050Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; 12060Sstevel@tonic-gate pii = pii->pii_next) { 12070Sstevel@tonic-gate unacked = 0; 12080Sstevel@tonic-gate acked = pii->pii_cum_stats.acked; 12090Sstevel@tonic-gate lost = pii->pii_cum_stats.lost; 12100Sstevel@tonic-gate sent = pii->pii_cum_stats.sent; 12110Sstevel@tonic-gate unknown = pii->pii_cum_stats.unknown; 12120Sstevel@tonic-gate for (pr_ndx = 0; pr_ndx < PROBE_STATS_COUNT; pr_ndx++) { 12130Sstevel@tonic-gate switch (pii->pii_probes[pr_ndx].pr_status) { 12140Sstevel@tonic-gate case PR_ACKED: 12150Sstevel@tonic-gate acked++; 12160Sstevel@tonic-gate break; 12170Sstevel@tonic-gate case PR_LOST: 12180Sstevel@tonic-gate lost++; 12190Sstevel@tonic-gate break; 12200Sstevel@tonic-gate case PR_UNACKED: 12210Sstevel@tonic-gate unacked++; 12220Sstevel@tonic-gate break; 12230Sstevel@tonic-gate } 12240Sstevel@tonic-gate } 12250Sstevel@tonic-gate logerr("\nProbe stats on (%s %s)\n" 12260Sstevel@tonic-gate "Number of probes sent %lld\n" 12270Sstevel@tonic-gate "Number of probe acks received %lld\n" 12280Sstevel@tonic-gate "Number of probes/acks lost %lld\n" 12290Sstevel@tonic-gate "Number of valid unacknowled probes %lld\n" 12300Sstevel@tonic-gate "Number of ambiguous probe acks received %lld\n", 12310Sstevel@tonic-gate AF_STR(pii->pii_af), pii->pii_name, 12320Sstevel@tonic-gate sent, acked, lost, unacked, unknown); 12330Sstevel@tonic-gate } 12340Sstevel@tonic-gate break; 12350Sstevel@tonic-gate case SIGHUP: 12360Sstevel@tonic-gate logerr("SIGHUP: restart and reread config file\n"); 12370Sstevel@tonic-gate cleanup(); 12380Sstevel@tonic-gate (void) execv(argv0[0], argv0); 12390Sstevel@tonic-gate _exit(0177); 12400Sstevel@tonic-gate /* NOTREACHED */ 12410Sstevel@tonic-gate case SIGINT: 12420Sstevel@tonic-gate case SIGTERM: 12430Sstevel@tonic-gate case SIGQUIT: 12440Sstevel@tonic-gate cleanup(); 12450Sstevel@tonic-gate exit(0); 12460Sstevel@tonic-gate /* NOTREACHED */ 12470Sstevel@tonic-gate default: 12480Sstevel@tonic-gate logerr("in_signal: unknown signal: %d\n", buf); 12490Sstevel@tonic-gate } 12500Sstevel@tonic-gate } 12510Sstevel@tonic-gate 12520Sstevel@tonic-gate static void 12530Sstevel@tonic-gate cleanup(void) 12540Sstevel@tonic-gate { 12550Sstevel@tonic-gate struct phyint_instance *pii; 12560Sstevel@tonic-gate struct phyint_instance *next_pii; 12570Sstevel@tonic-gate 12580Sstevel@tonic-gate /* 12590Sstevel@tonic-gate * Make sure that we don't write to eventpipe in 12600Sstevel@tonic-gate * sig_handler() if any signal notably SIGALRM, 12610Sstevel@tonic-gate * occurs after we close the eventpipe descriptor below 12620Sstevel@tonic-gate */ 12630Sstevel@tonic-gate cleanup_started = _B_TRUE; 12640Sstevel@tonic-gate 12650Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = next_pii) { 12660Sstevel@tonic-gate next_pii = pii->pii_next; 12670Sstevel@tonic-gate phyint_inst_delete(pii); 12680Sstevel@tonic-gate } 12690Sstevel@tonic-gate 12700Sstevel@tonic-gate (void) close(ifsock_v4); 12710Sstevel@tonic-gate (void) close(ifsock_v6); 12720Sstevel@tonic-gate (void) close(rtsock_v4); 12730Sstevel@tonic-gate (void) close(rtsock_v6); 12740Sstevel@tonic-gate (void) close(lsock_v4); 12750Sstevel@tonic-gate (void) close(lsock_v6); 12760Sstevel@tonic-gate (void) close(0); 12770Sstevel@tonic-gate (void) close(1); 12780Sstevel@tonic-gate (void) close(2); 12790Sstevel@tonic-gate (void) close(mibfd); 12800Sstevel@tonic-gate (void) close(eventpipe_read); 12810Sstevel@tonic-gate (void) close(eventpipe_write); 12820Sstevel@tonic-gate } 12830Sstevel@tonic-gate 12840Sstevel@tonic-gate /* 12850Sstevel@tonic-gate * Create pipe for signal delivery and set up signal handlers. 12860Sstevel@tonic-gate */ 12870Sstevel@tonic-gate static void 12880Sstevel@tonic-gate setup_eventpipe(void) 12890Sstevel@tonic-gate { 12900Sstevel@tonic-gate int fds[2]; 12910Sstevel@tonic-gate struct sigaction act; 12920Sstevel@tonic-gate 12930Sstevel@tonic-gate if ((pipe(fds)) < 0) { 12940Sstevel@tonic-gate logperror("setup_eventpipe: pipe"); 12950Sstevel@tonic-gate exit(1); 12960Sstevel@tonic-gate } 12970Sstevel@tonic-gate eventpipe_read = fds[0]; 12980Sstevel@tonic-gate eventpipe_write = fds[1]; 12990Sstevel@tonic-gate if (poll_add(eventpipe_read) == -1) { 13000Sstevel@tonic-gate exit(1); 13010Sstevel@tonic-gate } 13020Sstevel@tonic-gate 13030Sstevel@tonic-gate act.sa_handler = sig_handler; 13040Sstevel@tonic-gate act.sa_flags = SA_RESTART; 13050Sstevel@tonic-gate (void) sigaction(SIGALRM, &act, NULL); 13060Sstevel@tonic-gate 13070Sstevel@tonic-gate (void) sigset(SIGHUP, sig_handler); 13080Sstevel@tonic-gate (void) sigset(SIGUSR1, sig_handler); 13090Sstevel@tonic-gate (void) sigset(SIGTERM, sig_handler); 13100Sstevel@tonic-gate (void) sigset(SIGINT, sig_handler); 13110Sstevel@tonic-gate (void) sigset(SIGQUIT, sig_handler); 13120Sstevel@tonic-gate } 13130Sstevel@tonic-gate 13140Sstevel@tonic-gate /* 13150Sstevel@tonic-gate * Create a routing socket for receiving RTM_IFINFO messages. 13160Sstevel@tonic-gate */ 13170Sstevel@tonic-gate static int 13180Sstevel@tonic-gate setup_rtsock(int af) 13190Sstevel@tonic-gate { 13200Sstevel@tonic-gate int s; 13210Sstevel@tonic-gate int flags; 13220Sstevel@tonic-gate 13230Sstevel@tonic-gate s = socket(PF_ROUTE, SOCK_RAW, af); 13240Sstevel@tonic-gate if (s == -1) { 13250Sstevel@tonic-gate logperror("setup_rtsock: socket PF_ROUTE"); 13260Sstevel@tonic-gate exit(1); 13270Sstevel@tonic-gate } 13280Sstevel@tonic-gate if ((flags = fcntl(s, F_GETFL, 0)) < 0) { 13290Sstevel@tonic-gate logperror("setup_rtsock: fcntl F_GETFL"); 13300Sstevel@tonic-gate (void) close(s); 13310Sstevel@tonic-gate exit(1); 13320Sstevel@tonic-gate } 13330Sstevel@tonic-gate if ((fcntl(s, F_SETFL, flags | O_NONBLOCK)) < 0) { 13340Sstevel@tonic-gate logperror("setup_rtsock: fcntl F_SETFL"); 13350Sstevel@tonic-gate (void) close(s); 13360Sstevel@tonic-gate exit(1); 13370Sstevel@tonic-gate } 13380Sstevel@tonic-gate if (poll_add(s) == -1) { 13390Sstevel@tonic-gate (void) close(s); 13400Sstevel@tonic-gate exit(1); 13410Sstevel@tonic-gate } 13420Sstevel@tonic-gate return (s); 13430Sstevel@tonic-gate } 13440Sstevel@tonic-gate 13450Sstevel@tonic-gate /* 13460Sstevel@tonic-gate * Process an RTM_IFINFO message received on a routing socket. 13470Sstevel@tonic-gate * The return value indicates whether a full interface scan is required. 13480Sstevel@tonic-gate * Link up/down notifications from the NICs are reflected in the 13490Sstevel@tonic-gate * IFF_RUNNING flag. 13500Sstevel@tonic-gate * If just the state of the IFF_RUNNING interface flag has changed, a 13510Sstevel@tonic-gate * a full interface scan isn't required. 13520Sstevel@tonic-gate */ 13530Sstevel@tonic-gate static boolean_t 13540Sstevel@tonic-gate process_rtm_ifinfo(if_msghdr_t *ifm, int type) 13550Sstevel@tonic-gate { 13560Sstevel@tonic-gate struct sockaddr_dl *sdl; 13570Sstevel@tonic-gate struct phyint *pi; 13580Sstevel@tonic-gate uint64_t old_flags; 13590Sstevel@tonic-gate struct phyint_instance *pii; 13600Sstevel@tonic-gate 13610Sstevel@tonic-gate assert(ifm->ifm_type == RTM_IFINFO && ifm->ifm_addrs == RTA_IFP); 13620Sstevel@tonic-gate 13630Sstevel@tonic-gate /* 13640Sstevel@tonic-gate * Although the sockaddr_dl structure is directly after the 13650Sstevel@tonic-gate * if_msghdr_t structure. At the time of writing, the size of the 13660Sstevel@tonic-gate * if_msghdr_t structure is different on 32 and 64 bit kernels, due 13670Sstevel@tonic-gate * to the presence of a timeval structure, which contains longs, 13680Sstevel@tonic-gate * in the if_data structure. Anyway, we know where the message ends, 13690Sstevel@tonic-gate * so we work backwards to get the start of the sockaddr_dl structure. 13700Sstevel@tonic-gate */ 13710Sstevel@tonic-gate /*LINTED*/ 13720Sstevel@tonic-gate sdl = (struct sockaddr_dl *)((char *)ifm + ifm->ifm_msglen - 13730Sstevel@tonic-gate sizeof (struct sockaddr_dl)); 13740Sstevel@tonic-gate 13750Sstevel@tonic-gate assert(sdl->sdl_family == AF_LINK); 13760Sstevel@tonic-gate 13770Sstevel@tonic-gate /* 13780Sstevel@tonic-gate * The interface name is in sdl_data. 13790Sstevel@tonic-gate * RTM_IFINFO messages are only generated for logical interface 13800Sstevel@tonic-gate * zero, so there is no colon and logical interface number to 13810Sstevel@tonic-gate * strip from the name. The name is not null terminated, but 13820Sstevel@tonic-gate * there should be enough space in sdl_data to add the null. 13830Sstevel@tonic-gate */ 13840Sstevel@tonic-gate if (sdl->sdl_nlen >= sizeof (sdl->sdl_data)) { 13850Sstevel@tonic-gate if (debug & D_LINKNOTE) 13860Sstevel@tonic-gate logdebug("process_rtm_ifinfo: " 13870Sstevel@tonic-gate "phyint name too long\n"); 13880Sstevel@tonic-gate return (_B_TRUE); 13890Sstevel@tonic-gate } 13900Sstevel@tonic-gate sdl->sdl_data[sdl->sdl_nlen] = 0; 13910Sstevel@tonic-gate 13920Sstevel@tonic-gate pi = phyint_lookup(sdl->sdl_data); 13930Sstevel@tonic-gate if (pi == NULL) { 13940Sstevel@tonic-gate if (debug & D_LINKNOTE) 13950Sstevel@tonic-gate logdebug("process_rtm_ifinfo: phyint lookup failed" 13960Sstevel@tonic-gate " for %s\n", sdl->sdl_data); 13970Sstevel@tonic-gate return (_B_TRUE); 13980Sstevel@tonic-gate } 13990Sstevel@tonic-gate 14000Sstevel@tonic-gate /* 14010Sstevel@tonic-gate * We want to try and avoid doing a full interface scan for 14020Sstevel@tonic-gate * link state notifications from the NICs, as indicated 14030Sstevel@tonic-gate * by the state of the IFF_RUNNING flag. If just the 14040Sstevel@tonic-gate * IFF_RUNNING flag has changed state, the link state changes 14050Sstevel@tonic-gate * are processed without a full scan. 14060Sstevel@tonic-gate * If there is both an IPv4 and IPv6 instance associated with 14070Sstevel@tonic-gate * the physical interface, we will get an RTM_IFINFO message 14080Sstevel@tonic-gate * for each instance. If we just maintained a single copy of 14090Sstevel@tonic-gate * the physical interface flags, it would appear that no flags 14100Sstevel@tonic-gate * had changed when the second message is processed, leading us 14110Sstevel@tonic-gate * to believe that the message wasn't generated by a flags change, 14120Sstevel@tonic-gate * and that a full interface scan is required. 14130Sstevel@tonic-gate * To get around this problem, two additional copies of the flags 14140Sstevel@tonic-gate * are kept, one copy for each instance. These are only used in 14150Sstevel@tonic-gate * this routine. At any one time, all three copies of the flags 14160Sstevel@tonic-gate * should be identical except for the IFF_RUNNING flag. The 14170Sstevel@tonic-gate * copy of the flags in the "phyint" structure is always up to 14180Sstevel@tonic-gate * date. 14190Sstevel@tonic-gate */ 14200Sstevel@tonic-gate pii = (type == AF_INET) ? pi->pi_v4 : pi->pi_v6; 14210Sstevel@tonic-gate if (pii == NULL) { 14220Sstevel@tonic-gate if (debug & D_LINKNOTE) 14230Sstevel@tonic-gate logdebug("process_rtm_ifinfo: no instance of address " 14240Sstevel@tonic-gate "family %s for %s\n", AF_STR(type), pi->pi_name); 14250Sstevel@tonic-gate return (_B_TRUE); 14260Sstevel@tonic-gate } 14270Sstevel@tonic-gate 14280Sstevel@tonic-gate old_flags = pii->pii_flags; 14290Sstevel@tonic-gate pii->pii_flags = PHYINT_FLAGS(ifm->ifm_flags); 14300Sstevel@tonic-gate pi->pi_flags = pii->pii_flags; 14310Sstevel@tonic-gate 14320Sstevel@tonic-gate if (debug & D_LINKNOTE) { 14330Sstevel@tonic-gate logdebug("process_rtm_ifinfo: %s address family: %s, " 14340Sstevel@tonic-gate "old flags: %llx, new flags: %llx\n", pi->pi_name, 14350Sstevel@tonic-gate AF_STR(type), old_flags, pi->pi_flags); 14360Sstevel@tonic-gate } 14370Sstevel@tonic-gate 14380Sstevel@tonic-gate /* 14390Sstevel@tonic-gate * If IFF_STANDBY has changed, indicate that the interface has changed 14400Sstevel@tonic-gate * types. 14410Sstevel@tonic-gate */ 14420Sstevel@tonic-gate if ((old_flags ^ pii->pii_flags) & IFF_STANDBY) 14430Sstevel@tonic-gate phyint_newtype(pi); 14440Sstevel@tonic-gate 14450Sstevel@tonic-gate /* 14460Sstevel@tonic-gate * If IFF_INACTIVE has been set, then no data addresses should be 14470Sstevel@tonic-gate * hosted on the interface. If IFF_INACTIVE has been cleared, then 14480Sstevel@tonic-gate * move previously failed-over addresses back to it, provided it is 14490Sstevel@tonic-gate * not failed. For details, see the state diagram in mpd_probe.c. 14500Sstevel@tonic-gate */ 14510Sstevel@tonic-gate if ((old_flags ^ pii->pii_flags) & IFF_INACTIVE) { 14520Sstevel@tonic-gate if (pii->pii_flags & IFF_INACTIVE) { 1453704Sethindra if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY)) 14540Sstevel@tonic-gate (void) try_failover(pi, FAILOVER_TO_NONSTANDBY); 14550Sstevel@tonic-gate } else { 14560Sstevel@tonic-gate if (pi->pi_state == PI_RUNNING && !pi->pi_full) { 14570Sstevel@tonic-gate pi->pi_empty = 0; 14580Sstevel@tonic-gate (void) try_failback(pi, _B_FALSE); 14590Sstevel@tonic-gate } 14600Sstevel@tonic-gate } 14610Sstevel@tonic-gate } 14620Sstevel@tonic-gate 14630Sstevel@tonic-gate /* Has just the IFF_RUNNING flag changed state ? */ 14640Sstevel@tonic-gate if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) { 14650Sstevel@tonic-gate struct phyint_instance *pii_other; 14660Sstevel@tonic-gate /* 14670Sstevel@tonic-gate * It wasn't just a link state change. Update 14680Sstevel@tonic-gate * the other instance's copy of the flags. 14690Sstevel@tonic-gate */ 14700Sstevel@tonic-gate pii_other = phyint_inst_other(pii); 14710Sstevel@tonic-gate if (pii_other != NULL) 14720Sstevel@tonic-gate pii_other->pii_flags = pii->pii_flags; 14730Sstevel@tonic-gate return (_B_TRUE); 14740Sstevel@tonic-gate } 14750Sstevel@tonic-gate 14760Sstevel@tonic-gate return (_B_FALSE); 14770Sstevel@tonic-gate } 14780Sstevel@tonic-gate 14790Sstevel@tonic-gate /* 14800Sstevel@tonic-gate * Retrieve as many routing socket messages as possible, and try to 14810Sstevel@tonic-gate * empty the routing sockets. Initiate full scan of targets or interfaces 14820Sstevel@tonic-gate * as needed. 14830Sstevel@tonic-gate * We listen on separate IPv4 an IPv6 sockets so that we can accurately 14840Sstevel@tonic-gate * detect changes in certain flags (see "process_rtm_ifinfo()" above). 14850Sstevel@tonic-gate */ 14860Sstevel@tonic-gate static void 14870Sstevel@tonic-gate process_rtsock(int rtsock_v4, int rtsock_v6) 14880Sstevel@tonic-gate { 14890Sstevel@tonic-gate int nbytes; 14900Sstevel@tonic-gate int64_t msg[2048 / 8]; 14910Sstevel@tonic-gate struct rt_msghdr *rtm; 14920Sstevel@tonic-gate boolean_t need_if_scan = _B_FALSE; 14930Sstevel@tonic-gate boolean_t need_rt_scan = _B_FALSE; 14940Sstevel@tonic-gate boolean_t rtm_ifinfo_seen = _B_FALSE; 14950Sstevel@tonic-gate int type; 14960Sstevel@tonic-gate 14970Sstevel@tonic-gate /* Read as many messages as possible and try to empty the sockets */ 14980Sstevel@tonic-gate for (type = AF_INET; ; type = AF_INET6) { 14990Sstevel@tonic-gate for (;;) { 15000Sstevel@tonic-gate nbytes = read((type == AF_INET) ? rtsock_v4 : 15010Sstevel@tonic-gate rtsock_v6, msg, sizeof (msg)); 15020Sstevel@tonic-gate if (nbytes <= 0) { 15030Sstevel@tonic-gate /* No more messages */ 15040Sstevel@tonic-gate break; 15050Sstevel@tonic-gate } 15060Sstevel@tonic-gate rtm = (struct rt_msghdr *)msg; 15070Sstevel@tonic-gate if (rtm->rtm_version != RTM_VERSION) { 15080Sstevel@tonic-gate logerr("process_rtsock: version %d " 15090Sstevel@tonic-gate "not understood\n", rtm->rtm_version); 15100Sstevel@tonic-gate break; 15110Sstevel@tonic-gate } 15120Sstevel@tonic-gate 15130Sstevel@tonic-gate if (debug & D_PHYINT) { 15140Sstevel@tonic-gate logdebug("process_rtsock: message %d\n", 15150Sstevel@tonic-gate rtm->rtm_type); 15160Sstevel@tonic-gate } 15170Sstevel@tonic-gate 15180Sstevel@tonic-gate switch (rtm->rtm_type) { 15190Sstevel@tonic-gate case RTM_NEWADDR: 15200Sstevel@tonic-gate case RTM_DELADDR: 15210Sstevel@tonic-gate /* 15220Sstevel@tonic-gate * Some logical interface has changed, 15230Sstevel@tonic-gate * have to scan everything to determine 15240Sstevel@tonic-gate * what actually changed. 15250Sstevel@tonic-gate */ 15260Sstevel@tonic-gate need_if_scan = _B_TRUE; 15270Sstevel@tonic-gate break; 15280Sstevel@tonic-gate 15290Sstevel@tonic-gate case RTM_IFINFO: 15300Sstevel@tonic-gate rtm_ifinfo_seen = _B_TRUE; 15310Sstevel@tonic-gate need_if_scan |= 15320Sstevel@tonic-gate process_rtm_ifinfo((if_msghdr_t *)rtm, 15330Sstevel@tonic-gate type); 15340Sstevel@tonic-gate break; 15350Sstevel@tonic-gate 15360Sstevel@tonic-gate case RTM_ADD: 15370Sstevel@tonic-gate case RTM_DELETE: 15380Sstevel@tonic-gate case RTM_CHANGE: 15390Sstevel@tonic-gate case RTM_OLDADD: 15400Sstevel@tonic-gate case RTM_OLDDEL: 15410Sstevel@tonic-gate need_rt_scan = _B_TRUE; 15420Sstevel@tonic-gate break; 15430Sstevel@tonic-gate 15440Sstevel@tonic-gate default: 15450Sstevel@tonic-gate /* Not interesting */ 15460Sstevel@tonic-gate break; 15470Sstevel@tonic-gate } 15480Sstevel@tonic-gate } 15490Sstevel@tonic-gate if (type == AF_INET6) 15500Sstevel@tonic-gate break; 15510Sstevel@tonic-gate } 15520Sstevel@tonic-gate 15530Sstevel@tonic-gate if (need_if_scan) { 15540Sstevel@tonic-gate if (debug & D_LINKNOTE && rtm_ifinfo_seen) 15550Sstevel@tonic-gate logdebug("process_rtsock: synchronizing with kernel\n"); 15560Sstevel@tonic-gate initifs(); 15570Sstevel@tonic-gate } else if (rtm_ifinfo_seen) { 15580Sstevel@tonic-gate if (debug & D_LINKNOTE) 15590Sstevel@tonic-gate logdebug("process_rtsock: " 15600Sstevel@tonic-gate "link up/down notification(s) seen\n"); 15610Sstevel@tonic-gate process_link_state_changes(); 15620Sstevel@tonic-gate } 15630Sstevel@tonic-gate 15640Sstevel@tonic-gate if (need_rt_scan) 15650Sstevel@tonic-gate init_router_targets(); 15660Sstevel@tonic-gate } 15670Sstevel@tonic-gate 15680Sstevel@tonic-gate /* 15690Sstevel@tonic-gate * Look if the phyint instance or one of its logints have been removed from 15700Sstevel@tonic-gate * the kernel and take appropriate action. 15710Sstevel@tonic-gate * Uses {pii,li}_in_use. 15720Sstevel@tonic-gate */ 15730Sstevel@tonic-gate static void 15740Sstevel@tonic-gate check_if_removed(struct phyint_instance *pii) 15750Sstevel@tonic-gate { 15760Sstevel@tonic-gate struct logint *li; 15770Sstevel@tonic-gate struct logint *next_li; 15780Sstevel@tonic-gate 15790Sstevel@tonic-gate /* Detect phyints that have been removed from the kernel. */ 15800Sstevel@tonic-gate if (!pii->pii_in_use) { 15810Sstevel@tonic-gate logtrace("%s %s has been removed from kernel\n", 15820Sstevel@tonic-gate AF_STR(pii->pii_af), pii->pii_phyint->pi_name); 15830Sstevel@tonic-gate phyint_inst_delete(pii); 15840Sstevel@tonic-gate } else { 15850Sstevel@tonic-gate /* Detect logints that have been removed. */ 15860Sstevel@tonic-gate for (li = pii->pii_logint; li != NULL; li = next_li) { 15870Sstevel@tonic-gate next_li = li->li_next; 15880Sstevel@tonic-gate if (!li->li_in_use) { 15890Sstevel@tonic-gate logint_delete(li); 15900Sstevel@tonic-gate } 15910Sstevel@tonic-gate } 15920Sstevel@tonic-gate } 15930Sstevel@tonic-gate } 15940Sstevel@tonic-gate 15950Sstevel@tonic-gate /* 15960Sstevel@tonic-gate * Send down a T_OPTMGMT_REQ to ip asking for all data in the various 15970Sstevel@tonic-gate * tables defined by mib2.h. Parse the returned data and extract 15980Sstevel@tonic-gate * the 'routing' information table. Process the 'routing' table 15990Sstevel@tonic-gate * to get the list of known onlink routers, and update our database. 16000Sstevel@tonic-gate * These onlink routers will serve as our probe targets. 16010Sstevel@tonic-gate * Returns false, if any system calls resulted in errors, true otherwise. 16020Sstevel@tonic-gate */ 16030Sstevel@tonic-gate static boolean_t 16040Sstevel@tonic-gate update_router_list(int fd) 16050Sstevel@tonic-gate { 16060Sstevel@tonic-gate union { 16070Sstevel@tonic-gate char ubuf[1024]; 16080Sstevel@tonic-gate union T_primitives uprim; 16090Sstevel@tonic-gate } buf; 16100Sstevel@tonic-gate 16110Sstevel@tonic-gate int flags; 16120Sstevel@tonic-gate struct strbuf ctlbuf; 16130Sstevel@tonic-gate struct strbuf databuf; 16140Sstevel@tonic-gate struct T_optmgmt_req *tor; 16150Sstevel@tonic-gate struct T_optmgmt_ack *toa; 16160Sstevel@tonic-gate struct T_error_ack *tea; 16170Sstevel@tonic-gate struct opthdr *optp; 16180Sstevel@tonic-gate struct opthdr *req; 16190Sstevel@tonic-gate int status; 16200Sstevel@tonic-gate t_scalar_t prim; 16210Sstevel@tonic-gate 16220Sstevel@tonic-gate tor = (struct T_optmgmt_req *)&buf; 16230Sstevel@tonic-gate 16240Sstevel@tonic-gate tor->PRIM_type = T_SVR4_OPTMGMT_REQ; 16250Sstevel@tonic-gate tor->OPT_offset = sizeof (struct T_optmgmt_req); 16260Sstevel@tonic-gate tor->OPT_length = sizeof (struct opthdr); 16270Sstevel@tonic-gate tor->MGMT_flags = T_CURRENT; 16280Sstevel@tonic-gate 16290Sstevel@tonic-gate req = (struct opthdr *)&tor[1]; 16300Sstevel@tonic-gate req->level = MIB2_IP; /* any MIB2_xxx value ok here */ 16310Sstevel@tonic-gate req->name = 0; 16320Sstevel@tonic-gate req->len = 0; 16330Sstevel@tonic-gate 16340Sstevel@tonic-gate ctlbuf.buf = (char *)&buf; 16350Sstevel@tonic-gate ctlbuf.len = tor->OPT_length + tor->OPT_offset; 16360Sstevel@tonic-gate ctlbuf.maxlen = sizeof (buf); 16370Sstevel@tonic-gate flags = 0; 16380Sstevel@tonic-gate if (putmsg(fd, &ctlbuf, NULL, flags) == -1) { 16390Sstevel@tonic-gate logperror("update_router_list: putmsg(ctl)"); 16400Sstevel@tonic-gate return (_B_FALSE); 16410Sstevel@tonic-gate } 16420Sstevel@tonic-gate 16430Sstevel@tonic-gate /* 16440Sstevel@tonic-gate * The response consists of multiple T_OPTMGMT_ACK msgs, 1 msg for 16450Sstevel@tonic-gate * each table defined in mib2.h. Each T_OPTMGMT_ACK msg contains 16460Sstevel@tonic-gate * a control and data part. The control part contains a struct 16470Sstevel@tonic-gate * T_optmgmt_ack followed by a struct opthdr. The 'opthdr' identifies 16480Sstevel@tonic-gate * the level, name and length of the data in the data part. The 16490Sstevel@tonic-gate * data part contains the actual table data. The last message 16500Sstevel@tonic-gate * is an end-of-data (EOD), consisting of a T_OPTMGMT_ACK and a 16510Sstevel@tonic-gate * single option with zero optlen. 16520Sstevel@tonic-gate */ 16530Sstevel@tonic-gate 16540Sstevel@tonic-gate for (;;) { 16550Sstevel@tonic-gate /* 16560Sstevel@tonic-gate * Go around this loop once for each table. Ignore 16570Sstevel@tonic-gate * all tables except the routing information table. 16580Sstevel@tonic-gate */ 16590Sstevel@tonic-gate flags = 0; 16600Sstevel@tonic-gate status = getmsg(fd, &ctlbuf, NULL, &flags); 16610Sstevel@tonic-gate if (status < 0) { 16620Sstevel@tonic-gate if (errno == EINTR) 16630Sstevel@tonic-gate continue; 16640Sstevel@tonic-gate logperror("update_router_list: getmsg(ctl)"); 16650Sstevel@tonic-gate return (_B_FALSE); 16660Sstevel@tonic-gate } 16670Sstevel@tonic-gate if (ctlbuf.len < sizeof (t_scalar_t)) { 16680Sstevel@tonic-gate logerr("update_router_list: ctlbuf.len %d\n", 16690Sstevel@tonic-gate ctlbuf.len); 16700Sstevel@tonic-gate return (_B_FALSE); 16710Sstevel@tonic-gate } 16720Sstevel@tonic-gate 16730Sstevel@tonic-gate prim = buf.uprim.type; 16740Sstevel@tonic-gate 16750Sstevel@tonic-gate switch (prim) { 16760Sstevel@tonic-gate 16770Sstevel@tonic-gate case T_ERROR_ACK: 16780Sstevel@tonic-gate tea = &buf.uprim.error_ack; 16790Sstevel@tonic-gate if (ctlbuf.len < sizeof (struct T_error_ack)) { 16800Sstevel@tonic-gate logerr("update_router_list: T_ERROR_ACK" 16810Sstevel@tonic-gate " ctlbuf.len %d\n", ctlbuf.len); 16820Sstevel@tonic-gate return (_B_FALSE); 16830Sstevel@tonic-gate } 16840Sstevel@tonic-gate logerr("update_router_list: T_ERROR_ACK:" 16850Sstevel@tonic-gate " TLI_error = 0x%lx, UNIX_error = 0x%lx\n", 16860Sstevel@tonic-gate tea->TLI_error, tea->UNIX_error); 16870Sstevel@tonic-gate return (_B_FALSE); 16880Sstevel@tonic-gate 16890Sstevel@tonic-gate case T_OPTMGMT_ACK: 16900Sstevel@tonic-gate toa = &buf.uprim.optmgmt_ack; 16910Sstevel@tonic-gate optp = (struct opthdr *)&toa[1]; 16920Sstevel@tonic-gate if (ctlbuf.len < sizeof (struct T_optmgmt_ack)) { 16930Sstevel@tonic-gate logerr("update_router_list: ctlbuf.len %d\n", 16940Sstevel@tonic-gate ctlbuf.len); 16950Sstevel@tonic-gate return (_B_FALSE); 16960Sstevel@tonic-gate } 16970Sstevel@tonic-gate if (toa->MGMT_flags != T_SUCCESS) { 16980Sstevel@tonic-gate logerr("update_router_list: MGMT_flags 0x%lx\n", 16990Sstevel@tonic-gate toa->MGMT_flags); 17000Sstevel@tonic-gate return (_B_FALSE); 17010Sstevel@tonic-gate } 17020Sstevel@tonic-gate break; 17030Sstevel@tonic-gate 17040Sstevel@tonic-gate default: 17050Sstevel@tonic-gate logerr("update_router_list: unknown primitive %ld\n", 17060Sstevel@tonic-gate prim); 17070Sstevel@tonic-gate return (_B_FALSE); 17080Sstevel@tonic-gate } 17090Sstevel@tonic-gate 17100Sstevel@tonic-gate /* Process the T_OPGMGMT_ACK below */ 17110Sstevel@tonic-gate assert(prim == T_OPTMGMT_ACK); 17120Sstevel@tonic-gate 17130Sstevel@tonic-gate switch (status) { 17140Sstevel@tonic-gate case 0: 17150Sstevel@tonic-gate /* 17160Sstevel@tonic-gate * We have reached the end of this T_OPTMGMT_ACK 17170Sstevel@tonic-gate * message. If this is the last message i.e EOD, 17180Sstevel@tonic-gate * return, else process the next T_OPTMGMT_ACK msg. 17190Sstevel@tonic-gate */ 17200Sstevel@tonic-gate if ((ctlbuf.len == sizeof (struct T_optmgmt_ack) + 17210Sstevel@tonic-gate sizeof (struct opthdr)) && optp->len == 0 && 17220Sstevel@tonic-gate optp->name == 0 && optp->level == 0) { 17230Sstevel@tonic-gate /* 17240Sstevel@tonic-gate * This is the EOD message. Return 17250Sstevel@tonic-gate */ 17260Sstevel@tonic-gate return (_B_TRUE); 17270Sstevel@tonic-gate } 17280Sstevel@tonic-gate continue; 17290Sstevel@tonic-gate 17300Sstevel@tonic-gate case MORECTL: 17310Sstevel@tonic-gate case MORECTL | MOREDATA: 17320Sstevel@tonic-gate /* 17330Sstevel@tonic-gate * This should not happen. We should be able to read 17340Sstevel@tonic-gate * the control portion in a single getmsg. 17350Sstevel@tonic-gate */ 17360Sstevel@tonic-gate logerr("update_router_list: MORECTL\n"); 17370Sstevel@tonic-gate return (_B_FALSE); 17380Sstevel@tonic-gate 17390Sstevel@tonic-gate case MOREDATA: 17400Sstevel@tonic-gate databuf.maxlen = optp->len; 17410Sstevel@tonic-gate /* malloc of 0 bytes is ok */ 17420Sstevel@tonic-gate databuf.buf = malloc((size_t)optp->len); 17430Sstevel@tonic-gate if (databuf.maxlen != 0 && databuf.buf == NULL) { 17440Sstevel@tonic-gate logperror("update_router_list: malloc"); 17450Sstevel@tonic-gate return (_B_FALSE); 17460Sstevel@tonic-gate } 17470Sstevel@tonic-gate databuf.len = 0; 17480Sstevel@tonic-gate flags = 0; 17490Sstevel@tonic-gate for (;;) { 17500Sstevel@tonic-gate status = getmsg(fd, NULL, &databuf, &flags); 17510Sstevel@tonic-gate if (status >= 0) { 17520Sstevel@tonic-gate break; 17530Sstevel@tonic-gate } else if (errno == EINTR) { 17540Sstevel@tonic-gate continue; 17550Sstevel@tonic-gate } else { 17560Sstevel@tonic-gate logperror("update_router_list:" 17570Sstevel@tonic-gate " getmsg(data)"); 17580Sstevel@tonic-gate free(databuf.buf); 17590Sstevel@tonic-gate return (_B_FALSE); 17600Sstevel@tonic-gate } 17610Sstevel@tonic-gate } 17620Sstevel@tonic-gate 17630Sstevel@tonic-gate if (optp->level == MIB2_IP && 17640Sstevel@tonic-gate optp->name == MIB2_IP_ROUTE) { 17650Sstevel@tonic-gate /* LINTED */ 17660Sstevel@tonic-gate ire_process_v4((mib2_ipRouteEntry_t *) 17670Sstevel@tonic-gate databuf.buf, databuf.len); 17680Sstevel@tonic-gate } else if (optp->level == MIB2_IP6 && 17690Sstevel@tonic-gate optp->name == MIB2_IP6_ROUTE) { 17700Sstevel@tonic-gate /* LINTED */ 17710Sstevel@tonic-gate ire_process_v6((mib2_ipv6RouteEntry_t *) 17720Sstevel@tonic-gate databuf.buf, databuf.len); 17730Sstevel@tonic-gate } 17740Sstevel@tonic-gate free(databuf.buf); 17750Sstevel@tonic-gate } 17760Sstevel@tonic-gate } 17770Sstevel@tonic-gate /* NOTREACHED */ 17780Sstevel@tonic-gate } 17790Sstevel@tonic-gate 17800Sstevel@tonic-gate /* 17810Sstevel@tonic-gate * Examine the IPv4 routing table, for default routers. For each default 17820Sstevel@tonic-gate * router, populate the list of targets of each phyint that is on the same 17830Sstevel@tonic-gate * link as the default router 17840Sstevel@tonic-gate */ 17850Sstevel@tonic-gate static void 17860Sstevel@tonic-gate ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len) 17870Sstevel@tonic-gate { 17880Sstevel@tonic-gate mib2_ipRouteEntry_t *rp; 17890Sstevel@tonic-gate mib2_ipRouteEntry_t *rp1; 17900Sstevel@tonic-gate struct in_addr nexthop_v4; 17910Sstevel@tonic-gate mib2_ipRouteEntry_t *endp; 17920Sstevel@tonic-gate 17930Sstevel@tonic-gate if (len == 0) 17940Sstevel@tonic-gate return; 17950Sstevel@tonic-gate assert((len % sizeof (mib2_ipRouteEntry_t)) == 0); 17960Sstevel@tonic-gate 17970Sstevel@tonic-gate endp = buf + (len / sizeof (mib2_ipRouteEntry_t)); 17980Sstevel@tonic-gate 17990Sstevel@tonic-gate /* 18000Sstevel@tonic-gate * Loop thru the routing table entries. Process any IRE_DEFAULT, 18010Sstevel@tonic-gate * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others. 18020Sstevel@tonic-gate * For each such IRE_OFFSUBNET ire, get the nexthop gateway address. 18030Sstevel@tonic-gate * This is a potential target for probing, which we try to add 18040Sstevel@tonic-gate * to the list of probe targets. 18050Sstevel@tonic-gate */ 18060Sstevel@tonic-gate for (rp = buf; rp < endp; rp++) { 18070Sstevel@tonic-gate if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET)) 18080Sstevel@tonic-gate continue; 18090Sstevel@tonic-gate 18100Sstevel@tonic-gate /* Get the nexthop address. */ 18110Sstevel@tonic-gate nexthop_v4.s_addr = rp->ipRouteNextHop; 18120Sstevel@tonic-gate 18130Sstevel@tonic-gate /* 18140Sstevel@tonic-gate * Get the nexthop address. Then determine the outgoing 18150Sstevel@tonic-gate * interface, by examining all interface IREs, and picking the 18160Sstevel@tonic-gate * match. We don't look at the interface specified in the route 18170Sstevel@tonic-gate * because we need to add the router target on all matching 18180Sstevel@tonic-gate * interfaces anyway; the goal is to avoid falling back to 18190Sstevel@tonic-gate * multicast when some interfaces are in the same subnet but 18200Sstevel@tonic-gate * not in the same group. 18210Sstevel@tonic-gate */ 18220Sstevel@tonic-gate for (rp1 = buf; rp1 < endp; rp1++) { 18230Sstevel@tonic-gate if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE)) { 18240Sstevel@tonic-gate continue; 18250Sstevel@tonic-gate } 18260Sstevel@tonic-gate 18270Sstevel@tonic-gate /* 18280Sstevel@tonic-gate * Determine the interface IRE that matches the nexthop. 18290Sstevel@tonic-gate * i.e. (IRE addr & IRE mask) == (nexthop & IRE mask) 18300Sstevel@tonic-gate */ 18310Sstevel@tonic-gate if ((rp1->ipRouteDest & rp1->ipRouteMask) == 18320Sstevel@tonic-gate (nexthop_v4.s_addr & rp1->ipRouteMask)) { 18330Sstevel@tonic-gate /* 18340Sstevel@tonic-gate * We found the interface ire 18350Sstevel@tonic-gate */ 18360Sstevel@tonic-gate router_add_v4(rp1, nexthop_v4); 18370Sstevel@tonic-gate } 18380Sstevel@tonic-gate } 18390Sstevel@tonic-gate } 18400Sstevel@tonic-gate } 18410Sstevel@tonic-gate 18420Sstevel@tonic-gate void 18430Sstevel@tonic-gate router_add_v4(mib2_ipRouteEntry_t *rp1, struct in_addr nexthop_v4) 18440Sstevel@tonic-gate { 18450Sstevel@tonic-gate char *cp; 18460Sstevel@tonic-gate char ifname[LIFNAMSIZ + 1]; 18470Sstevel@tonic-gate struct in6_addr nexthop; 18480Sstevel@tonic-gate int len; 18490Sstevel@tonic-gate 18500Sstevel@tonic-gate if (debug & D_TARGET) 18510Sstevel@tonic-gate logdebug("router_add_v4()\n"); 18520Sstevel@tonic-gate 18530Sstevel@tonic-gate len = MIN(rp1->ipRouteIfIndex.o_length, sizeof (ifname) - 1); 18540Sstevel@tonic-gate (void) memcpy(ifname, rp1->ipRouteIfIndex.o_bytes, len); 18550Sstevel@tonic-gate ifname[len] = '\0'; 18560Sstevel@tonic-gate 18570Sstevel@tonic-gate if (ifname[0] == '\0') 18580Sstevel@tonic-gate return; 18590Sstevel@tonic-gate 18600Sstevel@tonic-gate cp = strchr(ifname, IF_SEPARATOR); 18610Sstevel@tonic-gate if (cp != NULL) 18620Sstevel@tonic-gate *cp = '\0'; 18630Sstevel@tonic-gate 18640Sstevel@tonic-gate IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop); 18650Sstevel@tonic-gate router_add_common(AF_INET, ifname, nexthop); 18660Sstevel@tonic-gate } 18670Sstevel@tonic-gate 18680Sstevel@tonic-gate void 18690Sstevel@tonic-gate router_add_common(int af, char *ifname, struct in6_addr nexthop) 18700Sstevel@tonic-gate { 18710Sstevel@tonic-gate struct phyint_instance *pii; 18720Sstevel@tonic-gate struct phyint *pi; 18730Sstevel@tonic-gate 18740Sstevel@tonic-gate if (debug & D_TARGET) 18750Sstevel@tonic-gate logdebug("router_add_common(%s %s)\n", AF_STR(af), ifname); 18760Sstevel@tonic-gate 18770Sstevel@tonic-gate /* 18780Sstevel@tonic-gate * Retrieve the phyint instance; bail if it's not known to us yet. 18790Sstevel@tonic-gate */ 18800Sstevel@tonic-gate pii = phyint_inst_lookup(af, ifname); 18810Sstevel@tonic-gate if (pii == NULL) 18820Sstevel@tonic-gate return; 18830Sstevel@tonic-gate 18840Sstevel@tonic-gate /* 18850Sstevel@tonic-gate * Don't use our own addresses as targets. 18860Sstevel@tonic-gate */ 1887*2250Srk129064 if (own_address(nexthop)) 18880Sstevel@tonic-gate return; 18890Sstevel@tonic-gate 18900Sstevel@tonic-gate /* 18910Sstevel@tonic-gate * If the phyint is part a named group, then add the address to all 18920Sstevel@tonic-gate * members of the group; note that this is suboptimal in the IPv4 case 18930Sstevel@tonic-gate * as it has already been added to all matching interfaces in 18940Sstevel@tonic-gate * ire_process_v4(). Otherwise, add the address only to the phyint 18950Sstevel@tonic-gate * itself, since other phyints in the anongroup may not be on the same 18960Sstevel@tonic-gate * subnet. 18970Sstevel@tonic-gate */ 18980Sstevel@tonic-gate pi = pii->pii_phyint; 18990Sstevel@tonic-gate if (pi->pi_group == phyint_anongroup) { 19000Sstevel@tonic-gate target_add(pii, nexthop, _B_TRUE); 19010Sstevel@tonic-gate } else { 19020Sstevel@tonic-gate pi = pi->pi_group->pg_phyint; 19030Sstevel@tonic-gate for (; pi != NULL; pi = pi->pi_pgnext) 19040Sstevel@tonic-gate target_add(PHYINT_INSTANCE(pi, af), nexthop, _B_TRUE); 19050Sstevel@tonic-gate } 19060Sstevel@tonic-gate } 19070Sstevel@tonic-gate 19080Sstevel@tonic-gate /* 19090Sstevel@tonic-gate * Examine the IPv6 routing table, for default routers. For each default 19100Sstevel@tonic-gate * router, populate the list of targets of each phyint that is on the same 19110Sstevel@tonic-gate * link as the default router 19120Sstevel@tonic-gate */ 19130Sstevel@tonic-gate static void 19140Sstevel@tonic-gate ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len) 19150Sstevel@tonic-gate { 19160Sstevel@tonic-gate mib2_ipv6RouteEntry_t *rp; 19170Sstevel@tonic-gate mib2_ipv6RouteEntry_t *endp; 19180Sstevel@tonic-gate struct in6_addr nexthop_v6; 19190Sstevel@tonic-gate 19200Sstevel@tonic-gate if (debug & D_TARGET) 19210Sstevel@tonic-gate logdebug("ire_process_v6(len %d)\n", len); 19220Sstevel@tonic-gate 19230Sstevel@tonic-gate if (len == 0) 19240Sstevel@tonic-gate return; 19250Sstevel@tonic-gate 19260Sstevel@tonic-gate assert((len % sizeof (mib2_ipv6RouteEntry_t)) == 0); 19270Sstevel@tonic-gate endp = buf + (len / sizeof (mib2_ipv6RouteEntry_t)); 19280Sstevel@tonic-gate 19290Sstevel@tonic-gate /* 19300Sstevel@tonic-gate * Loop thru the routing table entries. Process any IRE_DEFAULT, 19310Sstevel@tonic-gate * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others. 19320Sstevel@tonic-gate * For each such IRE_OFFSUBNET ire, get the nexthop gateway address. 19330Sstevel@tonic-gate * This is a potential target for probing, which we try to add 19340Sstevel@tonic-gate * to the list of probe targets. 19350Sstevel@tonic-gate */ 19360Sstevel@tonic-gate for (rp = buf; rp < endp; rp++) { 19370Sstevel@tonic-gate if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET)) 19380Sstevel@tonic-gate continue; 19390Sstevel@tonic-gate 19400Sstevel@tonic-gate /* 19410Sstevel@tonic-gate * We have the outgoing interface in ipv6RouteIfIndex 19420Sstevel@tonic-gate * if ipv6RouteIfindex.o_length is non-zero. The outgoing 19430Sstevel@tonic-gate * interface must be present for link-local addresses. Since 19440Sstevel@tonic-gate * we use only link-local addreses for probing, we don't 19450Sstevel@tonic-gate * consider the case when the outgoing interface is not 19460Sstevel@tonic-gate * known and we need to scan interface ires 19470Sstevel@tonic-gate */ 19480Sstevel@tonic-gate nexthop_v6 = rp->ipv6RouteNextHop; 19490Sstevel@tonic-gate if (rp->ipv6RouteIfIndex.o_length != 0) { 19500Sstevel@tonic-gate /* 19510Sstevel@tonic-gate * We already have the outgoing interface 19520Sstevel@tonic-gate * in ipv6RouteIfIndex. 19530Sstevel@tonic-gate */ 19540Sstevel@tonic-gate router_add_v6(rp, nexthop_v6); 19550Sstevel@tonic-gate } 19560Sstevel@tonic-gate } 19570Sstevel@tonic-gate } 19580Sstevel@tonic-gate 19590Sstevel@tonic-gate 19600Sstevel@tonic-gate void 19610Sstevel@tonic-gate router_add_v6(mib2_ipv6RouteEntry_t *rp1, struct in6_addr nexthop_v6) 19620Sstevel@tonic-gate { 19630Sstevel@tonic-gate char ifname[LIFNAMSIZ + 1]; 19640Sstevel@tonic-gate char *cp; 19650Sstevel@tonic-gate int len; 19660Sstevel@tonic-gate 19670Sstevel@tonic-gate if (debug & D_TARGET) 19680Sstevel@tonic-gate logdebug("router_add_v6()\n"); 19690Sstevel@tonic-gate 19700Sstevel@tonic-gate len = MIN(rp1->ipv6RouteIfIndex.o_length, sizeof (ifname) - 1); 19710Sstevel@tonic-gate (void) memcpy(ifname, rp1->ipv6RouteIfIndex.o_bytes, len); 19720Sstevel@tonic-gate ifname[len] = '\0'; 19730Sstevel@tonic-gate 19740Sstevel@tonic-gate if (ifname[0] == '\0') 19750Sstevel@tonic-gate return; 19760Sstevel@tonic-gate 19770Sstevel@tonic-gate cp = strchr(ifname, IF_SEPARATOR); 19780Sstevel@tonic-gate if (cp != NULL) 19790Sstevel@tonic-gate *cp = '\0'; 19800Sstevel@tonic-gate 19810Sstevel@tonic-gate router_add_common(AF_INET6, ifname, nexthop_v6); 19820Sstevel@tonic-gate } 19830Sstevel@tonic-gate 19840Sstevel@tonic-gate 19850Sstevel@tonic-gate 19860Sstevel@tonic-gate /* 19870Sstevel@tonic-gate * Build a list of target routers, by scanning the routing tables. 19880Sstevel@tonic-gate * It is assumed that interface routes exist, to reach the routers. 19890Sstevel@tonic-gate */ 19900Sstevel@tonic-gate static void 19910Sstevel@tonic-gate init_router_targets(void) 19920Sstevel@tonic-gate { 19930Sstevel@tonic-gate struct target *tg; 19940Sstevel@tonic-gate struct target *next_tg; 19950Sstevel@tonic-gate struct phyint_instance *pii; 19960Sstevel@tonic-gate struct phyint *pi; 19970Sstevel@tonic-gate 19980Sstevel@tonic-gate if (force_mcast) 19990Sstevel@tonic-gate return; 20000Sstevel@tonic-gate 20010Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 20020Sstevel@tonic-gate pi = pii->pii_phyint; 20030Sstevel@tonic-gate /* 20040Sstevel@tonic-gate * Exclude ptp and host targets. Set tg_in_use to false, 20050Sstevel@tonic-gate * only for router targets. 20060Sstevel@tonic-gate */ 20070Sstevel@tonic-gate if (!pii->pii_targets_are_routers || 20080Sstevel@tonic-gate (pi->pi_flags & IFF_POINTOPOINT)) 20090Sstevel@tonic-gate continue; 20100Sstevel@tonic-gate 20110Sstevel@tonic-gate for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) 20120Sstevel@tonic-gate tg->tg_in_use = 0; 20130Sstevel@tonic-gate } 20140Sstevel@tonic-gate 20150Sstevel@tonic-gate if (mibfd < 0) { 20160Sstevel@tonic-gate mibfd = open("/dev/ip", O_RDWR); 20170Sstevel@tonic-gate if (mibfd < 0) { 20180Sstevel@tonic-gate logperror("mibopen: ip open"); 20190Sstevel@tonic-gate exit(1); 20200Sstevel@tonic-gate } 20210Sstevel@tonic-gate } 20220Sstevel@tonic-gate 20230Sstevel@tonic-gate if (!update_router_list(mibfd)) { 20240Sstevel@tonic-gate (void) close(mibfd); 20250Sstevel@tonic-gate mibfd = -1; 20260Sstevel@tonic-gate } 20270Sstevel@tonic-gate 20280Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 20290Sstevel@tonic-gate if (!pii->pii_targets_are_routers || 20300Sstevel@tonic-gate (pi->pi_flags & IFF_POINTOPOINT)) 20310Sstevel@tonic-gate continue; 20320Sstevel@tonic-gate 20330Sstevel@tonic-gate for (tg = pii->pii_targets; tg != NULL; tg = next_tg) { 20340Sstevel@tonic-gate next_tg = tg->tg_next; 20350Sstevel@tonic-gate if (!tg->tg_in_use) { 20360Sstevel@tonic-gate target_delete(tg); 20370Sstevel@tonic-gate } 20380Sstevel@tonic-gate } 20390Sstevel@tonic-gate } 20400Sstevel@tonic-gate } 20410Sstevel@tonic-gate 20420Sstevel@tonic-gate /* 20430Sstevel@tonic-gate * Attempt to assign host targets to any interfaces that do not currently 20440Sstevel@tonic-gate * have probe targets by sharing targets with other interfaces in the group. 20450Sstevel@tonic-gate */ 20460Sstevel@tonic-gate static void 20470Sstevel@tonic-gate init_host_targets(void) 20480Sstevel@tonic-gate { 20490Sstevel@tonic-gate struct phyint_instance *pii; 20500Sstevel@tonic-gate struct phyint_group *pg; 20510Sstevel@tonic-gate 20520Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) { 20530Sstevel@tonic-gate pg = pii->pii_phyint->pi_group; 20540Sstevel@tonic-gate if (pg != phyint_anongroup && pii->pii_targets == NULL) 20550Sstevel@tonic-gate dup_host_targets(pii); 20560Sstevel@tonic-gate } 20570Sstevel@tonic-gate } 20580Sstevel@tonic-gate 20590Sstevel@tonic-gate /* 20600Sstevel@tonic-gate * Duplicate host targets from other phyints of the group to 20610Sstevel@tonic-gate * the phyint instance 'desired_pii'. 20620Sstevel@tonic-gate */ 20630Sstevel@tonic-gate static void 20640Sstevel@tonic-gate dup_host_targets(struct phyint_instance *desired_pii) 20650Sstevel@tonic-gate { 20660Sstevel@tonic-gate int af; 20670Sstevel@tonic-gate struct phyint *pi; 20680Sstevel@tonic-gate struct phyint_instance *pii; 20690Sstevel@tonic-gate struct target *tg; 20700Sstevel@tonic-gate 20710Sstevel@tonic-gate assert(desired_pii->pii_phyint->pi_group != phyint_anongroup); 20720Sstevel@tonic-gate 20730Sstevel@tonic-gate af = desired_pii->pii_af; 20740Sstevel@tonic-gate 20750Sstevel@tonic-gate /* 20760Sstevel@tonic-gate * For every phyint in the same group as desired_pii, check if 20770Sstevel@tonic-gate * it has any host targets. If so add them to desired_pii. 20780Sstevel@tonic-gate */ 20790Sstevel@tonic-gate for (pi = desired_pii->pii_phyint; pi != NULL; pi = pi->pi_pgnext) { 20800Sstevel@tonic-gate pii = PHYINT_INSTANCE(pi, af); 20810Sstevel@tonic-gate /* 20820Sstevel@tonic-gate * We know that we don't have targets on this phyint instance 20830Sstevel@tonic-gate * since we have been called. But we still check for 20840Sstevel@tonic-gate * pii_targets_are_routers because another phyint instance 20850Sstevel@tonic-gate * could have router targets, since IFF_NOFAILOVER addresses 20860Sstevel@tonic-gate * on different phyint instances may belong to different 20870Sstevel@tonic-gate * subnets. 20880Sstevel@tonic-gate */ 20890Sstevel@tonic-gate if ((pii == NULL) || (pii == desired_pii) || 20900Sstevel@tonic-gate pii->pii_targets_are_routers) 20910Sstevel@tonic-gate continue; 20920Sstevel@tonic-gate for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) { 20930Sstevel@tonic-gate target_create(desired_pii, tg->tg_address, _B_FALSE); 20940Sstevel@tonic-gate } 20950Sstevel@tonic-gate } 20960Sstevel@tonic-gate } 20970Sstevel@tonic-gate 20980Sstevel@tonic-gate static void 20990Sstevel@tonic-gate usage(char *cmd) 21000Sstevel@tonic-gate { 21010Sstevel@tonic-gate (void) fprintf(stderr, "usage: %s\n", cmd); 21020Sstevel@tonic-gate } 21030Sstevel@tonic-gate 21040Sstevel@tonic-gate 21050Sstevel@tonic-gate #define MPATHD_DEFAULT_FILE "/etc/default/mpathd" 21060Sstevel@tonic-gate 21070Sstevel@tonic-gate /* Get an option from the /etc/default/mpathd file */ 21080Sstevel@tonic-gate static char * 21090Sstevel@tonic-gate getdefault(char *name) 21100Sstevel@tonic-gate { 21110Sstevel@tonic-gate char namebuf[BUFSIZ]; 21120Sstevel@tonic-gate char *value = NULL; 21130Sstevel@tonic-gate 21140Sstevel@tonic-gate if (defopen(MPATHD_DEFAULT_FILE) == 0) { 21150Sstevel@tonic-gate char *cp; 21160Sstevel@tonic-gate int flags; 21170Sstevel@tonic-gate 21180Sstevel@tonic-gate /* 21190Sstevel@tonic-gate * ignore case 21200Sstevel@tonic-gate */ 21210Sstevel@tonic-gate flags = defcntl(DC_GETFLAGS, 0); 21220Sstevel@tonic-gate TURNOFF(flags, DC_CASE); 21230Sstevel@tonic-gate (void) defcntl(DC_SETFLAGS, flags); 21240Sstevel@tonic-gate 21250Sstevel@tonic-gate /* Add "=" to the name */ 21260Sstevel@tonic-gate (void) strncpy(namebuf, name, sizeof (namebuf) - 2); 21270Sstevel@tonic-gate (void) strncat(namebuf, "=", 2); 21280Sstevel@tonic-gate 21290Sstevel@tonic-gate if ((cp = defread(namebuf)) != NULL) 21300Sstevel@tonic-gate value = strdup(cp); 21310Sstevel@tonic-gate 21320Sstevel@tonic-gate /* close */ 21330Sstevel@tonic-gate (void) defopen((char *)NULL); 21340Sstevel@tonic-gate } 21350Sstevel@tonic-gate return (value); 21360Sstevel@tonic-gate } 21370Sstevel@tonic-gate 21380Sstevel@tonic-gate 21390Sstevel@tonic-gate /* 21400Sstevel@tonic-gate * Command line options below 21410Sstevel@tonic-gate */ 21420Sstevel@tonic-gate boolean_t failback_enabled = _B_TRUE; /* failback enabled/disabled */ 21430Sstevel@tonic-gate boolean_t track_all_phyints = _B_FALSE; /* option to track all NICs */ 21440Sstevel@tonic-gate static boolean_t adopt = _B_FALSE; 21450Sstevel@tonic-gate static boolean_t foreground = _B_FALSE; 21460Sstevel@tonic-gate 21470Sstevel@tonic-gate int 21480Sstevel@tonic-gate main(int argc, char *argv[]) 21490Sstevel@tonic-gate { 21500Sstevel@tonic-gate int i; 21510Sstevel@tonic-gate int c; 21520Sstevel@tonic-gate struct phyint_instance *pii; 21530Sstevel@tonic-gate char *value; 21540Sstevel@tonic-gate 21550Sstevel@tonic-gate argv0 = argv; /* Saved for re-exec on SIGHUP */ 21560Sstevel@tonic-gate srandom(gethostid()); /* Initialize the random number generator */ 21570Sstevel@tonic-gate 21580Sstevel@tonic-gate /* 21590Sstevel@tonic-gate * NOTE: The messages output by in.mpathd are not suitable for 21600Sstevel@tonic-gate * translation, so we do not call textdomain(). 21610Sstevel@tonic-gate */ 21620Sstevel@tonic-gate (void) setlocale(LC_ALL, ""); 21630Sstevel@tonic-gate 21640Sstevel@tonic-gate /* 21650Sstevel@tonic-gate * Get the user specified value of 'failure detection time' 21660Sstevel@tonic-gate * from /etc/default/mpathd 21670Sstevel@tonic-gate */ 21680Sstevel@tonic-gate value = getdefault("FAILURE_DETECTION_TIME"); 21690Sstevel@tonic-gate if (value != NULL) { 21700Sstevel@tonic-gate user_failure_detection_time = 21710Sstevel@tonic-gate (int)strtol((char *)value, NULL, 0); 21720Sstevel@tonic-gate 21730Sstevel@tonic-gate if (user_failure_detection_time <= 0) { 21740Sstevel@tonic-gate user_failure_detection_time = FAILURE_DETECTION_TIME; 21750Sstevel@tonic-gate logerr("Invalid failure detection time %s, assuming " 21760Sstevel@tonic-gate "default %d\n", value, user_failure_detection_time); 21770Sstevel@tonic-gate 21780Sstevel@tonic-gate } else if (user_failure_detection_time < 21790Sstevel@tonic-gate MIN_FAILURE_DETECTION_TIME) { 21800Sstevel@tonic-gate user_failure_detection_time = 21810Sstevel@tonic-gate MIN_FAILURE_DETECTION_TIME; 21820Sstevel@tonic-gate logerr("Too small failure detection time of %s, " 21830Sstevel@tonic-gate "assuming minimum %d\n", value, 21840Sstevel@tonic-gate user_failure_detection_time); 21850Sstevel@tonic-gate } 21860Sstevel@tonic-gate free(value); 21870Sstevel@tonic-gate } else { 21880Sstevel@tonic-gate /* User has not specified the parameter, Use default value */ 21890Sstevel@tonic-gate user_failure_detection_time = FAILURE_DETECTION_TIME; 21900Sstevel@tonic-gate } 21910Sstevel@tonic-gate 21920Sstevel@tonic-gate /* 21930Sstevel@tonic-gate * This gives the frequency at which probes will be sent. 21940Sstevel@tonic-gate * When fdt ms elapses, we should be able to determine 21950Sstevel@tonic-gate * whether 5 consecutive probes have failed or not. 21960Sstevel@tonic-gate * 1 probe will be sent in every user_probe_interval ms, 21970Sstevel@tonic-gate * randomly anytime in the (0.5 - 1.0) 2nd half of every 21980Sstevel@tonic-gate * user_probe_interval. Thus when we send out probe 'n' we 21990Sstevel@tonic-gate * can be sure that probe 'n - 2' is lost, if we have not 22000Sstevel@tonic-gate * got the ack. (since the probe interval is > crtt). But 22010Sstevel@tonic-gate * probe 'n - 1' may be a valid unacked probe, since the 22020Sstevel@tonic-gate * time between 2 successive probes could be as small as 22030Sstevel@tonic-gate * 0.5 * user_probe_interval. Hence the NUM_PROBE_FAILS + 2 22040Sstevel@tonic-gate */ 22050Sstevel@tonic-gate user_probe_interval = user_failure_detection_time / 22060Sstevel@tonic-gate (NUM_PROBE_FAILS + 2); 22070Sstevel@tonic-gate 22080Sstevel@tonic-gate /* 22090Sstevel@tonic-gate * Get the user specified value of failback_enabled from 22100Sstevel@tonic-gate * /etc/default/mpathd 22110Sstevel@tonic-gate */ 22120Sstevel@tonic-gate value = getdefault("FAILBACK"); 22130Sstevel@tonic-gate if (value != NULL) { 22140Sstevel@tonic-gate if (strncasecmp(value, "yes", 3) == 0) 22150Sstevel@tonic-gate failback_enabled = _B_TRUE; 22160Sstevel@tonic-gate else if (strncasecmp(value, "no", 2) == 0) 22170Sstevel@tonic-gate failback_enabled = _B_FALSE; 22180Sstevel@tonic-gate else 22190Sstevel@tonic-gate logerr("Invalid value for FAILBACK %s\n", value); 22200Sstevel@tonic-gate free(value); 22210Sstevel@tonic-gate } else { 22220Sstevel@tonic-gate failback_enabled = _B_TRUE; 22230Sstevel@tonic-gate } 22240Sstevel@tonic-gate 22250Sstevel@tonic-gate /* 22260Sstevel@tonic-gate * Get the user specified value of track_all_phyints from 22270Sstevel@tonic-gate * /etc/default/mpathd. The sense is reversed in 22280Sstevel@tonic-gate * TRACK_INTERFACES_ONLY_WITH_GROUPS. 22290Sstevel@tonic-gate */ 22300Sstevel@tonic-gate value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS"); 22310Sstevel@tonic-gate if (value != NULL) { 22320Sstevel@tonic-gate if (strncasecmp(value, "yes", 3) == 0) 22330Sstevel@tonic-gate track_all_phyints = _B_FALSE; 22340Sstevel@tonic-gate else if (strncasecmp(value, "no", 2) == 0) 22350Sstevel@tonic-gate track_all_phyints = _B_TRUE; 22360Sstevel@tonic-gate else 22370Sstevel@tonic-gate logerr("Invalid value for " 22380Sstevel@tonic-gate "TRACK_INTERFACES_ONLY_WITH_GROUPS %s\n", value); 22390Sstevel@tonic-gate free(value); 22400Sstevel@tonic-gate } else { 22410Sstevel@tonic-gate track_all_phyints = _B_FALSE; 22420Sstevel@tonic-gate } 22430Sstevel@tonic-gate 22440Sstevel@tonic-gate while ((c = getopt(argc, argv, "adD:ml")) != EOF) { 22450Sstevel@tonic-gate switch (c) { 22460Sstevel@tonic-gate case 'a': 22470Sstevel@tonic-gate adopt = _B_TRUE; 22480Sstevel@tonic-gate break; 22490Sstevel@tonic-gate case 'm': 22500Sstevel@tonic-gate force_mcast = _B_TRUE; 22510Sstevel@tonic-gate break; 22520Sstevel@tonic-gate case 'd': 22530Sstevel@tonic-gate debug = D_ALL; 22540Sstevel@tonic-gate foreground = _B_TRUE; 22550Sstevel@tonic-gate break; 22560Sstevel@tonic-gate case 'D': 22570Sstevel@tonic-gate i = (int)strtol(optarg, NULL, 0); 22580Sstevel@tonic-gate if (i == 0) { 22590Sstevel@tonic-gate (void) fprintf(stderr, "Bad debug flags: %s\n", 22600Sstevel@tonic-gate optarg); 22610Sstevel@tonic-gate exit(1); 22620Sstevel@tonic-gate } 22630Sstevel@tonic-gate debug |= i; 22640Sstevel@tonic-gate foreground = _B_TRUE; 22650Sstevel@tonic-gate break; 22660Sstevel@tonic-gate case 'l': 22670Sstevel@tonic-gate /* 22680Sstevel@tonic-gate * Turn off link state notification handling. 22690Sstevel@tonic-gate * Undocumented command line flag, for debugging 22700Sstevel@tonic-gate * purposes. 22710Sstevel@tonic-gate */ 22720Sstevel@tonic-gate handle_link_notifications = _B_FALSE; 22730Sstevel@tonic-gate break; 22740Sstevel@tonic-gate default: 22750Sstevel@tonic-gate usage(argv[0]); 22760Sstevel@tonic-gate exit(1); 22770Sstevel@tonic-gate } 22780Sstevel@tonic-gate } 22790Sstevel@tonic-gate 22800Sstevel@tonic-gate /* 22810Sstevel@tonic-gate * The sockets for the loopback command interface should be listening 22820Sstevel@tonic-gate * before we fork and exit in daemonize(). This way, whoever started us 22830Sstevel@tonic-gate * can use the loopback interface as soon as they get a zero exit 22840Sstevel@tonic-gate * status. 22850Sstevel@tonic-gate */ 22860Sstevel@tonic-gate lsock_v4 = setup_listener(AF_INET); 22870Sstevel@tonic-gate lsock_v6 = setup_listener(AF_INET6); 22880Sstevel@tonic-gate 22890Sstevel@tonic-gate if (lsock_v4 < 0 && lsock_v6 < 0) { 22900Sstevel@tonic-gate logerr("main: setup_listener failed for both IPv4 and IPv6\n"); 22910Sstevel@tonic-gate exit(1); 22920Sstevel@tonic-gate } 22930Sstevel@tonic-gate 22940Sstevel@tonic-gate if (!foreground) { 22950Sstevel@tonic-gate if (!daemonize()) { 22960Sstevel@tonic-gate logerr("cannot daemonize\n"); 22970Sstevel@tonic-gate exit(EXIT_FAILURE); 22980Sstevel@tonic-gate } 22990Sstevel@tonic-gate initlog(); 23000Sstevel@tonic-gate } 23010Sstevel@tonic-gate 23020Sstevel@tonic-gate /* 23030Sstevel@tonic-gate * Initializations: 23040Sstevel@tonic-gate * 1. Create ifsock* sockets. These are used for performing SIOC* 23050Sstevel@tonic-gate * ioctls. We have 2 sockets 1 each for IPv4 and IPv6. 23060Sstevel@tonic-gate * 2. Initialize a pipe for handling/recording signal events. 23070Sstevel@tonic-gate * 3. Create the routing sockets, used for listening 23080Sstevel@tonic-gate * to routing / interface changes. 23090Sstevel@tonic-gate * 4. phyint_init() - Initialize physical interface state 23100Sstevel@tonic-gate * (in mpd_tables.c). Must be done before creating interfaces, 23110Sstevel@tonic-gate * which timer_init() does indirectly. 23120Sstevel@tonic-gate * 5. timer_init() - Initialize timer related stuff 23130Sstevel@tonic-gate * 6. initifs() - Initialize our database of all known interfaces 23140Sstevel@tonic-gate * 7. init_router_targets() - Initialize our database of all known 23150Sstevel@tonic-gate * router targets. 23160Sstevel@tonic-gate */ 23170Sstevel@tonic-gate ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0); 23180Sstevel@tonic-gate if (ifsock_v4 < 0) { 23190Sstevel@tonic-gate logperror("main: IPv4 socket open"); 23200Sstevel@tonic-gate exit(1); 23210Sstevel@tonic-gate } 23220Sstevel@tonic-gate 23230Sstevel@tonic-gate ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0); 23240Sstevel@tonic-gate if (ifsock_v6 < 0) { 23250Sstevel@tonic-gate logperror("main: IPv6 socket open"); 23260Sstevel@tonic-gate exit(1); 23270Sstevel@tonic-gate } 23280Sstevel@tonic-gate 23290Sstevel@tonic-gate setup_eventpipe(); 23300Sstevel@tonic-gate 23310Sstevel@tonic-gate rtsock_v4 = setup_rtsock(AF_INET); 23320Sstevel@tonic-gate rtsock_v6 = setup_rtsock(AF_INET6); 23330Sstevel@tonic-gate 23340Sstevel@tonic-gate if (phyint_init() == -1) { 23350Sstevel@tonic-gate logerr("cannot initialize physical interface structures"); 23360Sstevel@tonic-gate exit(1); 23370Sstevel@tonic-gate } 23380Sstevel@tonic-gate 23390Sstevel@tonic-gate timer_init(); 23400Sstevel@tonic-gate 23410Sstevel@tonic-gate initifs(); 23420Sstevel@tonic-gate 2343704Sethindra /* Inform kernel whether failback is enabled or disabled */ 2344704Sethindra if (ioctl(ifsock_v4, SIOCSIPMPFAILBACK, (int *)&failback_enabled) < 0) { 2345704Sethindra logperror("main: ioctl (SIOCSIPMPFAILBACK)"); 2346704Sethindra exit(1); 2347704Sethindra } 2348704Sethindra 23490Sstevel@tonic-gate /* 23500Sstevel@tonic-gate * If we're operating in "adopt" mode and no interfaces need to be 23510Sstevel@tonic-gate * tracked, shut down (ifconfig(1M) will restart us on demand if 23520Sstevel@tonic-gate * interfaces are subsequently put into multipathing groups). 23530Sstevel@tonic-gate */ 23540Sstevel@tonic-gate if (adopt && phyint_instances == NULL) 23550Sstevel@tonic-gate exit(0); 23560Sstevel@tonic-gate 23570Sstevel@tonic-gate /* 23580Sstevel@tonic-gate * Main body. Keep listening for activity on any of the sockets 23590Sstevel@tonic-gate * that we are monitoring and take appropriate action as necessary. 23600Sstevel@tonic-gate * signals are also handled synchronously. 23610Sstevel@tonic-gate */ 23620Sstevel@tonic-gate for (;;) { 23630Sstevel@tonic-gate if (poll(pollfds, pollfd_num, -1) < 0) { 23640Sstevel@tonic-gate if (errno == EINTR) 23650Sstevel@tonic-gate continue; 23660Sstevel@tonic-gate logperror("main: poll"); 23670Sstevel@tonic-gate exit(1); 23680Sstevel@tonic-gate } 23690Sstevel@tonic-gate for (i = 0; i < pollfd_num; i++) { 23700Sstevel@tonic-gate if ((pollfds[i].fd == -1) || 23710Sstevel@tonic-gate !(pollfds[i].revents & POLLIN)) 23720Sstevel@tonic-gate continue; 23730Sstevel@tonic-gate if (pollfds[i].fd == eventpipe_read) { 23740Sstevel@tonic-gate in_signal(eventpipe_read); 23750Sstevel@tonic-gate break; 23760Sstevel@tonic-gate } 23770Sstevel@tonic-gate if (pollfds[i].fd == rtsock_v4 || 23780Sstevel@tonic-gate pollfds[i].fd == rtsock_v6) { 23790Sstevel@tonic-gate process_rtsock(rtsock_v4, rtsock_v6); 23800Sstevel@tonic-gate break; 23810Sstevel@tonic-gate } 23820Sstevel@tonic-gate for (pii = phyint_instances; pii != NULL; 23830Sstevel@tonic-gate pii = pii->pii_next) { 23840Sstevel@tonic-gate if (pollfds[i].fd == pii->pii_probe_sock) { 23850Sstevel@tonic-gate if (pii->pii_af == AF_INET) 23860Sstevel@tonic-gate in_data(pii); 23870Sstevel@tonic-gate else 23880Sstevel@tonic-gate in6_data(pii); 23890Sstevel@tonic-gate break; 23900Sstevel@tonic-gate } 23910Sstevel@tonic-gate } 23920Sstevel@tonic-gate if (pollfds[i].fd == lsock_v4) 23930Sstevel@tonic-gate loopback_cmd(lsock_v4, AF_INET); 23940Sstevel@tonic-gate else if (pollfds[i].fd == lsock_v6) 23950Sstevel@tonic-gate loopback_cmd(lsock_v6, AF_INET6); 23960Sstevel@tonic-gate } 23970Sstevel@tonic-gate if (full_scan_required) { 23980Sstevel@tonic-gate initifs(); 23990Sstevel@tonic-gate full_scan_required = _B_FALSE; 24000Sstevel@tonic-gate } 24010Sstevel@tonic-gate } 24020Sstevel@tonic-gate /* NOTREACHED */ 24030Sstevel@tonic-gate return (EXIT_SUCCESS); 24040Sstevel@tonic-gate } 24050Sstevel@tonic-gate 24060Sstevel@tonic-gate static int 24070Sstevel@tonic-gate setup_listener(int af) 24080Sstevel@tonic-gate { 24090Sstevel@tonic-gate int sock; 24100Sstevel@tonic-gate int on; 24110Sstevel@tonic-gate int len; 24120Sstevel@tonic-gate int ret; 24130Sstevel@tonic-gate struct sockaddr_storage laddr; 24140Sstevel@tonic-gate struct sockaddr_in *sin; 24150Sstevel@tonic-gate struct sockaddr_in6 *sin6; 24160Sstevel@tonic-gate struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; 24170Sstevel@tonic-gate 24180Sstevel@tonic-gate assert(af == AF_INET || af == AF_INET6); 24190Sstevel@tonic-gate 24200Sstevel@tonic-gate sock = socket(af, SOCK_STREAM, 0); 24210Sstevel@tonic-gate if (sock < 0) { 24220Sstevel@tonic-gate logperror("setup_listener: socket"); 24230Sstevel@tonic-gate exit(1); 24240Sstevel@tonic-gate } 24250Sstevel@tonic-gate 24260Sstevel@tonic-gate on = 1; 24270Sstevel@tonic-gate if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&on, 24280Sstevel@tonic-gate sizeof (on)) < 0) { 24290Sstevel@tonic-gate logperror("setup_listener: setsockopt (SO_REUSEADDR)"); 24300Sstevel@tonic-gate exit(1); 24310Sstevel@tonic-gate } 24320Sstevel@tonic-gate 24330Sstevel@tonic-gate bzero(&laddr, sizeof (laddr)); 24340Sstevel@tonic-gate laddr.ss_family = af; 24350Sstevel@tonic-gate 24360Sstevel@tonic-gate if (af == AF_INET) { 24370Sstevel@tonic-gate sin = (struct sockaddr_in *)&laddr; 24380Sstevel@tonic-gate sin->sin_port = htons(MPATHD_PORT); 24390Sstevel@tonic-gate sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 24400Sstevel@tonic-gate len = sizeof (struct sockaddr_in); 24410Sstevel@tonic-gate } else { 24420Sstevel@tonic-gate sin6 = (struct sockaddr_in6 *)&laddr; 24430Sstevel@tonic-gate sin6->sin6_port = htons(MPATHD_PORT); 24440Sstevel@tonic-gate sin6->sin6_addr = loopback_addr; 24450Sstevel@tonic-gate len = sizeof (struct sockaddr_in6); 24460Sstevel@tonic-gate } 24470Sstevel@tonic-gate 24480Sstevel@tonic-gate ret = bind(sock, (struct sockaddr *)&laddr, len); 24490Sstevel@tonic-gate if (ret < 0) { 24500Sstevel@tonic-gate if (errno == EADDRINUSE) { 24510Sstevel@tonic-gate /* 24520Sstevel@tonic-gate * Another instance of mpathd may be already active. 24530Sstevel@tonic-gate */ 24540Sstevel@tonic-gate logerr("main: is another instance of in.mpathd " 24550Sstevel@tonic-gate "already active?\n"); 24560Sstevel@tonic-gate exit(1); 24570Sstevel@tonic-gate } else { 24580Sstevel@tonic-gate (void) close(sock); 24590Sstevel@tonic-gate return (-1); 24600Sstevel@tonic-gate } 24610Sstevel@tonic-gate } 24620Sstevel@tonic-gate if (listen(sock, 30) < 0) { 24630Sstevel@tonic-gate logperror("main: listen"); 24640Sstevel@tonic-gate exit(1); 24650Sstevel@tonic-gate } 24660Sstevel@tonic-gate if (poll_add(sock) == -1) { 24670Sstevel@tonic-gate (void) close(sock); 24680Sstevel@tonic-gate exit(1); 24690Sstevel@tonic-gate } 24700Sstevel@tonic-gate 24710Sstevel@tonic-gate return (sock); 24720Sstevel@tonic-gate } 24730Sstevel@tonic-gate 24740Sstevel@tonic-gate /* 24750Sstevel@tonic-gate * Table of commands and their expected size; used by loopback_cmd(). 24760Sstevel@tonic-gate */ 24770Sstevel@tonic-gate static struct { 24780Sstevel@tonic-gate const char *name; 24790Sstevel@tonic-gate unsigned int size; 24800Sstevel@tonic-gate } commands[] = { 24810Sstevel@tonic-gate { "MI_PING", sizeof (uint32_t) }, 24820Sstevel@tonic-gate { "MI_OFFLINE", sizeof (mi_offline_t) }, 24830Sstevel@tonic-gate { "MI_UNDO_OFFLINE", sizeof (mi_undo_offline_t) }, 24840Sstevel@tonic-gate { "MI_SETOINDEX", sizeof (mi_setoindex_t) }, 24850Sstevel@tonic-gate { "MI_QUERY", sizeof (mi_query_t) } 24860Sstevel@tonic-gate }; 24870Sstevel@tonic-gate 24880Sstevel@tonic-gate /* 24890Sstevel@tonic-gate * Commands received over the loopback interface come here. Currently 24900Sstevel@tonic-gate * the agents that send commands are ifconfig, if_mpadm and the RCM IPMP 24910Sstevel@tonic-gate * module. ifconfig only makes a connection, and closes it to check if 24920Sstevel@tonic-gate * in.mpathd is running. 24930Sstevel@tonic-gate * if_mpadm sends commands in the format specified by the mpathd_interface 24940Sstevel@tonic-gate * structure. 24950Sstevel@tonic-gate */ 24960Sstevel@tonic-gate static void 24970Sstevel@tonic-gate loopback_cmd(int sock, int family) 24980Sstevel@tonic-gate { 24990Sstevel@tonic-gate int newfd; 25000Sstevel@tonic-gate ssize_t len; 25010Sstevel@tonic-gate struct sockaddr_storage peer; 25020Sstevel@tonic-gate struct sockaddr_in *peer_sin; 25030Sstevel@tonic-gate struct sockaddr_in6 *peer_sin6; 25040Sstevel@tonic-gate socklen_t peerlen; 25050Sstevel@tonic-gate union mi_commands mpi; 25060Sstevel@tonic-gate struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; 25070Sstevel@tonic-gate char abuf[INET6_ADDRSTRLEN]; 25080Sstevel@tonic-gate uint_t cmd; 25090Sstevel@tonic-gate int retval; 25100Sstevel@tonic-gate 25110Sstevel@tonic-gate peerlen = sizeof (peer); 25120Sstevel@tonic-gate newfd = accept(sock, (struct sockaddr *)&peer, &peerlen); 25130Sstevel@tonic-gate if (newfd < 0) { 25140Sstevel@tonic-gate logperror("loopback_cmd: accept"); 25150Sstevel@tonic-gate return; 25160Sstevel@tonic-gate } 25170Sstevel@tonic-gate 25180Sstevel@tonic-gate switch (family) { 25190Sstevel@tonic-gate case AF_INET: 25200Sstevel@tonic-gate /* 25210Sstevel@tonic-gate * Validate the address and port to make sure that 25220Sstevel@tonic-gate * non privileged processes don't connect and start 25230Sstevel@tonic-gate * talking to us. 25240Sstevel@tonic-gate */ 25250Sstevel@tonic-gate if (peerlen != sizeof (struct sockaddr_in)) { 25260Sstevel@tonic-gate logerr("loopback_cmd: AF_INET peerlen %d\n", peerlen); 25270Sstevel@tonic-gate (void) close(newfd); 25280Sstevel@tonic-gate return; 25290Sstevel@tonic-gate } 25300Sstevel@tonic-gate peer_sin = (struct sockaddr_in *)&peer; 25310Sstevel@tonic-gate if ((ntohs(peer_sin->sin_port) >= IPPORT_RESERVED) || 25320Sstevel@tonic-gate (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK)) { 25330Sstevel@tonic-gate (void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr, 25340Sstevel@tonic-gate abuf, sizeof (abuf)); 25350Sstevel@tonic-gate logerr("Attempt to connect from addr %s port %d\n", 25360Sstevel@tonic-gate abuf, ntohs(peer_sin->sin_port)); 25370Sstevel@tonic-gate (void) close(newfd); 25380Sstevel@tonic-gate return; 25390Sstevel@tonic-gate } 25400Sstevel@tonic-gate break; 25410Sstevel@tonic-gate 25420Sstevel@tonic-gate case AF_INET6: 25430Sstevel@tonic-gate if (peerlen != sizeof (struct sockaddr_in6)) { 25440Sstevel@tonic-gate logerr("loopback_cmd: AF_INET6 peerlen %d\n", peerlen); 25450Sstevel@tonic-gate (void) close(newfd); 25460Sstevel@tonic-gate return; 25470Sstevel@tonic-gate } 25480Sstevel@tonic-gate /* 25490Sstevel@tonic-gate * Validate the address and port to make sure that 25500Sstevel@tonic-gate * non privileged processes don't connect and start 25510Sstevel@tonic-gate * talking to us. 25520Sstevel@tonic-gate */ 25530Sstevel@tonic-gate peer_sin6 = (struct sockaddr_in6 *)&peer; 25540Sstevel@tonic-gate if ((ntohs(peer_sin6->sin6_port) >= IPPORT_RESERVED) || 25550Sstevel@tonic-gate (!IN6_ARE_ADDR_EQUAL(&peer_sin6->sin6_addr, 25560Sstevel@tonic-gate &loopback_addr))) { 25570Sstevel@tonic-gate (void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf, 25580Sstevel@tonic-gate sizeof (abuf)); 25590Sstevel@tonic-gate logerr("Attempt to connect from addr %s port %d\n", 25600Sstevel@tonic-gate abuf, ntohs(peer_sin6->sin6_port)); 25610Sstevel@tonic-gate (void) close(newfd); 25620Sstevel@tonic-gate return; 25630Sstevel@tonic-gate } 25640Sstevel@tonic-gate 25650Sstevel@tonic-gate default: 25660Sstevel@tonic-gate logdebug("loopback_cmd: family %d\n", family); 25670Sstevel@tonic-gate (void) close(newfd); 25680Sstevel@tonic-gate return; 25690Sstevel@tonic-gate } 25700Sstevel@tonic-gate 25710Sstevel@tonic-gate /* 25720Sstevel@tonic-gate * The sizeof the 'mpi' buffer corresponds to the maximum size of 25730Sstevel@tonic-gate * all supported commands 25740Sstevel@tonic-gate */ 25750Sstevel@tonic-gate len = read(newfd, &mpi, sizeof (mpi)); 25760Sstevel@tonic-gate 25770Sstevel@tonic-gate /* 25780Sstevel@tonic-gate * ifconfig does not send any data. Just tests to see if mpathd 25790Sstevel@tonic-gate * is already running. 25800Sstevel@tonic-gate */ 25810Sstevel@tonic-gate if (len <= 0) { 25820Sstevel@tonic-gate (void) close(newfd); 25830Sstevel@tonic-gate return; 25840Sstevel@tonic-gate } 25850Sstevel@tonic-gate 25860Sstevel@tonic-gate /* 25870Sstevel@tonic-gate * In theory, we can receive any sized message for a stream socket, 25880Sstevel@tonic-gate * but we don't expect that to happen for a small message over a 25890Sstevel@tonic-gate * loopback connection. 25900Sstevel@tonic-gate */ 25910Sstevel@tonic-gate if (len < sizeof (uint32_t)) { 25920Sstevel@tonic-gate logerr("loopback_cmd: bad command format or read returns " 25930Sstevel@tonic-gate "partial data %d\n", len); 25940Sstevel@tonic-gate } 25950Sstevel@tonic-gate 25960Sstevel@tonic-gate cmd = mpi.mi_command; 25970Sstevel@tonic-gate if (cmd >= MI_NCMD) { 25980Sstevel@tonic-gate logerr("loopback_cmd: unknown command id `%d'\n", cmd); 25990Sstevel@tonic-gate (void) close(newfd); 26000Sstevel@tonic-gate return; 26010Sstevel@tonic-gate } 26020Sstevel@tonic-gate 26030Sstevel@tonic-gate if (len < commands[cmd].size) { 26040Sstevel@tonic-gate logerr("loopback_cmd: short %s command (expected %d, got %d)\n", 26050Sstevel@tonic-gate commands[cmd].name, commands[cmd].size, len); 26060Sstevel@tonic-gate (void) close(newfd); 26070Sstevel@tonic-gate return; 26080Sstevel@tonic-gate } 26090Sstevel@tonic-gate 26100Sstevel@tonic-gate retval = process_cmd(newfd, &mpi); 26110Sstevel@tonic-gate if (retval != IPMP_SUCCESS) { 26120Sstevel@tonic-gate logerr("failed processing %s: %s\n", commands[cmd].name, 26130Sstevel@tonic-gate ipmp_errmsg(retval)); 26140Sstevel@tonic-gate } 26150Sstevel@tonic-gate (void) close(newfd); 26160Sstevel@tonic-gate } 26170Sstevel@tonic-gate 26180Sstevel@tonic-gate extern int global_errno; /* set by failover() or failback() */ 26190Sstevel@tonic-gate 26200Sstevel@tonic-gate /* 26210Sstevel@tonic-gate * Process the offline, undo offline and set original index commands, 26220Sstevel@tonic-gate * received from if_mpadm(1M) 26230Sstevel@tonic-gate */ 26240Sstevel@tonic-gate static unsigned int 26250Sstevel@tonic-gate process_cmd(int newfd, union mi_commands *mpi) 26260Sstevel@tonic-gate { 26270Sstevel@tonic-gate uint_t nif = 0; 26280Sstevel@tonic-gate uint32_t cmd; 26290Sstevel@tonic-gate struct phyint *pi; 26300Sstevel@tonic-gate struct phyint *pi2; 26310Sstevel@tonic-gate struct phyint_group *pg; 26320Sstevel@tonic-gate boolean_t success; 26330Sstevel@tonic-gate int error; 26340Sstevel@tonic-gate struct mi_offline *mio; 26350Sstevel@tonic-gate struct mi_undo_offline *miu; 26360Sstevel@tonic-gate struct lifreq lifr; 26370Sstevel@tonic-gate int ifsock; 26380Sstevel@tonic-gate struct mi_setoindex *mis; 26390Sstevel@tonic-gate 26400Sstevel@tonic-gate cmd = mpi->mi_command; 26410Sstevel@tonic-gate 26420Sstevel@tonic-gate switch (cmd) { 26430Sstevel@tonic-gate case MI_OFFLINE: 26440Sstevel@tonic-gate mio = &mpi->mi_ocmd; 26450Sstevel@tonic-gate /* 26460Sstevel@tonic-gate * Lookup the interface that needs to be offlined. 26470Sstevel@tonic-gate * If it does not exist, return a suitable error. 26480Sstevel@tonic-gate */ 26490Sstevel@tonic-gate pi = phyint_lookup(mio->mio_ifname); 26500Sstevel@tonic-gate if (pi == NULL) 26510Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, EINVAL)); 26520Sstevel@tonic-gate 26530Sstevel@tonic-gate /* 26540Sstevel@tonic-gate * Verify that the minimum redundancy requirements are met. 26550Sstevel@tonic-gate * The multipathing group must have at least the specified 26560Sstevel@tonic-gate * number of functional interfaces after offlining the 26570Sstevel@tonic-gate * requested interface. Otherwise return a suitable error. 26580Sstevel@tonic-gate */ 26590Sstevel@tonic-gate pg = pi->pi_group; 26600Sstevel@tonic-gate nif = 0; 26610Sstevel@tonic-gate if (pg != phyint_anongroup) { 26620Sstevel@tonic-gate for (nif = 0, pi2 = pg->pg_phyint; pi2 != NULL; 26630Sstevel@tonic-gate pi2 = pi2->pi_pgnext) { 26640Sstevel@tonic-gate if ((pi2->pi_state == PI_RUNNING) || 26650Sstevel@tonic-gate (pg->pg_groupfailed && 26660Sstevel@tonic-gate !(pi2->pi_flags & IFF_OFFLINE))) 26670Sstevel@tonic-gate nif++; 26680Sstevel@tonic-gate } 26690Sstevel@tonic-gate } 26700Sstevel@tonic-gate if (nif < mio->mio_min_redundancy) 26710Sstevel@tonic-gate return (send_result(newfd, IPMP_EMINRED, 0)); 26720Sstevel@tonic-gate 26730Sstevel@tonic-gate /* 26740Sstevel@tonic-gate * The order of operation is to set IFF_OFFLINE, followed by 26750Sstevel@tonic-gate * failover. Setting IFF_OFFLINE ensures that no new ipif's 26760Sstevel@tonic-gate * can be created. Subsequent failover moves everything on 26770Sstevel@tonic-gate * the OFFLINE interface to some other functional interface. 26780Sstevel@tonic-gate */ 26790Sstevel@tonic-gate success = change_lif_flags(pi, IFF_OFFLINE, _B_TRUE); 26800Sstevel@tonic-gate if (success) { 26810Sstevel@tonic-gate if (!pi->pi_empty) { 26820Sstevel@tonic-gate error = try_failover(pi, FAILOVER_NORMAL); 26830Sstevel@tonic-gate if (error != 0) { 26840Sstevel@tonic-gate if (!change_lif_flags(pi, IFF_OFFLINE, 26850Sstevel@tonic-gate _B_FALSE)) { 26860Sstevel@tonic-gate logerr("process_cmd: couldn't" 26870Sstevel@tonic-gate " clear OFFLINE flag on" 26880Sstevel@tonic-gate " %s\n", pi->pi_name); 26890Sstevel@tonic-gate /* 26900Sstevel@tonic-gate * Offline interfaces should 26910Sstevel@tonic-gate * not be probed. 26920Sstevel@tonic-gate */ 26930Sstevel@tonic-gate stop_probing(pi); 26940Sstevel@tonic-gate } 26950Sstevel@tonic-gate return (send_result(newfd, error, 26960Sstevel@tonic-gate global_errno)); 26970Sstevel@tonic-gate } 26980Sstevel@tonic-gate } 26990Sstevel@tonic-gate } else { 27000Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, errno)); 27010Sstevel@tonic-gate } 27020Sstevel@tonic-gate 27030Sstevel@tonic-gate /* 27040Sstevel@tonic-gate * The interface is now Offline, so stop probing it. 27050Sstevel@tonic-gate * Note that if_mpadm(1M) will down the test addresses, 27060Sstevel@tonic-gate * after receiving a success reply from us. The routing 27070Sstevel@tonic-gate * socket message will then make us close the socket used 27080Sstevel@tonic-gate * for sending probes. But it is more logical that an 27090Sstevel@tonic-gate * offlined interface must not be probed, even if it has 27100Sstevel@tonic-gate * test addresses. 27110Sstevel@tonic-gate */ 27120Sstevel@tonic-gate stop_probing(pi); 27130Sstevel@tonic-gate return (send_result(newfd, IPMP_SUCCESS, 0)); 27140Sstevel@tonic-gate 27150Sstevel@tonic-gate case MI_UNDO_OFFLINE: 27160Sstevel@tonic-gate miu = &mpi->mi_ucmd; 27170Sstevel@tonic-gate /* 27180Sstevel@tonic-gate * Undo the offline command. As usual lookup the interface. 27190Sstevel@tonic-gate * Send an error if it does not exist. 27200Sstevel@tonic-gate */ 27210Sstevel@tonic-gate pi = phyint_lookup(miu->miu_ifname); 27220Sstevel@tonic-gate if (pi == NULL) 27230Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, EINVAL)); 27240Sstevel@tonic-gate 27250Sstevel@tonic-gate /* 27260Sstevel@tonic-gate * Inverse of the offline operation. Do a failback, and then 27270Sstevel@tonic-gate * clear the IFF_OFFLINE flag. 27280Sstevel@tonic-gate */ 27290Sstevel@tonic-gate error = do_failback(pi, _B_TRUE); 27300Sstevel@tonic-gate if (error == IPMP_EFBPARTIAL) 27310Sstevel@tonic-gate return (send_result(newfd, IPMP_EFBPARTIAL, 0)); 27320Sstevel@tonic-gate error = do_failback(pi, _B_FALSE); 27330Sstevel@tonic-gate 27340Sstevel@tonic-gate switch (error) { 27350Sstevel@tonic-gate case IPMP_SUCCESS: 27360Sstevel@tonic-gate if (!change_lif_flags(pi, IFF_OFFLINE, _B_FALSE)) { 27370Sstevel@tonic-gate logdebug("undo error %X\n", global_errno); 27380Sstevel@tonic-gate error = IPMP_FAILURE; 27390Sstevel@tonic-gate break; 27400Sstevel@tonic-gate } 27410Sstevel@tonic-gate /* FALLTHROUGH */ 27420Sstevel@tonic-gate 27430Sstevel@tonic-gate case IPMP_EFBPARTIAL: 27440Sstevel@tonic-gate /* 27450Sstevel@tonic-gate * Reset the state of the interface based on the 27460Sstevel@tonic-gate * current link state; if this phyint subsequently 27470Sstevel@tonic-gate * acquires a test address, the state will be changed 27480Sstevel@tonic-gate * again later as a result of the probes. 27490Sstevel@tonic-gate */ 27500Sstevel@tonic-gate if (LINK_UP(pi)) 27510Sstevel@tonic-gate phyint_chstate(pi, PI_RUNNING); 27520Sstevel@tonic-gate else 27530Sstevel@tonic-gate phyint_chstate(pi, PI_FAILED); 27540Sstevel@tonic-gate break; 27550Sstevel@tonic-gate 27560Sstevel@tonic-gate case IPMP_FAILURE: 27570Sstevel@tonic-gate break; 27580Sstevel@tonic-gate 27590Sstevel@tonic-gate default: 27600Sstevel@tonic-gate logdebug("do_failback: unexpected return value\n"); 27610Sstevel@tonic-gate break; 27620Sstevel@tonic-gate } 27630Sstevel@tonic-gate return (send_result(newfd, error, global_errno)); 27640Sstevel@tonic-gate 27650Sstevel@tonic-gate case MI_SETOINDEX: 27660Sstevel@tonic-gate mis = &mpi->mi_scmd; 27670Sstevel@tonic-gate 27680Sstevel@tonic-gate /* Get the socket for doing ioctls */ 27690Sstevel@tonic-gate ifsock = (mis->mis_iftype == AF_INET) ? ifsock_v4 : ifsock_v6; 27700Sstevel@tonic-gate 27710Sstevel@tonic-gate /* 27720Sstevel@tonic-gate * Get index of new original interface. 27730Sstevel@tonic-gate * The index is returned in lifr.lifr_index. 27740Sstevel@tonic-gate */ 27750Sstevel@tonic-gate (void) strlcpy(lifr.lifr_name, mis->mis_new_pifname, 27760Sstevel@tonic-gate sizeof (lifr.lifr_name)); 27770Sstevel@tonic-gate 27780Sstevel@tonic-gate if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) 27790Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, errno)); 27800Sstevel@tonic-gate 27810Sstevel@tonic-gate /* 27820Sstevel@tonic-gate * Set new original interface index. 27830Sstevel@tonic-gate * The new index was put into lifr.lifr_index by the 27840Sstevel@tonic-gate * SIOCGLIFINDEX ioctl. 27850Sstevel@tonic-gate */ 27860Sstevel@tonic-gate (void) strlcpy(lifr.lifr_name, mis->mis_lifname, 27870Sstevel@tonic-gate sizeof (lifr.lifr_name)); 27880Sstevel@tonic-gate 27890Sstevel@tonic-gate if (ioctl(ifsock, SIOCSLIFOINDEX, (char *)&lifr) < 0) 27900Sstevel@tonic-gate return (send_result(newfd, IPMP_FAILURE, errno)); 27910Sstevel@tonic-gate 27920Sstevel@tonic-gate return (send_result(newfd, IPMP_SUCCESS, 0)); 27930Sstevel@tonic-gate 27940Sstevel@tonic-gate case MI_QUERY: 27950Sstevel@tonic-gate return (process_query(newfd, &mpi->mi_qcmd)); 27960Sstevel@tonic-gate 27970Sstevel@tonic-gate default: 27980Sstevel@tonic-gate break; 27990Sstevel@tonic-gate } 28000Sstevel@tonic-gate 28010Sstevel@tonic-gate return (send_result(newfd, IPMP_EPROTO, 0)); 28020Sstevel@tonic-gate } 28030Sstevel@tonic-gate 28040Sstevel@tonic-gate /* 28050Sstevel@tonic-gate * Process the query request pointed to by `miq' and send a reply on file 28060Sstevel@tonic-gate * descriptor `fd'. Returns an IPMP error code. 28070Sstevel@tonic-gate */ 28080Sstevel@tonic-gate static unsigned int 28090Sstevel@tonic-gate process_query(int fd, mi_query_t *miq) 28100Sstevel@tonic-gate { 28110Sstevel@tonic-gate ipmp_groupinfo_t *grinfop; 28120Sstevel@tonic-gate ipmp_groupinfolist_t *grlp; 28130Sstevel@tonic-gate ipmp_grouplist_t *grlistp; 28140Sstevel@tonic-gate ipmp_ifinfo_t *ifinfop; 28150Sstevel@tonic-gate ipmp_ifinfolist_t *iflp; 28160Sstevel@tonic-gate ipmp_snap_t *snap; 28170Sstevel@tonic-gate unsigned int retval; 28180Sstevel@tonic-gate 28190Sstevel@tonic-gate switch (miq->miq_inforeq) { 28200Sstevel@tonic-gate case IPMP_GROUPLIST: 28210Sstevel@tonic-gate retval = getgrouplist(&grlistp); 28220Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28230Sstevel@tonic-gate return (send_result(fd, retval, errno)); 28240Sstevel@tonic-gate 28250Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 28260Sstevel@tonic-gate if (retval == IPMP_SUCCESS) 28270Sstevel@tonic-gate retval = send_grouplist(fd, grlistp); 28280Sstevel@tonic-gate 28290Sstevel@tonic-gate ipmp_freegrouplist(grlistp); 28300Sstevel@tonic-gate return (retval); 28310Sstevel@tonic-gate 28320Sstevel@tonic-gate case IPMP_GROUPINFO: 28330Sstevel@tonic-gate miq->miq_grname[LIFGRNAMSIZ - 1] = '\0'; 28340Sstevel@tonic-gate retval = getgroupinfo(miq->miq_ifname, &grinfop); 28350Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28360Sstevel@tonic-gate return (send_result(fd, retval, errno)); 28370Sstevel@tonic-gate 28380Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 28390Sstevel@tonic-gate if (retval == IPMP_SUCCESS) 28400Sstevel@tonic-gate retval = send_groupinfo(fd, grinfop); 28410Sstevel@tonic-gate 28420Sstevel@tonic-gate ipmp_freegroupinfo(grinfop); 28430Sstevel@tonic-gate return (retval); 28440Sstevel@tonic-gate 28450Sstevel@tonic-gate case IPMP_IFINFO: 28460Sstevel@tonic-gate miq->miq_ifname[LIFNAMSIZ - 1] = '\0'; 28470Sstevel@tonic-gate retval = getifinfo(miq->miq_ifname, &ifinfop); 28480Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28490Sstevel@tonic-gate return (send_result(fd, retval, errno)); 28500Sstevel@tonic-gate 28510Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 28520Sstevel@tonic-gate if (retval == IPMP_SUCCESS) 28530Sstevel@tonic-gate retval = send_ifinfo(fd, ifinfop); 28540Sstevel@tonic-gate 28550Sstevel@tonic-gate ipmp_freeifinfo(ifinfop); 28560Sstevel@tonic-gate return (retval); 28570Sstevel@tonic-gate 28580Sstevel@tonic-gate case IPMP_SNAP: 28590Sstevel@tonic-gate retval = getsnap(&snap); 28600Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28610Sstevel@tonic-gate return (send_result(fd, retval, errno)); 28620Sstevel@tonic-gate 28630Sstevel@tonic-gate retval = send_result(fd, IPMP_SUCCESS, 0); 28640Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28650Sstevel@tonic-gate goto out; 28660Sstevel@tonic-gate 28670Sstevel@tonic-gate retval = ipmp_writetlv(fd, IPMP_SNAP, sizeof (*snap), snap); 28680Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28690Sstevel@tonic-gate goto out; 28700Sstevel@tonic-gate 28710Sstevel@tonic-gate retval = send_grouplist(fd, snap->sn_grlistp); 28720Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28730Sstevel@tonic-gate goto out; 28740Sstevel@tonic-gate 28750Sstevel@tonic-gate iflp = snap->sn_ifinfolistp; 28760Sstevel@tonic-gate for (; iflp != NULL; iflp = iflp->ifl_next) { 28770Sstevel@tonic-gate retval = send_ifinfo(fd, iflp->ifl_ifinfop); 28780Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28790Sstevel@tonic-gate goto out; 28800Sstevel@tonic-gate } 28810Sstevel@tonic-gate 28820Sstevel@tonic-gate grlp = snap->sn_grinfolistp; 28830Sstevel@tonic-gate for (; grlp != NULL; grlp = grlp->grl_next) { 28840Sstevel@tonic-gate retval = send_groupinfo(fd, grlp->grl_grinfop); 28850Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 28860Sstevel@tonic-gate goto out; 28870Sstevel@tonic-gate } 28880Sstevel@tonic-gate out: 28890Sstevel@tonic-gate ipmp_snap_free(snap); 28900Sstevel@tonic-gate return (retval); 28910Sstevel@tonic-gate 28920Sstevel@tonic-gate default: 28930Sstevel@tonic-gate break; 28940Sstevel@tonic-gate 28950Sstevel@tonic-gate } 28960Sstevel@tonic-gate return (send_result(fd, IPMP_EPROTO, 0)); 28970Sstevel@tonic-gate } 28980Sstevel@tonic-gate 28990Sstevel@tonic-gate /* 29000Sstevel@tonic-gate * Send the group information pointed to by `grinfop' on file descriptor `fd'. 29010Sstevel@tonic-gate * Returns an IPMP error code. 29020Sstevel@tonic-gate */ 29030Sstevel@tonic-gate static unsigned int 29040Sstevel@tonic-gate send_groupinfo(int fd, ipmp_groupinfo_t *grinfop) 29050Sstevel@tonic-gate { 29060Sstevel@tonic-gate ipmp_iflist_t *iflistp = grinfop->gr_iflistp; 29070Sstevel@tonic-gate unsigned int retval; 29080Sstevel@tonic-gate 29090Sstevel@tonic-gate retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (*grinfop), grinfop); 29100Sstevel@tonic-gate if (retval != IPMP_SUCCESS) 29110Sstevel@tonic-gate return (retval); 29120Sstevel@tonic-gate 29130Sstevel@tonic-gate return (ipmp_writetlv(fd, IPMP_IFLIST, 29140Sstevel@tonic-gate IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp)); 29150Sstevel@tonic-gate } 29160Sstevel@tonic-gate 29170Sstevel@tonic-gate /* 29180Sstevel@tonic-gate * Send the interface information pointed to by `ifinfop' on file descriptor 29190Sstevel@tonic-gate * `fd'. Returns an IPMP error code. 29200Sstevel@tonic-gate */ 29210Sstevel@tonic-gate static unsigned int 29220Sstevel@tonic-gate send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop) 29230Sstevel@tonic-gate { 29240Sstevel@tonic-gate return (ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop)); 29250Sstevel@tonic-gate } 29260Sstevel@tonic-gate 29270Sstevel@tonic-gate /* 29280Sstevel@tonic-gate * Send the group list pointed to by `grlistp' on file descriptor `fd'. 29290Sstevel@tonic-gate * Returns an IPMP error code. 29300Sstevel@tonic-gate */ 29310Sstevel@tonic-gate static unsigned int 29320Sstevel@tonic-gate send_grouplist(int fd, ipmp_grouplist_t *grlistp) 29330Sstevel@tonic-gate { 29340Sstevel@tonic-gate return (ipmp_writetlv(fd, IPMP_GROUPLIST, 29350Sstevel@tonic-gate IPMP_GROUPLIST_SIZE(grlistp->gl_ngroup), grlistp)); 29360Sstevel@tonic-gate } 29370Sstevel@tonic-gate 29380Sstevel@tonic-gate /* 29390Sstevel@tonic-gate * Initialize an mi_result_t structure using `error' and `syserror' and 29400Sstevel@tonic-gate * send it on file descriptor `fd'. Returns an IPMP error code. 29410Sstevel@tonic-gate */ 29420Sstevel@tonic-gate static unsigned int 29430Sstevel@tonic-gate send_result(int fd, unsigned int error, int syserror) 29440Sstevel@tonic-gate { 29450Sstevel@tonic-gate mi_result_t me; 29460Sstevel@tonic-gate 29470Sstevel@tonic-gate me.me_mpathd_error = error; 29480Sstevel@tonic-gate if (error == IPMP_FAILURE) 29490Sstevel@tonic-gate me.me_sys_error = syserror; 29500Sstevel@tonic-gate else 29510Sstevel@tonic-gate me.me_sys_error = 0; 29520Sstevel@tonic-gate 29530Sstevel@tonic-gate return (ipmp_write(fd, &me, sizeof (me))); 29540Sstevel@tonic-gate } 29550Sstevel@tonic-gate 29560Sstevel@tonic-gate /* 29570Sstevel@tonic-gate * Daemonize the process. 29580Sstevel@tonic-gate */ 29590Sstevel@tonic-gate static boolean_t 29600Sstevel@tonic-gate daemonize(void) 29610Sstevel@tonic-gate { 29620Sstevel@tonic-gate switch (fork()) { 29630Sstevel@tonic-gate case -1: 29640Sstevel@tonic-gate return (_B_FALSE); 29650Sstevel@tonic-gate 29660Sstevel@tonic-gate case 0: 29670Sstevel@tonic-gate /* 29680Sstevel@tonic-gate * Lose our controlling terminal, and become both a session 29690Sstevel@tonic-gate * leader and a process group leader. 29700Sstevel@tonic-gate */ 29710Sstevel@tonic-gate if (setsid() == -1) 29720Sstevel@tonic-gate return (_B_FALSE); 29730Sstevel@tonic-gate 29740Sstevel@tonic-gate /* 29750Sstevel@tonic-gate * Under POSIX, a session leader can accidentally (through 29760Sstevel@tonic-gate * open(2)) acquire a controlling terminal if it does not 29770Sstevel@tonic-gate * have one. Just to be safe, fork() again so we are not a 29780Sstevel@tonic-gate * session leader. 29790Sstevel@tonic-gate */ 29800Sstevel@tonic-gate switch (fork()) { 29810Sstevel@tonic-gate case -1: 29820Sstevel@tonic-gate return (_B_FALSE); 29830Sstevel@tonic-gate 29840Sstevel@tonic-gate case 0: 29850Sstevel@tonic-gate (void) chdir("/"); 29860Sstevel@tonic-gate (void) umask(022); 29870Sstevel@tonic-gate (void) fdwalk(closefunc, NULL); 29880Sstevel@tonic-gate break; 29890Sstevel@tonic-gate 29900Sstevel@tonic-gate default: 29910Sstevel@tonic-gate _exit(EXIT_SUCCESS); 29920Sstevel@tonic-gate } 29930Sstevel@tonic-gate break; 29940Sstevel@tonic-gate 29950Sstevel@tonic-gate default: 29960Sstevel@tonic-gate _exit(EXIT_SUCCESS); 29970Sstevel@tonic-gate } 29980Sstevel@tonic-gate 29990Sstevel@tonic-gate return (_B_TRUE); 30000Sstevel@tonic-gate } 30010Sstevel@tonic-gate 30020Sstevel@tonic-gate /* 30030Sstevel@tonic-gate * The parent has created some fds before forking on purpose, keep them open. 30040Sstevel@tonic-gate */ 30050Sstevel@tonic-gate static int 30060Sstevel@tonic-gate closefunc(void *not_used, int fd) 30070Sstevel@tonic-gate /* ARGSUSED */ 30080Sstevel@tonic-gate { 30090Sstevel@tonic-gate if (fd != lsock_v4 && fd != lsock_v6) 30100Sstevel@tonic-gate (void) close(fd); 30110Sstevel@tonic-gate return (0); 30120Sstevel@tonic-gate } 30130Sstevel@tonic-gate 30140Sstevel@tonic-gate /* LOGGER */ 30150Sstevel@tonic-gate 30160Sstevel@tonic-gate #include <syslog.h> 30170Sstevel@tonic-gate 30180Sstevel@tonic-gate /* 30190Sstevel@tonic-gate * Logging routines. All routines log to syslog, unless the daemon is 30200Sstevel@tonic-gate * running in the foreground, in which case the logging goes to stderr. 30210Sstevel@tonic-gate * 30220Sstevel@tonic-gate * The following routines are available: 30230Sstevel@tonic-gate * 30240Sstevel@tonic-gate * logdebug(): A printf-like function for outputting debug messages 30250Sstevel@tonic-gate * (messages at LOG_DEBUG) that are only of use to developers. 30260Sstevel@tonic-gate * 30270Sstevel@tonic-gate * logtrace(): A printf-like function for outputting tracing messages 30280Sstevel@tonic-gate * (messages at LOG_INFO) from the daemon. This is typically used 30290Sstevel@tonic-gate * to log the receipt of interesting network-related conditions. 30300Sstevel@tonic-gate * 30310Sstevel@tonic-gate * logerr(): A printf-like function for outputting error messages 30320Sstevel@tonic-gate * (messages at LOG_ERR) from the daemon. 30330Sstevel@tonic-gate * 30340Sstevel@tonic-gate * logperror*(): A set of functions used to output error messages 30350Sstevel@tonic-gate * (messages at LOG_ERR); these automatically append strerror(errno) 30360Sstevel@tonic-gate * and a newline to the message passed to them. 30370Sstevel@tonic-gate * 30380Sstevel@tonic-gate * NOTE: since the logging functions write to syslog, the messages passed 30390Sstevel@tonic-gate * to them are not eligible for localization. Thus, gettext() must 30400Sstevel@tonic-gate * *not* be used. 30410Sstevel@tonic-gate */ 30420Sstevel@tonic-gate 30430Sstevel@tonic-gate static int logging = 0; 30440Sstevel@tonic-gate 30450Sstevel@tonic-gate static void 30460Sstevel@tonic-gate initlog(void) 30470Sstevel@tonic-gate { 30480Sstevel@tonic-gate logging++; 30490Sstevel@tonic-gate openlog("in.mpathd", LOG_PID | LOG_CONS, LOG_DAEMON); 30500Sstevel@tonic-gate } 30510Sstevel@tonic-gate 30520Sstevel@tonic-gate /* PRINTFLIKE1 */ 30530Sstevel@tonic-gate void 30540Sstevel@tonic-gate logerr(char *fmt, ...) 30550Sstevel@tonic-gate { 30560Sstevel@tonic-gate va_list ap; 30570Sstevel@tonic-gate 30580Sstevel@tonic-gate va_start(ap, fmt); 30590Sstevel@tonic-gate 30600Sstevel@tonic-gate if (logging) 30610Sstevel@tonic-gate vsyslog(LOG_ERR, fmt, ap); 30620Sstevel@tonic-gate else 30630Sstevel@tonic-gate (void) vfprintf(stderr, fmt, ap); 30640Sstevel@tonic-gate va_end(ap); 30650Sstevel@tonic-gate } 30660Sstevel@tonic-gate 30670Sstevel@tonic-gate /* PRINTFLIKE1 */ 30680Sstevel@tonic-gate void 30690Sstevel@tonic-gate logtrace(char *fmt, ...) 30700Sstevel@tonic-gate { 30710Sstevel@tonic-gate va_list ap; 30720Sstevel@tonic-gate 30730Sstevel@tonic-gate va_start(ap, fmt); 30740Sstevel@tonic-gate 30750Sstevel@tonic-gate if (logging) 30760Sstevel@tonic-gate vsyslog(LOG_INFO, fmt, ap); 30770Sstevel@tonic-gate else 30780Sstevel@tonic-gate (void) vfprintf(stderr, fmt, ap); 30790Sstevel@tonic-gate va_end(ap); 30800Sstevel@tonic-gate } 30810Sstevel@tonic-gate 30820Sstevel@tonic-gate /* PRINTFLIKE1 */ 30830Sstevel@tonic-gate void 30840Sstevel@tonic-gate logdebug(char *fmt, ...) 30850Sstevel@tonic-gate { 30860Sstevel@tonic-gate va_list ap; 30870Sstevel@tonic-gate 30880Sstevel@tonic-gate va_start(ap, fmt); 30890Sstevel@tonic-gate 30900Sstevel@tonic-gate if (logging) 30910Sstevel@tonic-gate vsyslog(LOG_DEBUG, fmt, ap); 30920Sstevel@tonic-gate else 30930Sstevel@tonic-gate (void) vfprintf(stderr, fmt, ap); 30940Sstevel@tonic-gate va_end(ap); 30950Sstevel@tonic-gate } 30960Sstevel@tonic-gate 30970Sstevel@tonic-gate /* PRINTFLIKE1 */ 30980Sstevel@tonic-gate void 30990Sstevel@tonic-gate logperror(char *str) 31000Sstevel@tonic-gate { 31010Sstevel@tonic-gate if (logging) 31020Sstevel@tonic-gate syslog(LOG_ERR, "%s: %m\n", str); 31030Sstevel@tonic-gate else 31040Sstevel@tonic-gate (void) fprintf(stderr, "%s: %s\n", str, strerror(errno)); 31050Sstevel@tonic-gate } 31060Sstevel@tonic-gate 31070Sstevel@tonic-gate void 31080Sstevel@tonic-gate logperror_pii(struct phyint_instance *pii, char *str) 31090Sstevel@tonic-gate { 31100Sstevel@tonic-gate if (logging) { 31110Sstevel@tonic-gate syslog(LOG_ERR, "%s (%s %s): %m\n", 31120Sstevel@tonic-gate str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name); 31130Sstevel@tonic-gate } else { 31140Sstevel@tonic-gate (void) fprintf(stderr, "%s (%s %s): %s\n", 31150Sstevel@tonic-gate str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name, 31160Sstevel@tonic-gate strerror(errno)); 31170Sstevel@tonic-gate } 31180Sstevel@tonic-gate } 31190Sstevel@tonic-gate 31200Sstevel@tonic-gate void 31210Sstevel@tonic-gate logperror_li(struct logint *li, char *str) 31220Sstevel@tonic-gate { 31230Sstevel@tonic-gate struct phyint_instance *pii = li->li_phyint_inst; 31240Sstevel@tonic-gate 31250Sstevel@tonic-gate if (logging) { 31260Sstevel@tonic-gate syslog(LOG_ERR, "%s (%s %s): %m\n", 31270Sstevel@tonic-gate str, AF_STR(pii->pii_af), li->li_name); 31280Sstevel@tonic-gate } else { 31290Sstevel@tonic-gate (void) fprintf(stderr, "%s (%s %s): %s\n", 31300Sstevel@tonic-gate str, AF_STR(pii->pii_af), li->li_name, 31310Sstevel@tonic-gate strerror(errno)); 31320Sstevel@tonic-gate } 31330Sstevel@tonic-gate } 31340Sstevel@tonic-gate 31350Sstevel@tonic-gate void 31360Sstevel@tonic-gate close_probe_socket(struct phyint_instance *pii, boolean_t polled) 31370Sstevel@tonic-gate { 31380Sstevel@tonic-gate if (polled) 31390Sstevel@tonic-gate (void) poll_remove(pii->pii_probe_sock); 31400Sstevel@tonic-gate (void) close(pii->pii_probe_sock); 31410Sstevel@tonic-gate pii->pii_probe_sock = -1; 31420Sstevel@tonic-gate pii->pii_basetime_inited = 0; 31430Sstevel@tonic-gate } 3144