xref: /onnv-gate/usr/src/cmd/fs.d/autofs/nfs_cast.c (revision 249:6233bc9dd306)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
230Sstevel@tonic-gate  *	nfs_cast.c : broadcast to a specific group of NFS servers
240Sstevel@tonic-gate  *
25*249Sjwahlig  *      Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26*249Sjwahlig  *      Use is subject to license terms.
270Sstevel@tonic-gate  */
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
300Sstevel@tonic-gate 
310Sstevel@tonic-gate #include <stdio.h>
320Sstevel@tonic-gate #include <syslog.h>
330Sstevel@tonic-gate #include <errno.h>
340Sstevel@tonic-gate #include <string.h>
350Sstevel@tonic-gate #include <sys/types.h>
360Sstevel@tonic-gate #include <sys/time.h>
370Sstevel@tonic-gate #include <sys/resource.h>
380Sstevel@tonic-gate #include <unistd.h>
390Sstevel@tonic-gate #include <stdlib.h>
400Sstevel@tonic-gate #include <rpc/rpc.h>
410Sstevel@tonic-gate #include <rpc/clnt_soc.h>
420Sstevel@tonic-gate #include <rpc/nettype.h>
430Sstevel@tonic-gate #include <rpc/pmap_prot.h>
440Sstevel@tonic-gate #include <netconfig.h>
450Sstevel@tonic-gate #include <netdir.h>
460Sstevel@tonic-gate #include <nfs/nfs.h>
470Sstevel@tonic-gate #define	NFSCLIENT
480Sstevel@tonic-gate #include <locale.h>
490Sstevel@tonic-gate #include "automount.h"
500Sstevel@tonic-gate 
510Sstevel@tonic-gate #define	PENALTY_WEIGHT    100000
520Sstevel@tonic-gate 
/*
 * Per-address bookkeeping for one "ping".  One of these is allocated for
 * every address returned for a host; they are chained off addr_if_tstamps
 * in struct addrs.
 */
struct tstamps {
	struct tstamps	*ts_next;	/* next timestamp for the same host */
	int		ts_penalty;	/* copied from the map entry's penalty */
	int		ts_inx;		/* address index; added to the base xid */
	int		ts_rcvd;	/* non-zero once a reply was matched */
	struct timeval	ts_timeval;	/* send time, later the elapsed time */
};
600Sstevel@tonic-gate 
/* A list of addresses - all belonging to the same transport */

struct addrs {
	struct addrs		*addr_next;	  /* next host on transport */
	struct mapfs		*addr_mfs;	  /* map entry being pinged */
	struct nd_addrlist	*addr_addrs;	  /* from netdir_getbyname() */
	struct tstamps		*addr_if_tstamps; /* one per listed address */
};
690Sstevel@tonic-gate 
/* A list of connectionless transports */

struct transp {
	struct transp		*tr_next;	/* next transport endpoint */
	int			tr_fd;		/* endpoint from t_open() */
	char			*tr_device;	/* netconfig device name */
	struct t_bind		*tr_taddr;	/* from t_alloc(T_BIND) */
	struct addrs		*tr_addrs;	/* hosts pinged via tr_fd */
};
790Sstevel@tonic-gate 
/* A list of map entries and their roundtrip times, for sorting */

struct sm {
	struct mapfs *mfs;		/* map entry that responded */
	struct timeval timeval;		/* its penalised response time */
};
860Sstevel@tonic-gate 
/* Helpers private to this file. */
static void free_transports(struct transp *);
static void calc_resp_time(struct timeval *);
static struct mapfs *sort_responses(struct transp *);

/* Comparison routines handed to qsort(3C). */
static int host_sm(const void *, const void *);
static int time_sm(const void *, const void *);

/* Defined elsewhere in the automounter. */
extern struct mapfs *add_mfs(struct mapfs *, int, struct mapfs **,
	struct mapfs **);
940Sstevel@tonic-gate 
950Sstevel@tonic-gate /*
960Sstevel@tonic-gate  * This routine is designed to be able to "ping"
970Sstevel@tonic-gate  * a list of hosts and create a list of responding
980Sstevel@tonic-gate  * hosts sorted by response time.
990Sstevel@tonic-gate  * This must be done without any prior
1000Sstevel@tonic-gate  * contact with the host - therefore the "ping"
1010Sstevel@tonic-gate  * must be to a "well-known" address.  The outstanding
1020Sstevel@tonic-gate  * candidate here is the address of "rpcbind".
1030Sstevel@tonic-gate  *
1040Sstevel@tonic-gate  * A response to a ping is no guarantee that the host
1050Sstevel@tonic-gate  * is running NFS, has a mount daemon, or exports
1060Sstevel@tonic-gate  * the required filesystem.  If the subsequent
1070Sstevel@tonic-gate  * mount attempt fails then the host will be marked
1080Sstevel@tonic-gate  * "ignore" and the host list will be re-pinged
1090Sstevel@tonic-gate  * (sans the bad host). This process continues
1100Sstevel@tonic-gate  * until a successful mount is achieved or until
1110Sstevel@tonic-gate  * there are no hosts left to try.
1120Sstevel@tonic-gate  */
1130Sstevel@tonic-gate enum clnt_stat
nfs_cast(struct mapfs * mfs_in,struct mapfs ** mfs_out,int timeout)1140Sstevel@tonic-gate nfs_cast(struct mapfs *mfs_in, struct mapfs **mfs_out, int timeout)
1150Sstevel@tonic-gate {
1160Sstevel@tonic-gate 	enum clnt_stat stat;
1170Sstevel@tonic-gate 	AUTH *sys_auth = authsys_create_default();
1180Sstevel@tonic-gate 	XDR xdr_stream;
1190Sstevel@tonic-gate 	register XDR *xdrs = &xdr_stream;
1200Sstevel@tonic-gate 	int outlen;
1210Sstevel@tonic-gate 	int if_inx;
1220Sstevel@tonic-gate 	int tsec;
1230Sstevel@tonic-gate 	int flag;
1240Sstevel@tonic-gate 	int sent, addr_cnt, rcvd, if_cnt;
1250Sstevel@tonic-gate 	fd_set readfds, mask;
1260Sstevel@tonic-gate 	register ulong_t xid;		/* xid - unique per addr */
1270Sstevel@tonic-gate 	register int i;
1280Sstevel@tonic-gate 	struct rpc_msg msg;
1290Sstevel@tonic-gate 	struct timeval t, rcv_timeout;
1300Sstevel@tonic-gate 	char outbuf[UDPMSGSIZE], inbuf[UDPMSGSIZE];
1310Sstevel@tonic-gate 	struct t_unitdata t_udata, t_rdata;
1320Sstevel@tonic-gate 	struct nd_hostserv hs;
1330Sstevel@tonic-gate 	struct nd_addrlist *retaddrs;
1340Sstevel@tonic-gate 	struct transp *tr_head;
1350Sstevel@tonic-gate 	struct transp *trans, *prev_trans;
1360Sstevel@tonic-gate 	struct addrs *a, *prev_addr;
1370Sstevel@tonic-gate 	struct tstamps *ts, *prev_ts;
1380Sstevel@tonic-gate 	NCONF_HANDLE *nc = NULL;
1390Sstevel@tonic-gate 	struct netconfig *nconf;
1400Sstevel@tonic-gate 	struct rlimit rl;
1410Sstevel@tonic-gate 	int dtbsize;
1420Sstevel@tonic-gate 	struct mapfs *mfs;
1430Sstevel@tonic-gate 
1440Sstevel@tonic-gate 	/*
1450Sstevel@tonic-gate 	 * For each connectionless transport get a list of
1460Sstevel@tonic-gate 	 * host addresses.  Any single host may have
1470Sstevel@tonic-gate 	 * addresses on several transports.
1480Sstevel@tonic-gate 	 */
1490Sstevel@tonic-gate 	addr_cnt = sent = rcvd = 0;
1500Sstevel@tonic-gate 	tr_head = NULL;
1510Sstevel@tonic-gate 	FD_ZERO(&mask);
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate 	/*
1540Sstevel@tonic-gate 	 * Set the default select size to be the maximum FD_SETSIZE, unless
1550Sstevel@tonic-gate 	 * the current rlimit is lower.
1560Sstevel@tonic-gate 	 */
1570Sstevel@tonic-gate 	dtbsize = FD_SETSIZE;
1580Sstevel@tonic-gate 	if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
1590Sstevel@tonic-gate 		if (rl.rlim_cur < FD_SETSIZE)
1600Sstevel@tonic-gate 			dtbsize = rl.rlim_cur;
1610Sstevel@tonic-gate 	}
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 	prev_trans = NULL;
1640Sstevel@tonic-gate 	prev_addr = NULL;
1650Sstevel@tonic-gate 	prev_ts = NULL;
1660Sstevel@tonic-gate 	for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
1670Sstevel@tonic-gate 
1680Sstevel@tonic-gate 		if (trace > 2)
1690Sstevel@tonic-gate 			trace_prt(1, "nfs_cast: host=%s\n", mfs->mfs_host);
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate 		nc = setnetconfig();
1720Sstevel@tonic-gate 		if (nc == NULL) {
1730Sstevel@tonic-gate 			stat = RPC_CANTSEND;
1740Sstevel@tonic-gate 			goto done_broad;
1750Sstevel@tonic-gate 		}
1760Sstevel@tonic-gate 		while (nconf = getnetconfig(nc)) {
1770Sstevel@tonic-gate 			if (!(nconf->nc_flag & NC_VISIBLE) ||
1780Sstevel@tonic-gate 			    nconf->nc_semantics != NC_TPI_CLTS ||
1790Sstevel@tonic-gate 			    (strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0))
1800Sstevel@tonic-gate 				continue;
1810Sstevel@tonic-gate 			trans = (struct transp *)malloc(sizeof (*trans));
1820Sstevel@tonic-gate 			if (trans == NULL) {
1830Sstevel@tonic-gate 				syslog(LOG_ERR, "no memory");
1840Sstevel@tonic-gate 				stat = RPC_CANTSEND;
1850Sstevel@tonic-gate 				goto done_broad;
1860Sstevel@tonic-gate 			}
1870Sstevel@tonic-gate 			(void) memset(trans, 0, sizeof (*trans));
1880Sstevel@tonic-gate 			if (tr_head == NULL)
1890Sstevel@tonic-gate 				tr_head = trans;
1900Sstevel@tonic-gate 			else
1910Sstevel@tonic-gate 				prev_trans->tr_next = trans;
1920Sstevel@tonic-gate 			prev_trans = trans;
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate 			trans->tr_fd = t_open(nconf->nc_device, O_RDWR, NULL);
1950Sstevel@tonic-gate 			if (trans->tr_fd < 0) {
1960Sstevel@tonic-gate 				syslog(LOG_ERR, "nfscast: t_open: %s:%m",
1970Sstevel@tonic-gate 					nconf->nc_device);
1980Sstevel@tonic-gate 				stat = RPC_CANTSEND;
1990Sstevel@tonic-gate 				goto done_broad;
2000Sstevel@tonic-gate 			}
2010Sstevel@tonic-gate 			if (t_bind(trans->tr_fd, (struct t_bind *)NULL,
2020Sstevel@tonic-gate 				(struct t_bind *)NULL) < 0) {
2030Sstevel@tonic-gate 				syslog(LOG_ERR, "nfscast: t_bind: %m");
2040Sstevel@tonic-gate 				stat = RPC_CANTSEND;
2050Sstevel@tonic-gate 				goto done_broad;
2060Sstevel@tonic-gate 			}
2070Sstevel@tonic-gate 			trans->tr_taddr =
2080Sstevel@tonic-gate 				/* LINTED pointer alignment */
2090Sstevel@tonic-gate 			(struct t_bind *)t_alloc(trans->tr_fd, T_BIND, T_ADDR);
2100Sstevel@tonic-gate 			if (trans->tr_taddr == (struct t_bind *)NULL) {
2110Sstevel@tonic-gate 				syslog(LOG_ERR, "nfscast: t_alloc: %m");
2120Sstevel@tonic-gate 				stat = RPC_SYSTEMERROR;
2130Sstevel@tonic-gate 				goto done_broad;
2140Sstevel@tonic-gate 			}
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 			trans->tr_device = nconf->nc_device;
2170Sstevel@tonic-gate 			FD_SET(trans->tr_fd, &mask);
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate 			if_inx = 0;
2200Sstevel@tonic-gate 			hs.h_host = mfs->mfs_host;
2210Sstevel@tonic-gate 			hs.h_serv = "rpcbind";
2220Sstevel@tonic-gate 			if (netdir_getbyname(nconf, &hs, &retaddrs) == ND_OK) {
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate 				/*
2250Sstevel@tonic-gate 				 * If mfs->ignore is previously set for
2260Sstevel@tonic-gate 				 * this map, clear it. Because a host can
2270Sstevel@tonic-gate 				 * have either v6 or v4 address
2280Sstevel@tonic-gate 				 */
2290Sstevel@tonic-gate 				if (mfs->mfs_ignore == 1)
2300Sstevel@tonic-gate 					mfs->mfs_ignore = 0;
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 				a = (struct addrs *)malloc(sizeof (*a));
2330Sstevel@tonic-gate 				if (a == NULL) {
2340Sstevel@tonic-gate 					syslog(LOG_ERR, "no memory");
2350Sstevel@tonic-gate 					stat = RPC_CANTSEND;
2360Sstevel@tonic-gate 					goto done_broad;
2370Sstevel@tonic-gate 				}
2380Sstevel@tonic-gate 				(void) memset(a, 0, sizeof (*a));
2390Sstevel@tonic-gate 				if (trans->tr_addrs == NULL)
2400Sstevel@tonic-gate 					trans->tr_addrs = a;
2410Sstevel@tonic-gate 				else
2420Sstevel@tonic-gate 					prev_addr->addr_next = a;
2430Sstevel@tonic-gate 				prev_addr = a;
2440Sstevel@tonic-gate 				a->addr_if_tstamps = NULL;
2450Sstevel@tonic-gate 				a->addr_mfs = mfs;
2460Sstevel@tonic-gate 				a->addr_addrs = retaddrs;
2470Sstevel@tonic-gate 				if_cnt = retaddrs->n_cnt;
2480Sstevel@tonic-gate 				while (if_cnt--) {
2490Sstevel@tonic-gate 					ts = (struct tstamps *)
2500Sstevel@tonic-gate 						malloc(sizeof (*ts));
2510Sstevel@tonic-gate 					if (ts == NULL) {
2520Sstevel@tonic-gate 						syslog(LOG_ERR, "no memory");
2530Sstevel@tonic-gate 						stat = RPC_CANTSEND;
2540Sstevel@tonic-gate 						goto done_broad;
2550Sstevel@tonic-gate 					}
2560Sstevel@tonic-gate 					(void) memset(ts, 0, sizeof (*ts));
2570Sstevel@tonic-gate 					ts->ts_penalty = mfs->mfs_penalty;
2580Sstevel@tonic-gate 					if (a->addr_if_tstamps == NULL)
2590Sstevel@tonic-gate 						a->addr_if_tstamps = ts;
2600Sstevel@tonic-gate 					else
2610Sstevel@tonic-gate 						prev_ts->ts_next = ts;
2620Sstevel@tonic-gate 					prev_ts = ts;
2630Sstevel@tonic-gate 					ts->ts_inx = if_inx++;
2640Sstevel@tonic-gate 					addr_cnt++;
2650Sstevel@tonic-gate 				}
2660Sstevel@tonic-gate 				break;
2670Sstevel@tonic-gate 			} else {
2680Sstevel@tonic-gate 				mfs->mfs_ignore = 1;
2690Sstevel@tonic-gate 				if (verbose)
2700Sstevel@tonic-gate 					syslog(LOG_ERR,
2710Sstevel@tonic-gate 				"%s:%s address not known",
2720Sstevel@tonic-gate 				mfs->mfs_host,
2730Sstevel@tonic-gate 				strcmp(nconf->nc_proto, NC_INET)?"IPv6":"IPv4");
2740Sstevel@tonic-gate 			}
2750Sstevel@tonic-gate 		} /* while */
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 		endnetconfig(nc);
2780Sstevel@tonic-gate 		nc = NULL;
2790Sstevel@tonic-gate 	} /* for */
2800Sstevel@tonic-gate 	if (addr_cnt == 0) {
2810Sstevel@tonic-gate 		syslog(LOG_ERR, "nfscast: couldn't find addresses");
2820Sstevel@tonic-gate 		stat = RPC_CANTSEND;
2830Sstevel@tonic-gate 		goto done_broad;
2840Sstevel@tonic-gate 	}
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	(void) gettimeofday(&t, (struct timezone *)0);
2870Sstevel@tonic-gate 	xid = (getpid() ^ t.tv_sec ^ t.tv_usec) & ~0xFF;
2880Sstevel@tonic-gate 	t.tv_usec = 0;
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate 	/* serialize the RPC header */
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate 	msg.rm_direction = CALL;
2930Sstevel@tonic-gate 	msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
2940Sstevel@tonic-gate 	msg.rm_call.cb_prog = RPCBPROG;
2950Sstevel@tonic-gate 	/*
2960Sstevel@tonic-gate 	 * we can not use RPCBVERS here since it doesn't exist in 4.X,
2970Sstevel@tonic-gate 	 * the fix to bug 1139883 has made the 4.X portmapper silent to
2980Sstevel@tonic-gate 	 * version mismatches. This causes the RPC call to the remote
2990Sstevel@tonic-gate 	 * portmapper to simply be ignored if it's not Version 2.
3000Sstevel@tonic-gate 	 */
3010Sstevel@tonic-gate 	msg.rm_call.cb_vers = PMAPVERS;
3020Sstevel@tonic-gate 	msg.rm_call.cb_proc = NULLPROC;
3030Sstevel@tonic-gate 	if (sys_auth == (AUTH *)NULL) {
3040Sstevel@tonic-gate 		stat = RPC_SYSTEMERROR;
3050Sstevel@tonic-gate 		goto done_broad;
3060Sstevel@tonic-gate 	}
3070Sstevel@tonic-gate 	msg.rm_call.cb_cred = sys_auth->ah_cred;
3080Sstevel@tonic-gate 	msg.rm_call.cb_verf = sys_auth->ah_verf;
3090Sstevel@tonic-gate 	xdrmem_create(xdrs, outbuf, sizeof (outbuf), XDR_ENCODE);
3100Sstevel@tonic-gate 	if (! xdr_callmsg(xdrs, &msg)) {
3110Sstevel@tonic-gate 		stat = RPC_CANTENCODEARGS;
3120Sstevel@tonic-gate 		goto done_broad;
3130Sstevel@tonic-gate 	}
3140Sstevel@tonic-gate 	outlen = (int)xdr_getpos(xdrs);
3150Sstevel@tonic-gate 	xdr_destroy(xdrs);
3160Sstevel@tonic-gate 
3170Sstevel@tonic-gate 	t_udata.opt.len = 0;
3180Sstevel@tonic-gate 	t_udata.udata.buf = outbuf;
3190Sstevel@tonic-gate 	t_udata.udata.len = outlen;
3200Sstevel@tonic-gate 
3210Sstevel@tonic-gate 	/*
3220Sstevel@tonic-gate 	 * Basic loop: send packet to all hosts and wait for response(s).
3230Sstevel@tonic-gate 	 * The response timeout grows larger per iteration.
3240Sstevel@tonic-gate 	 * A unique xid is assigned to each address in order to
3250Sstevel@tonic-gate 	 * correctly match the replies.
3260Sstevel@tonic-gate 	 */
3270Sstevel@tonic-gate 	for (tsec = 4; timeout > 0; tsec *= 2) {
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 		timeout -= tsec;
3300Sstevel@tonic-gate 		if (timeout <= 0)
3310Sstevel@tonic-gate 			tsec += timeout;
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 		rcv_timeout.tv_sec = tsec;
3340Sstevel@tonic-gate 		rcv_timeout.tv_usec = 0;
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate 		sent = 0;
3370Sstevel@tonic-gate 		for (trans = tr_head; trans; trans = trans->tr_next) {
3380Sstevel@tonic-gate 			for (a = trans->tr_addrs; a; a = a->addr_next) {
3390Sstevel@tonic-gate 				struct netbuf *if_netbuf =
3400Sstevel@tonic-gate 					a->addr_addrs->n_addrs;
3410Sstevel@tonic-gate 				ts = a->addr_if_tstamps;
3420Sstevel@tonic-gate 				if_cnt = a->addr_addrs->n_cnt;
3430Sstevel@tonic-gate 				while (if_cnt--) {
3440Sstevel@tonic-gate 
3450Sstevel@tonic-gate 					/*
3460Sstevel@tonic-gate 					 * xid is the first thing in
3470Sstevel@tonic-gate 					 * preserialized buffer
3480Sstevel@tonic-gate 					 */
3490Sstevel@tonic-gate 					/* LINTED pointer alignment */
3500Sstevel@tonic-gate 					*((ulong_t *)outbuf) =
3510Sstevel@tonic-gate 						htonl(xid + ts->ts_inx);
3520Sstevel@tonic-gate 					(void) gettimeofday(&(ts->ts_timeval),
3530Sstevel@tonic-gate 						(struct timezone *)0);
3540Sstevel@tonic-gate 					/*
3550Sstevel@tonic-gate 					 * Check if already received
3560Sstevel@tonic-gate 					 * from a previous iteration.
3570Sstevel@tonic-gate 					 */
3580Sstevel@tonic-gate 					if (ts->ts_rcvd) {
3590Sstevel@tonic-gate 						sent++;
3600Sstevel@tonic-gate 						ts = ts->ts_next;
3610Sstevel@tonic-gate 						continue;
3620Sstevel@tonic-gate 					}
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 					t_udata.addr = *if_netbuf++;
3650Sstevel@tonic-gate 
3660Sstevel@tonic-gate 					if (t_sndudata(trans->tr_fd,
3670Sstevel@tonic-gate 							&t_udata) == 0) {
3680Sstevel@tonic-gate 						sent++;
3690Sstevel@tonic-gate 					}
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate 					ts = ts->ts_next;
3720Sstevel@tonic-gate 				}
3730Sstevel@tonic-gate 			}
3740Sstevel@tonic-gate 		}
3750Sstevel@tonic-gate 		if (sent == 0) {		/* no packets sent ? */
3760Sstevel@tonic-gate 			stat = RPC_CANTSEND;
3770Sstevel@tonic-gate 			goto done_broad;
3780Sstevel@tonic-gate 		}
3790Sstevel@tonic-gate 
3800Sstevel@tonic-gate 		/*
3810Sstevel@tonic-gate 		 * Have sent all the packets.  Now collect the responses...
3820Sstevel@tonic-gate 		 */
3830Sstevel@tonic-gate 		rcvd = 0;
3840Sstevel@tonic-gate 	recv_again:
3850Sstevel@tonic-gate 		msg.acpted_rply.ar_verf = _null_auth;
3860Sstevel@tonic-gate 		msg.acpted_rply.ar_results.proc = xdr_void;
3870Sstevel@tonic-gate 		readfds = mask;
3880Sstevel@tonic-gate 
3890Sstevel@tonic-gate 		switch (select(dtbsize, &readfds,
3900Sstevel@tonic-gate 			(fd_set *)NULL, (fd_set *)NULL, &rcv_timeout)) {
3910Sstevel@tonic-gate 
3920Sstevel@tonic-gate 		case 0: /* Timed out */
3930Sstevel@tonic-gate 			/*
3940Sstevel@tonic-gate 			 * If we got at least one response in the
3950Sstevel@tonic-gate 			 * last interval, then don't wait for any
3960Sstevel@tonic-gate 			 * more.  In theory we should wait for
3970Sstevel@tonic-gate 			 * the max weighting (penalty) value so
3980Sstevel@tonic-gate 			 * that a very slow server has a chance to
3990Sstevel@tonic-gate 			 * respond but this could take a long time
4000Sstevel@tonic-gate 			 * if the admin has set a high weighting
4010Sstevel@tonic-gate 			 * value.
4020Sstevel@tonic-gate 			 */
4030Sstevel@tonic-gate 			if (rcvd > 0)
4040Sstevel@tonic-gate 				goto done_broad;
4050Sstevel@tonic-gate 
4060Sstevel@tonic-gate 			stat = RPC_TIMEDOUT;
4070Sstevel@tonic-gate 			continue;
4080Sstevel@tonic-gate 
4090Sstevel@tonic-gate 		case -1:  /* some kind of error */
4100Sstevel@tonic-gate 			if (errno == EINTR)
4110Sstevel@tonic-gate 				goto recv_again;
4120Sstevel@tonic-gate 			syslog(LOG_ERR, "nfscast: select: %m");
4130Sstevel@tonic-gate 			if (rcvd == 0)
4140Sstevel@tonic-gate 				stat = RPC_CANTRECV;
4150Sstevel@tonic-gate 			goto done_broad;
4160Sstevel@tonic-gate 
4170Sstevel@tonic-gate 		}  /* end of select results switch */
4180Sstevel@tonic-gate 
4190Sstevel@tonic-gate 		for (trans = tr_head; trans; trans = trans->tr_next) {
4200Sstevel@tonic-gate 			if (FD_ISSET(trans->tr_fd, &readfds))
4210Sstevel@tonic-gate 				break;
4220Sstevel@tonic-gate 		}
4230Sstevel@tonic-gate 		if (trans == NULL)
4240Sstevel@tonic-gate 			goto recv_again;
4250Sstevel@tonic-gate 
4260Sstevel@tonic-gate 	try_again:
4270Sstevel@tonic-gate 		t_rdata.addr = trans->tr_taddr->addr;
4280Sstevel@tonic-gate 		t_rdata.udata.buf = inbuf;
4290Sstevel@tonic-gate 		t_rdata.udata.maxlen = sizeof (inbuf);
4300Sstevel@tonic-gate 		t_rdata.udata.len = 0;
4310Sstevel@tonic-gate 		t_rdata.opt.len = 0;
4320Sstevel@tonic-gate 		if (t_rcvudata(trans->tr_fd, &t_rdata, &flag) < 0) {
4330Sstevel@tonic-gate 			if (errno == EINTR)
4340Sstevel@tonic-gate 				goto try_again;
4350Sstevel@tonic-gate 			syslog(LOG_ERR, "nfscast: t_rcvudata: %s:%m",
4360Sstevel@tonic-gate 				trans->tr_device);
4370Sstevel@tonic-gate 			stat = RPC_CANTRECV;
4380Sstevel@tonic-gate 			continue;
4390Sstevel@tonic-gate 		}
4400Sstevel@tonic-gate 		if (t_rdata.udata.len < sizeof (ulong_t))
4410Sstevel@tonic-gate 			goto recv_again;
4420Sstevel@tonic-gate 		if (flag & T_MORE) {
4430Sstevel@tonic-gate 			syslog(LOG_ERR,
4440Sstevel@tonic-gate 				"nfscast: t_rcvudata: %s: buffer overflow",
4450Sstevel@tonic-gate 				trans->tr_device);
4460Sstevel@tonic-gate 			goto recv_again;
4470Sstevel@tonic-gate 		}
4480Sstevel@tonic-gate 
4490Sstevel@tonic-gate 		/*
4500Sstevel@tonic-gate 		 * see if reply transaction id matches sent id.
4510Sstevel@tonic-gate 		 * If so, decode the results.
4520Sstevel@tonic-gate 		 * Note: received addr is ignored, it could be
4530Sstevel@tonic-gate 		 * different from the send addr if the host has
4540Sstevel@tonic-gate 		 * more than one addr.
4550Sstevel@tonic-gate 		 */
4560Sstevel@tonic-gate 		xdrmem_create(xdrs, inbuf, (uint_t)t_rdata.udata.len,
4570Sstevel@tonic-gate 								XDR_DECODE);
4580Sstevel@tonic-gate 		if (xdr_replymsg(xdrs, &msg)) {
4590Sstevel@tonic-gate 		    if (msg.rm_reply.rp_stat == MSG_ACCEPTED &&
4600Sstevel@tonic-gate 			(msg.rm_xid & ~0xFF) == xid) {
4610Sstevel@tonic-gate 			struct addrs *curr_addr;
4620Sstevel@tonic-gate 
4630Sstevel@tonic-gate 			i = msg.rm_xid & 0xFF;
4640Sstevel@tonic-gate 			for (curr_addr = trans->tr_addrs; curr_addr;
4650Sstevel@tonic-gate 			    curr_addr = curr_addr->addr_next) {
4660Sstevel@tonic-gate 			    for (ts = curr_addr->addr_if_tstamps; ts;
4670Sstevel@tonic-gate 				ts = ts->ts_next)
4680Sstevel@tonic-gate 				if (ts->ts_inx == i && !ts->ts_rcvd) {
4690Sstevel@tonic-gate 					ts->ts_rcvd = 1;
4700Sstevel@tonic-gate 					calc_resp_time(&ts->ts_timeval);
4710Sstevel@tonic-gate 					stat = RPC_SUCCESS;
4720Sstevel@tonic-gate 					rcvd++;
4730Sstevel@tonic-gate 					break;
4740Sstevel@tonic-gate 				}
4750Sstevel@tonic-gate 			}
4760Sstevel@tonic-gate 		    } /* otherwise, we just ignore the errors ... */
4770Sstevel@tonic-gate 		}
4780Sstevel@tonic-gate 		xdrs->x_op = XDR_FREE;
4790Sstevel@tonic-gate 		msg.acpted_rply.ar_results.proc = xdr_void;
4800Sstevel@tonic-gate 		(void) xdr_replymsg(xdrs, &msg);
4810Sstevel@tonic-gate 		XDR_DESTROY(xdrs);
4820Sstevel@tonic-gate 		if (rcvd == sent)
4830Sstevel@tonic-gate 			goto done_broad;
4840Sstevel@tonic-gate 		else
4850Sstevel@tonic-gate 			goto recv_again;
4860Sstevel@tonic-gate 	}
4870Sstevel@tonic-gate 	if (!rcvd)
4880Sstevel@tonic-gate 		stat = RPC_TIMEDOUT;
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate done_broad:
4910Sstevel@tonic-gate 	if (rcvd) {
4920Sstevel@tonic-gate 		*mfs_out = sort_responses(tr_head);
4930Sstevel@tonic-gate 		stat = RPC_SUCCESS;
4940Sstevel@tonic-gate 	}
4950Sstevel@tonic-gate 	if (nc)
4960Sstevel@tonic-gate 		endnetconfig(nc);
4970Sstevel@tonic-gate 	free_transports(tr_head);
4980Sstevel@tonic-gate 	AUTH_DESTROY(sys_auth);
4990Sstevel@tonic-gate 	return (stat);
5000Sstevel@tonic-gate }
5010Sstevel@tonic-gate 
5020Sstevel@tonic-gate /*
5030Sstevel@tonic-gate  * Go through all the responses and sort fastest to slowest.
5040Sstevel@tonic-gate  * Note that any penalty is added to the response time - so the
5050Sstevel@tonic-gate  * fastest response isn't necessarily the one that arrived first.
5060Sstevel@tonic-gate  */
5070Sstevel@tonic-gate static struct mapfs *
sort_responses(trans)5080Sstevel@tonic-gate sort_responses(trans)
5090Sstevel@tonic-gate 	struct transp *trans;
5100Sstevel@tonic-gate {
5110Sstevel@tonic-gate 	struct transp *t;
5120Sstevel@tonic-gate 	struct addrs *a;
5130Sstevel@tonic-gate 	struct tstamps *ti;
5140Sstevel@tonic-gate 	int i, size = 0, allocsize = 10;
5150Sstevel@tonic-gate 	struct mapfs *p, *mfs_head = NULL, *mfs_tail = NULL;
5160Sstevel@tonic-gate 	struct sm *buffer;
5170Sstevel@tonic-gate 
5180Sstevel@tonic-gate 	buffer = (struct sm *)malloc(allocsize * sizeof (struct sm));
5190Sstevel@tonic-gate 	if (!buffer) {
5200Sstevel@tonic-gate 		syslog(LOG_ERR, "sort_responses: malloc error.\n");
5210Sstevel@tonic-gate 		return (NULL);
5220Sstevel@tonic-gate 	}
5230Sstevel@tonic-gate 
5240Sstevel@tonic-gate 	for (t = trans; t; t = t->tr_next) {
5250Sstevel@tonic-gate 		for (a = t->tr_addrs; a; a = a->addr_next) {
5260Sstevel@tonic-gate 			for (ti = a->addr_if_tstamps;
5270Sstevel@tonic-gate 				ti; ti = ti->ts_next) {
5280Sstevel@tonic-gate 				if (!ti->ts_rcvd)
5290Sstevel@tonic-gate 					continue;
5300Sstevel@tonic-gate 				ti->ts_timeval.tv_usec +=
5310Sstevel@tonic-gate 					(ti->ts_penalty * PENALTY_WEIGHT);
5320Sstevel@tonic-gate 				if (ti->ts_timeval.tv_usec >= 1000000) {
5330Sstevel@tonic-gate 					ti->ts_timeval.tv_sec +=
5340Sstevel@tonic-gate 					(ti->ts_timeval.tv_usec / 1000000);
5350Sstevel@tonic-gate 					ti->ts_timeval.tv_usec =
5360Sstevel@tonic-gate 					(ti->ts_timeval.tv_usec % 1000000);
5370Sstevel@tonic-gate 				}
5380Sstevel@tonic-gate 
5390Sstevel@tonic-gate 				if (size >= allocsize) {
5400Sstevel@tonic-gate 					allocsize += 10;
5410Sstevel@tonic-gate 					buffer = (struct sm *)realloc(buffer,
5420Sstevel@tonic-gate 					    allocsize * sizeof (struct sm));
5430Sstevel@tonic-gate 					if (!buffer) {
5440Sstevel@tonic-gate 						syslog(LOG_ERR,
5450Sstevel@tonic-gate 					    "sort_responses: malloc error.\n");
5460Sstevel@tonic-gate 						return (NULL);
5470Sstevel@tonic-gate 					}
5480Sstevel@tonic-gate 				}
5490Sstevel@tonic-gate 				buffer[size].timeval = ti->ts_timeval;
5500Sstevel@tonic-gate 				buffer[size].mfs = a->addr_mfs;
5510Sstevel@tonic-gate 				size++;
5520Sstevel@tonic-gate 			}
5530Sstevel@tonic-gate 		}
5540Sstevel@tonic-gate 	}
5550Sstevel@tonic-gate 
5560Sstevel@tonic-gate #ifdef DEBUG
5570Sstevel@tonic-gate 	if (trace > 3) {
5580Sstevel@tonic-gate 		trace_prt(1, "  sort_responses: before host sort:\n");
5590Sstevel@tonic-gate 		for (i = 0; i < size; i++)
5600Sstevel@tonic-gate 			trace_prt(1, "    %s %d.%d\n", buffer[i].mfs->mfs_host,
5610Sstevel@tonic-gate 			buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
5620Sstevel@tonic-gate 		trace_prt(0, "\n");
5630Sstevel@tonic-gate 	}
5640Sstevel@tonic-gate #endif
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	qsort((void *)buffer, size, sizeof (struct sm), host_sm);
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate 	/*
5690Sstevel@tonic-gate 	 * Cope with multiply listed hosts  by choosing first time
5700Sstevel@tonic-gate 	 */
5710Sstevel@tonic-gate 	for (i = 1; i < size; i++) {
5720Sstevel@tonic-gate #ifdef DEBUG
5730Sstevel@tonic-gate 		if (trace > 3) {
5740Sstevel@tonic-gate 			trace_prt(1, "  sort_responses: comparing %s and %s\n",
5750Sstevel@tonic-gate 				buffer[i-1].mfs->mfs_host,
5760Sstevel@tonic-gate 				buffer[i].mfs->mfs_host);
5770Sstevel@tonic-gate 		}
5780Sstevel@tonic-gate #endif
5790Sstevel@tonic-gate 		if (strcmp(buffer[i-1].mfs->mfs_host,
5800Sstevel@tonic-gate 		    buffer[i].mfs->mfs_host) == 0)
5810Sstevel@tonic-gate 			memcpy(&buffer[i].timeval, &buffer[i-1].timeval,
5820Sstevel@tonic-gate 				sizeof (struct timeval));
5830Sstevel@tonic-gate 	}
5840Sstevel@tonic-gate 	if (trace > 3)
5850Sstevel@tonic-gate 		trace_prt(0, "\n");
5860Sstevel@tonic-gate 
5870Sstevel@tonic-gate #ifdef DEBUG
5880Sstevel@tonic-gate 	if (trace > 3) {
5890Sstevel@tonic-gate 		trace_prt(1, "  sort_responses: before time sort:\n");
5900Sstevel@tonic-gate 		for (i = 0; i < size; i++)
5910Sstevel@tonic-gate 			trace_prt(1, "    %s %d.%d\n", buffer[i].mfs->mfs_host,
5920Sstevel@tonic-gate 			buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
5930Sstevel@tonic-gate 		trace_prt(0, "\n");
5940Sstevel@tonic-gate 	}
5950Sstevel@tonic-gate #endif
5960Sstevel@tonic-gate 
5970Sstevel@tonic-gate 	qsort((void *)buffer, size, sizeof (struct sm), time_sm);
5980Sstevel@tonic-gate 
5990Sstevel@tonic-gate #ifdef DEBUG
6000Sstevel@tonic-gate 	if (trace > 3) {
6010Sstevel@tonic-gate 		trace_prt(1, "  sort_responses: after sort:\n");
6020Sstevel@tonic-gate 		for (i = 0; i < size; i++)
6030Sstevel@tonic-gate 			trace_prt(1, "    %s %d.%d\n", buffer[i].mfs->mfs_host,
6040Sstevel@tonic-gate 			buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
6050Sstevel@tonic-gate 		trace_prt(0, "\n");
6060Sstevel@tonic-gate 	}
6070Sstevel@tonic-gate #endif
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 	for (i = 0; i < size; i++) {
6100Sstevel@tonic-gate #ifdef DEBUG
6110Sstevel@tonic-gate 		if (trace > 3) {
6120Sstevel@tonic-gate 			trace_prt(1, "  sort_responses: adding %s\n",
6130Sstevel@tonic-gate 				buffer[i].mfs->mfs_host);
6140Sstevel@tonic-gate 		}
6150Sstevel@tonic-gate #endif
6160Sstevel@tonic-gate 		p = add_mfs(buffer[i].mfs, 0, &mfs_head, &mfs_tail);
6170Sstevel@tonic-gate 		if (!p)
6180Sstevel@tonic-gate 			return (NULL);
6190Sstevel@tonic-gate 	}
6200Sstevel@tonic-gate 	free(buffer);
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	return (mfs_head);
6230Sstevel@tonic-gate }
6240Sstevel@tonic-gate 
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate /*
6270Sstevel@tonic-gate  * Comparison routines called by qsort(3).
6280Sstevel@tonic-gate  */
host_sm(const void * a,const void * b)6290Sstevel@tonic-gate static int host_sm(const void *a, const void *b)
6300Sstevel@tonic-gate {
6310Sstevel@tonic-gate 	return (strcmp(((struct sm *)a)->mfs->mfs_host,
6320Sstevel@tonic-gate 			((struct sm *)b)->mfs->mfs_host));
6330Sstevel@tonic-gate }
6340Sstevel@tonic-gate 
time_sm(const void * a,const void * b)6350Sstevel@tonic-gate static int time_sm(const void *a, const void *b)
6360Sstevel@tonic-gate {
6370Sstevel@tonic-gate 	if (timercmp(&(((struct sm *)a)->timeval),
6380Sstevel@tonic-gate 	    &(((struct sm *)b)->timeval), < /* cstyle */))
6390Sstevel@tonic-gate 		return (-1);
6400Sstevel@tonic-gate 	else if (timercmp(&(((struct sm *)a)->timeval),
6410Sstevel@tonic-gate 	    &(((struct sm *)b)->timeval), > /* cstyle */))
6420Sstevel@tonic-gate 		return (1);
6430Sstevel@tonic-gate 	else
6440Sstevel@tonic-gate 		return (0);
6450Sstevel@tonic-gate }
6460Sstevel@tonic-gate 
/*
 * Given send_time which is the time a request
 * was transmitted to a server, subtract it
 * from the time "now" thereby converting it
 * to an elapsed time.
 *
 * The result overwrites *send_time.  gettimeofday() reports wall-clock
 * time, so "now" can precede send_time if the clock was set back in
 * between; such a negative interval is reported as zero.
 */
static void
calc_resp_time(struct timeval *send_time)
{
	struct timeval time_now;

	(void) gettimeofday(&time_now, (struct timezone *)0);

	/* Borrow a second so the microsecond difference stays positive */
	if (time_now.tv_usec < send_time->tv_usec) {
		time_now.tv_sec--;
		time_now.tv_usec += 1000000;
	}
	send_time->tv_sec = time_now.tv_sec - send_time->tv_sec;
	send_time->tv_usec = time_now.tv_usec - send_time->tv_usec;

	if (send_time->tv_sec < 0) {
		send_time->tv_sec = 0;
		send_time->tv_usec = 0;
	}
}
6670Sstevel@tonic-gate 
6680Sstevel@tonic-gate static void
free_transports(trans)6690Sstevel@tonic-gate free_transports(trans)
6700Sstevel@tonic-gate 	struct transp *trans;
6710Sstevel@tonic-gate {
6720Sstevel@tonic-gate 	struct transp *t, *tmpt = NULL;
6730Sstevel@tonic-gate 	struct addrs *a, *tmpa = NULL;
6740Sstevel@tonic-gate 	struct tstamps *ts, *tmpts = NULL;
6750Sstevel@tonic-gate 
6760Sstevel@tonic-gate 	for (t = trans; t; t = tmpt) {
6770Sstevel@tonic-gate 		if (t->tr_taddr)
6780Sstevel@tonic-gate 			(void) t_free((char *)t->tr_taddr, T_BIND);
6790Sstevel@tonic-gate 		if (t->tr_fd > 0)
6800Sstevel@tonic-gate 			(void) t_close(t->tr_fd);
6810Sstevel@tonic-gate 		for (a = t->tr_addrs; a; a = tmpa) {
6820Sstevel@tonic-gate 			for (ts = a->addr_if_tstamps; ts; ts = tmpts) {
6830Sstevel@tonic-gate 				tmpts = ts->ts_next;
6840Sstevel@tonic-gate 				free(ts);
6850Sstevel@tonic-gate 			}
6860Sstevel@tonic-gate 			(void) netdir_free((char *)a->addr_addrs, ND_ADDRLIST);
6870Sstevel@tonic-gate 			tmpa = a->addr_next;
6880Sstevel@tonic-gate 			free(a);
6890Sstevel@tonic-gate 		}
6900Sstevel@tonic-gate 		tmpt = t->tr_next;
6910Sstevel@tonic-gate 		free(t);
6920Sstevel@tonic-gate 	}
6930Sstevel@tonic-gate }
694