xref: /onnv-gate/usr/src/cmd/fs.d/autofs/nfs_cast.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  *	nfs_cast.c : broadcast to a specific group of NFS servers
24*0Sstevel@tonic-gate  *
25*0Sstevel@tonic-gate  *	Copyright (c) 1988-1996,1998,1999,2001 by Sun Microsystems, Inc.
26*0Sstevel@tonic-gate  *	All rights reserved.
27*0Sstevel@tonic-gate  */
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
30*0Sstevel@tonic-gate 
31*0Sstevel@tonic-gate #include <stdio.h>
32*0Sstevel@tonic-gate #include <syslog.h>
33*0Sstevel@tonic-gate #include <errno.h>
34*0Sstevel@tonic-gate #include <string.h>
35*0Sstevel@tonic-gate #include <sys/types.h>
36*0Sstevel@tonic-gate #include <sys/time.h>
37*0Sstevel@tonic-gate #include <sys/resource.h>
38*0Sstevel@tonic-gate #include <unistd.h>
39*0Sstevel@tonic-gate #include <stdlib.h>
40*0Sstevel@tonic-gate #include <rpc/rpc.h>
41*0Sstevel@tonic-gate #include <rpc/clnt_soc.h>
42*0Sstevel@tonic-gate #include <rpc/nettype.h>
43*0Sstevel@tonic-gate #include <rpc/pmap_prot.h>
44*0Sstevel@tonic-gate #include <netconfig.h>
45*0Sstevel@tonic-gate #include <netdir.h>
46*0Sstevel@tonic-gate #include <nfs/nfs.h>
47*0Sstevel@tonic-gate #define	NFSCLIENT
48*0Sstevel@tonic-gate #include <locale.h>
49*0Sstevel@tonic-gate #include "automount.h"
50*0Sstevel@tonic-gate 
/*
 * Microseconds added to a host's measured response time for each unit
 * of its penalty value (see sort_responses()).
 */
#define	PENALTY_WEIGHT	100000
52*0Sstevel@tonic-gate 
/*
 * Per-interface ping record.  One of these exists for every address
 * returned for a host on a given transport.
 *
 * No storage class is given: this declares only a type, so "static"
 * would have no effect and merely provokes a compiler warning.
 */
struct tstamps {
	struct tstamps	*ts_next;	/* next interface of the same host */
	int		ts_penalty;	/* copied from the map entry */
	int		ts_inx;		/* interface index, added to the xid */
	int		ts_rcvd;	/* non-zero once a reply was matched */
	struct timeval	ts_timeval;	/* send time, then elapsed time */
};
60*0Sstevel@tonic-gate 
61*0Sstevel@tonic-gate /* A list of addresses - all belonging to the same transport */
62*0Sstevel@tonic-gate 
/*
 * The addresses of one host on one transport, together with the map
 * entry they were looked up for and one timestamp record per address.
 *
 * No storage class is given: this declares only a type, so "static"
 * would have no effect and merely provokes a compiler warning.
 */
struct addrs {
	struct addrs		*addr_next;	/* next host on transport */
	struct mapfs		*addr_mfs;	/* map entry being pinged */
	struct nd_addrlist	*addr_addrs;	/* from netdir_getbyname() */
	struct tstamps		*addr_if_tstamps; /* one per address */
};
69*0Sstevel@tonic-gate 
70*0Sstevel@tonic-gate /* A list of connectionless transports */
71*0Sstevel@tonic-gate 
/*
 * One open connectionless transport endpoint and the addresses that
 * are pinged through it.
 *
 * No storage class is given: this declares only a type, so "static"
 * would have no effect and merely provokes a compiler warning.
 */
struct transp {
	struct transp		*tr_next;	/* next endpoint */
	int			tr_fd;		/* from t_open(), -1 on error */
	char			*tr_device;	/* netconfig device name */
	struct t_bind		*tr_taddr;	/* t_alloc()ed receive addr */
	struct addrs		*tr_addrs;	/* hosts pinged via tr_fd */
};
79*0Sstevel@tonic-gate 
80*0Sstevel@tonic-gate /* A list of map entries and their roundtrip times, for sorting */
81*0Sstevel@tonic-gate 
/*
 * Sort record used by sort_responses(): a map entry paired with the
 * (penalty adjusted) response time measured for it.
 */
struct sm {
	struct mapfs	*mfs;		/* map entry that answered */
	struct timeval	timeval;	/* its weighted response time */
};
86*0Sstevel@tonic-gate 
/* Helpers private to this file. */
static void free_transports(struct transp *trans);
static void calc_resp_time(struct timeval *send_time);
static struct mapfs *sort_responses(struct transp *trans);

/* qsort(3C) comparison callbacks operating on struct sm records. */
static int host_sm(const void *a, const void *b);
static int time_sm(const void *a, const void *b);

/* Defined elsewhere in the automounter. */
extern struct mapfs *add_mfs(struct mapfs *, int, struct mapfs **,
	struct mapfs **);
94*0Sstevel@tonic-gate 
95*0Sstevel@tonic-gate /*
96*0Sstevel@tonic-gate  * This routine is designed to be able to "ping"
97*0Sstevel@tonic-gate  * a list of hosts and create a list of responding
98*0Sstevel@tonic-gate  * hosts sorted by response time.
99*0Sstevel@tonic-gate  * This must be done without any prior
100*0Sstevel@tonic-gate  * contact with the host - therefore the "ping"
101*0Sstevel@tonic-gate  * must be to a "well-known" address.  The outstanding
102*0Sstevel@tonic-gate  * candidate here is the address of "rpcbind".
103*0Sstevel@tonic-gate  *
104*0Sstevel@tonic-gate  * A response to a ping is no guarantee that the host
105*0Sstevel@tonic-gate  * is running NFS, has a mount daemon, or exports
106*0Sstevel@tonic-gate  * the required filesystem.  If the subsequent
107*0Sstevel@tonic-gate  * mount attempt fails then the host will be marked
108*0Sstevel@tonic-gate  * "ignore" and the host list will be re-pinged
109*0Sstevel@tonic-gate  * (sans the bad host). This process continues
110*0Sstevel@tonic-gate  * until a successful mount is achieved or until
111*0Sstevel@tonic-gate  * there are no hosts left to try.
112*0Sstevel@tonic-gate  */
113*0Sstevel@tonic-gate enum clnt_stat
114*0Sstevel@tonic-gate nfs_cast(struct mapfs *mfs_in, struct mapfs **mfs_out, int timeout)
115*0Sstevel@tonic-gate {
116*0Sstevel@tonic-gate 	enum clnt_stat stat;
117*0Sstevel@tonic-gate 	AUTH *sys_auth = authsys_create_default();
118*0Sstevel@tonic-gate 	XDR xdr_stream;
119*0Sstevel@tonic-gate 	register XDR *xdrs = &xdr_stream;
120*0Sstevel@tonic-gate 	int outlen;
121*0Sstevel@tonic-gate 	int if_inx;
122*0Sstevel@tonic-gate 	int tsec;
123*0Sstevel@tonic-gate 	int flag;
124*0Sstevel@tonic-gate 	int sent, addr_cnt, rcvd, if_cnt;
125*0Sstevel@tonic-gate 	fd_set readfds, mask;
126*0Sstevel@tonic-gate 	register ulong_t xid;		/* xid - unique per addr */
127*0Sstevel@tonic-gate 	register int i;
128*0Sstevel@tonic-gate 	struct rpc_msg msg;
129*0Sstevel@tonic-gate 	struct timeval t, rcv_timeout;
130*0Sstevel@tonic-gate 	char outbuf[UDPMSGSIZE], inbuf[UDPMSGSIZE];
131*0Sstevel@tonic-gate 	struct t_unitdata t_udata, t_rdata;
132*0Sstevel@tonic-gate 	struct nd_hostserv hs;
133*0Sstevel@tonic-gate 	struct nd_addrlist *retaddrs;
134*0Sstevel@tonic-gate 	struct transp *tr_head;
135*0Sstevel@tonic-gate 	struct transp *trans, *prev_trans;
136*0Sstevel@tonic-gate 	struct addrs *a, *prev_addr;
137*0Sstevel@tonic-gate 	struct tstamps *ts, *prev_ts;
138*0Sstevel@tonic-gate 	NCONF_HANDLE *nc = NULL;
139*0Sstevel@tonic-gate 	struct netconfig *nconf;
140*0Sstevel@tonic-gate 	struct rlimit rl;
141*0Sstevel@tonic-gate 	int dtbsize;
142*0Sstevel@tonic-gate 	struct mapfs *mfs;
143*0Sstevel@tonic-gate 
144*0Sstevel@tonic-gate 	/*
145*0Sstevel@tonic-gate 	 * For each connectionless transport get a list of
146*0Sstevel@tonic-gate 	 * host addresses.  Any single host may have
147*0Sstevel@tonic-gate 	 * addresses on several transports.
148*0Sstevel@tonic-gate 	 */
149*0Sstevel@tonic-gate 	addr_cnt = sent = rcvd = 0;
150*0Sstevel@tonic-gate 	tr_head = NULL;
151*0Sstevel@tonic-gate 	FD_ZERO(&mask);
152*0Sstevel@tonic-gate 
153*0Sstevel@tonic-gate 	/*
154*0Sstevel@tonic-gate 	 * Set the default select size to be the maximum FD_SETSIZE, unless
155*0Sstevel@tonic-gate 	 * the current rlimit is lower.
156*0Sstevel@tonic-gate 	 */
157*0Sstevel@tonic-gate 	dtbsize = FD_SETSIZE;
158*0Sstevel@tonic-gate 	if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
159*0Sstevel@tonic-gate 		if (rl.rlim_cur < FD_SETSIZE)
160*0Sstevel@tonic-gate 			dtbsize = rl.rlim_cur;
161*0Sstevel@tonic-gate 	}
162*0Sstevel@tonic-gate 
163*0Sstevel@tonic-gate 	prev_trans = NULL;
164*0Sstevel@tonic-gate 	prev_addr = NULL;
165*0Sstevel@tonic-gate 	prev_ts = NULL;
166*0Sstevel@tonic-gate 	for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
167*0Sstevel@tonic-gate 
168*0Sstevel@tonic-gate 		if (trace > 2)
169*0Sstevel@tonic-gate 			trace_prt(1, "nfs_cast: host=%s\n", mfs->mfs_host);
170*0Sstevel@tonic-gate 
171*0Sstevel@tonic-gate 		nc = setnetconfig();
172*0Sstevel@tonic-gate 		if (nc == NULL) {
173*0Sstevel@tonic-gate 			stat = RPC_CANTSEND;
174*0Sstevel@tonic-gate 			goto done_broad;
175*0Sstevel@tonic-gate 		}
176*0Sstevel@tonic-gate 		while (nconf = getnetconfig(nc)) {
177*0Sstevel@tonic-gate 			if (!(nconf->nc_flag & NC_VISIBLE) ||
178*0Sstevel@tonic-gate 			    nconf->nc_semantics != NC_TPI_CLTS ||
179*0Sstevel@tonic-gate 			    (strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0))
180*0Sstevel@tonic-gate 				continue;
181*0Sstevel@tonic-gate 			trans = (struct transp *)malloc(sizeof (*trans));
182*0Sstevel@tonic-gate 			if (trans == NULL) {
183*0Sstevel@tonic-gate 				syslog(LOG_ERR, "no memory");
184*0Sstevel@tonic-gate 				stat = RPC_CANTSEND;
185*0Sstevel@tonic-gate 				goto done_broad;
186*0Sstevel@tonic-gate 			}
187*0Sstevel@tonic-gate 			(void) memset(trans, 0, sizeof (*trans));
188*0Sstevel@tonic-gate 			if (tr_head == NULL)
189*0Sstevel@tonic-gate 				tr_head = trans;
190*0Sstevel@tonic-gate 			else
191*0Sstevel@tonic-gate 				prev_trans->tr_next = trans;
192*0Sstevel@tonic-gate 			prev_trans = trans;
193*0Sstevel@tonic-gate 
194*0Sstevel@tonic-gate 			trans->tr_fd = t_open(nconf->nc_device, O_RDWR, NULL);
195*0Sstevel@tonic-gate 			if (trans->tr_fd < 0) {
196*0Sstevel@tonic-gate 				syslog(LOG_ERR, "nfscast: t_open: %s:%m",
197*0Sstevel@tonic-gate 					nconf->nc_device);
198*0Sstevel@tonic-gate 				stat = RPC_CANTSEND;
199*0Sstevel@tonic-gate 				goto done_broad;
200*0Sstevel@tonic-gate 			}
201*0Sstevel@tonic-gate 			if (t_bind(trans->tr_fd, (struct t_bind *)NULL,
202*0Sstevel@tonic-gate 				(struct t_bind *)NULL) < 0) {
203*0Sstevel@tonic-gate 				syslog(LOG_ERR, "nfscast: t_bind: %m");
204*0Sstevel@tonic-gate 				stat = RPC_CANTSEND;
205*0Sstevel@tonic-gate 				goto done_broad;
206*0Sstevel@tonic-gate 			}
207*0Sstevel@tonic-gate 			trans->tr_taddr =
208*0Sstevel@tonic-gate 				/* LINTED pointer alignment */
209*0Sstevel@tonic-gate 			(struct t_bind *)t_alloc(trans->tr_fd, T_BIND, T_ADDR);
210*0Sstevel@tonic-gate 			if (trans->tr_taddr == (struct t_bind *)NULL) {
211*0Sstevel@tonic-gate 				syslog(LOG_ERR, "nfscast: t_alloc: %m");
212*0Sstevel@tonic-gate 				stat = RPC_SYSTEMERROR;
213*0Sstevel@tonic-gate 				goto done_broad;
214*0Sstevel@tonic-gate 			}
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate 			trans->tr_device = nconf->nc_device;
217*0Sstevel@tonic-gate 			FD_SET(trans->tr_fd, &mask);
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate 			if_inx = 0;
220*0Sstevel@tonic-gate 			hs.h_host = mfs->mfs_host;
221*0Sstevel@tonic-gate 			hs.h_serv = "rpcbind";
222*0Sstevel@tonic-gate 			if (netdir_getbyname(nconf, &hs, &retaddrs) == ND_OK) {
223*0Sstevel@tonic-gate 
224*0Sstevel@tonic-gate 				/*
225*0Sstevel@tonic-gate 				 * If mfs->ignore is previously set for
226*0Sstevel@tonic-gate 				 * this map, clear it. Because a host can
227*0Sstevel@tonic-gate 				 * have either v6 or v4 address
228*0Sstevel@tonic-gate 				 */
229*0Sstevel@tonic-gate 				if (mfs->mfs_ignore == 1)
230*0Sstevel@tonic-gate 					mfs->mfs_ignore = 0;
231*0Sstevel@tonic-gate 
232*0Sstevel@tonic-gate 				a = (struct addrs *)malloc(sizeof (*a));
233*0Sstevel@tonic-gate 				if (a == NULL) {
234*0Sstevel@tonic-gate 					syslog(LOG_ERR, "no memory");
235*0Sstevel@tonic-gate 					stat = RPC_CANTSEND;
236*0Sstevel@tonic-gate 					goto done_broad;
237*0Sstevel@tonic-gate 				}
238*0Sstevel@tonic-gate 				(void) memset(a, 0, sizeof (*a));
239*0Sstevel@tonic-gate 				if (trans->tr_addrs == NULL)
240*0Sstevel@tonic-gate 					trans->tr_addrs = a;
241*0Sstevel@tonic-gate 				else
242*0Sstevel@tonic-gate 					prev_addr->addr_next = a;
243*0Sstevel@tonic-gate 				prev_addr = a;
244*0Sstevel@tonic-gate 				a->addr_if_tstamps = NULL;
245*0Sstevel@tonic-gate 				a->addr_mfs = mfs;
246*0Sstevel@tonic-gate 				a->addr_addrs = retaddrs;
247*0Sstevel@tonic-gate 				if_cnt = retaddrs->n_cnt;
248*0Sstevel@tonic-gate 				while (if_cnt--) {
249*0Sstevel@tonic-gate 					ts = (struct tstamps *)
250*0Sstevel@tonic-gate 						malloc(sizeof (*ts));
251*0Sstevel@tonic-gate 					if (ts == NULL) {
252*0Sstevel@tonic-gate 						syslog(LOG_ERR, "no memory");
253*0Sstevel@tonic-gate 						stat = RPC_CANTSEND;
254*0Sstevel@tonic-gate 						goto done_broad;
255*0Sstevel@tonic-gate 					}
256*0Sstevel@tonic-gate 					(void) memset(ts, 0, sizeof (*ts));
257*0Sstevel@tonic-gate 					ts->ts_penalty = mfs->mfs_penalty;
258*0Sstevel@tonic-gate 					if (a->addr_if_tstamps == NULL)
259*0Sstevel@tonic-gate 						a->addr_if_tstamps = ts;
260*0Sstevel@tonic-gate 					else
261*0Sstevel@tonic-gate 						prev_ts->ts_next = ts;
262*0Sstevel@tonic-gate 					prev_ts = ts;
263*0Sstevel@tonic-gate 					ts->ts_inx = if_inx++;
264*0Sstevel@tonic-gate 					addr_cnt++;
265*0Sstevel@tonic-gate 				}
266*0Sstevel@tonic-gate 				break;
267*0Sstevel@tonic-gate 			} else {
268*0Sstevel@tonic-gate 				mfs->mfs_ignore = 1;
269*0Sstevel@tonic-gate 				if (verbose)
270*0Sstevel@tonic-gate 					syslog(LOG_ERR,
271*0Sstevel@tonic-gate 				"%s:%s address not known",
272*0Sstevel@tonic-gate 				mfs->mfs_host,
273*0Sstevel@tonic-gate 				strcmp(nconf->nc_proto, NC_INET)?"IPv6":"IPv4");
274*0Sstevel@tonic-gate 			}
275*0Sstevel@tonic-gate 		} /* while */
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate 		endnetconfig(nc);
278*0Sstevel@tonic-gate 		nc = NULL;
279*0Sstevel@tonic-gate 	} /* for */
280*0Sstevel@tonic-gate 	if (addr_cnt == 0) {
281*0Sstevel@tonic-gate 		syslog(LOG_ERR, "nfscast: couldn't find addresses");
282*0Sstevel@tonic-gate 		stat = RPC_CANTSEND;
283*0Sstevel@tonic-gate 		goto done_broad;
284*0Sstevel@tonic-gate 	}
285*0Sstevel@tonic-gate 
286*0Sstevel@tonic-gate 	(void) gettimeofday(&t, (struct timezone *)0);
287*0Sstevel@tonic-gate 	xid = (getpid() ^ t.tv_sec ^ t.tv_usec) & ~0xFF;
288*0Sstevel@tonic-gate 	t.tv_usec = 0;
289*0Sstevel@tonic-gate 
290*0Sstevel@tonic-gate 	/* serialize the RPC header */
291*0Sstevel@tonic-gate 
292*0Sstevel@tonic-gate 	msg.rm_direction = CALL;
293*0Sstevel@tonic-gate 	msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
294*0Sstevel@tonic-gate 	msg.rm_call.cb_prog = RPCBPROG;
295*0Sstevel@tonic-gate 	/*
296*0Sstevel@tonic-gate 	 * we can not use RPCBVERS here since it doesn't exist in 4.X,
297*0Sstevel@tonic-gate 	 * the fix to bug 1139883 has made the 4.X portmapper silent to
298*0Sstevel@tonic-gate 	 * version mismatches. This causes the RPC call to the remote
299*0Sstevel@tonic-gate 	 * portmapper to simply be ignored if it's not Version 2.
300*0Sstevel@tonic-gate 	 */
301*0Sstevel@tonic-gate 	msg.rm_call.cb_vers = PMAPVERS;
302*0Sstevel@tonic-gate 	msg.rm_call.cb_proc = NULLPROC;
303*0Sstevel@tonic-gate 	if (sys_auth == (AUTH *)NULL) {
304*0Sstevel@tonic-gate 		stat = RPC_SYSTEMERROR;
305*0Sstevel@tonic-gate 		goto done_broad;
306*0Sstevel@tonic-gate 	}
307*0Sstevel@tonic-gate 	msg.rm_call.cb_cred = sys_auth->ah_cred;
308*0Sstevel@tonic-gate 	msg.rm_call.cb_verf = sys_auth->ah_verf;
309*0Sstevel@tonic-gate 	xdrmem_create(xdrs, outbuf, sizeof (outbuf), XDR_ENCODE);
310*0Sstevel@tonic-gate 	if (! xdr_callmsg(xdrs, &msg)) {
311*0Sstevel@tonic-gate 		stat = RPC_CANTENCODEARGS;
312*0Sstevel@tonic-gate 		goto done_broad;
313*0Sstevel@tonic-gate 	}
314*0Sstevel@tonic-gate 	outlen = (int)xdr_getpos(xdrs);
315*0Sstevel@tonic-gate 	xdr_destroy(xdrs);
316*0Sstevel@tonic-gate 
317*0Sstevel@tonic-gate 	t_udata.opt.len = 0;
318*0Sstevel@tonic-gate 	t_udata.udata.buf = outbuf;
319*0Sstevel@tonic-gate 	t_udata.udata.len = outlen;
320*0Sstevel@tonic-gate 
321*0Sstevel@tonic-gate 	/*
322*0Sstevel@tonic-gate 	 * Basic loop: send packet to all hosts and wait for response(s).
323*0Sstevel@tonic-gate 	 * The response timeout grows larger per iteration.
324*0Sstevel@tonic-gate 	 * A unique xid is assigned to each address in order to
325*0Sstevel@tonic-gate 	 * correctly match the replies.
326*0Sstevel@tonic-gate 	 */
327*0Sstevel@tonic-gate 	for (tsec = 4; timeout > 0; tsec *= 2) {
328*0Sstevel@tonic-gate 
329*0Sstevel@tonic-gate 		timeout -= tsec;
330*0Sstevel@tonic-gate 		if (timeout <= 0)
331*0Sstevel@tonic-gate 			tsec += timeout;
332*0Sstevel@tonic-gate 
333*0Sstevel@tonic-gate 		rcv_timeout.tv_sec = tsec;
334*0Sstevel@tonic-gate 		rcv_timeout.tv_usec = 0;
335*0Sstevel@tonic-gate 
336*0Sstevel@tonic-gate 		sent = 0;
337*0Sstevel@tonic-gate 		for (trans = tr_head; trans; trans = trans->tr_next) {
338*0Sstevel@tonic-gate 			for (a = trans->tr_addrs; a; a = a->addr_next) {
339*0Sstevel@tonic-gate 				struct netbuf *if_netbuf =
340*0Sstevel@tonic-gate 					a->addr_addrs->n_addrs;
341*0Sstevel@tonic-gate 				ts = a->addr_if_tstamps;
342*0Sstevel@tonic-gate 				if_cnt = a->addr_addrs->n_cnt;
343*0Sstevel@tonic-gate 				while (if_cnt--) {
344*0Sstevel@tonic-gate 
345*0Sstevel@tonic-gate 					/*
346*0Sstevel@tonic-gate 					 * xid is the first thing in
347*0Sstevel@tonic-gate 					 * preserialized buffer
348*0Sstevel@tonic-gate 					 */
349*0Sstevel@tonic-gate 					/* LINTED pointer alignment */
350*0Sstevel@tonic-gate 					*((ulong_t *)outbuf) =
351*0Sstevel@tonic-gate 						htonl(xid + ts->ts_inx);
352*0Sstevel@tonic-gate 					(void) gettimeofday(&(ts->ts_timeval),
353*0Sstevel@tonic-gate 						(struct timezone *)0);
354*0Sstevel@tonic-gate 					/*
355*0Sstevel@tonic-gate 					 * Check if already received
356*0Sstevel@tonic-gate 					 * from a previous iteration.
357*0Sstevel@tonic-gate 					 */
358*0Sstevel@tonic-gate 					if (ts->ts_rcvd) {
359*0Sstevel@tonic-gate 						sent++;
360*0Sstevel@tonic-gate 						ts = ts->ts_next;
361*0Sstevel@tonic-gate 						continue;
362*0Sstevel@tonic-gate 					}
363*0Sstevel@tonic-gate 
364*0Sstevel@tonic-gate 					t_udata.addr = *if_netbuf++;
365*0Sstevel@tonic-gate 
366*0Sstevel@tonic-gate 					if (t_sndudata(trans->tr_fd,
367*0Sstevel@tonic-gate 							&t_udata) == 0) {
368*0Sstevel@tonic-gate 						sent++;
369*0Sstevel@tonic-gate 					}
370*0Sstevel@tonic-gate 
371*0Sstevel@tonic-gate 					ts = ts->ts_next;
372*0Sstevel@tonic-gate 				}
373*0Sstevel@tonic-gate 			}
374*0Sstevel@tonic-gate 		}
375*0Sstevel@tonic-gate 		if (sent == 0) {		/* no packets sent ? */
376*0Sstevel@tonic-gate 			stat = RPC_CANTSEND;
377*0Sstevel@tonic-gate 			goto done_broad;
378*0Sstevel@tonic-gate 		}
379*0Sstevel@tonic-gate 
380*0Sstevel@tonic-gate 		/*
381*0Sstevel@tonic-gate 		 * Have sent all the packets.  Now collect the responses...
382*0Sstevel@tonic-gate 		 */
383*0Sstevel@tonic-gate 		rcvd = 0;
384*0Sstevel@tonic-gate 	recv_again:
385*0Sstevel@tonic-gate 		msg.acpted_rply.ar_verf = _null_auth;
386*0Sstevel@tonic-gate 		msg.acpted_rply.ar_results.proc = xdr_void;
387*0Sstevel@tonic-gate 		readfds = mask;
388*0Sstevel@tonic-gate 
389*0Sstevel@tonic-gate 		switch (select(dtbsize, &readfds,
390*0Sstevel@tonic-gate 			(fd_set *)NULL, (fd_set *)NULL, &rcv_timeout)) {
391*0Sstevel@tonic-gate 
392*0Sstevel@tonic-gate 		case 0: /* Timed out */
393*0Sstevel@tonic-gate 			/*
394*0Sstevel@tonic-gate 			 * If we got at least one response in the
395*0Sstevel@tonic-gate 			 * last interval, then don't wait for any
396*0Sstevel@tonic-gate 			 * more.  In theory we should wait for
397*0Sstevel@tonic-gate 			 * the max weighting (penalty) value so
398*0Sstevel@tonic-gate 			 * that a very slow server has a chance to
399*0Sstevel@tonic-gate 			 * respond but this could take a long time
400*0Sstevel@tonic-gate 			 * if the admin has set a high weighting
401*0Sstevel@tonic-gate 			 * value.
402*0Sstevel@tonic-gate 			 */
403*0Sstevel@tonic-gate 			if (rcvd > 0)
404*0Sstevel@tonic-gate 				goto done_broad;
405*0Sstevel@tonic-gate 
406*0Sstevel@tonic-gate 			stat = RPC_TIMEDOUT;
407*0Sstevel@tonic-gate 			continue;
408*0Sstevel@tonic-gate 
409*0Sstevel@tonic-gate 		case -1:  /* some kind of error */
410*0Sstevel@tonic-gate 			if (errno == EINTR)
411*0Sstevel@tonic-gate 				goto recv_again;
412*0Sstevel@tonic-gate 			syslog(LOG_ERR, "nfscast: select: %m");
413*0Sstevel@tonic-gate 			if (rcvd == 0)
414*0Sstevel@tonic-gate 				stat = RPC_CANTRECV;
415*0Sstevel@tonic-gate 			goto done_broad;
416*0Sstevel@tonic-gate 
417*0Sstevel@tonic-gate 		}  /* end of select results switch */
418*0Sstevel@tonic-gate 
419*0Sstevel@tonic-gate 		for (trans = tr_head; trans; trans = trans->tr_next) {
420*0Sstevel@tonic-gate 			if (FD_ISSET(trans->tr_fd, &readfds))
421*0Sstevel@tonic-gate 				break;
422*0Sstevel@tonic-gate 		}
423*0Sstevel@tonic-gate 		if (trans == NULL)
424*0Sstevel@tonic-gate 			goto recv_again;
425*0Sstevel@tonic-gate 
426*0Sstevel@tonic-gate 	try_again:
427*0Sstevel@tonic-gate 		t_rdata.addr = trans->tr_taddr->addr;
428*0Sstevel@tonic-gate 		t_rdata.udata.buf = inbuf;
429*0Sstevel@tonic-gate 		t_rdata.udata.maxlen = sizeof (inbuf);
430*0Sstevel@tonic-gate 		t_rdata.udata.len = 0;
431*0Sstevel@tonic-gate 		t_rdata.opt.len = 0;
432*0Sstevel@tonic-gate 		if (t_rcvudata(trans->tr_fd, &t_rdata, &flag) < 0) {
433*0Sstevel@tonic-gate 			if (errno == EINTR)
434*0Sstevel@tonic-gate 				goto try_again;
435*0Sstevel@tonic-gate 			syslog(LOG_ERR, "nfscast: t_rcvudata: %s:%m",
436*0Sstevel@tonic-gate 				trans->tr_device);
437*0Sstevel@tonic-gate 			stat = RPC_CANTRECV;
438*0Sstevel@tonic-gate 			continue;
439*0Sstevel@tonic-gate 		}
440*0Sstevel@tonic-gate 		if (t_rdata.udata.len < sizeof (ulong_t))
441*0Sstevel@tonic-gate 			goto recv_again;
442*0Sstevel@tonic-gate 		if (flag & T_MORE) {
443*0Sstevel@tonic-gate 			syslog(LOG_ERR,
444*0Sstevel@tonic-gate 				"nfscast: t_rcvudata: %s: buffer overflow",
445*0Sstevel@tonic-gate 				trans->tr_device);
446*0Sstevel@tonic-gate 			goto recv_again;
447*0Sstevel@tonic-gate 		}
448*0Sstevel@tonic-gate 
449*0Sstevel@tonic-gate 		/*
450*0Sstevel@tonic-gate 		 * see if reply transaction id matches sent id.
451*0Sstevel@tonic-gate 		 * If so, decode the results.
452*0Sstevel@tonic-gate 		 * Note: received addr is ignored, it could be
453*0Sstevel@tonic-gate 		 * different from the send addr if the host has
454*0Sstevel@tonic-gate 		 * more than one addr.
455*0Sstevel@tonic-gate 		 */
456*0Sstevel@tonic-gate 		xdrmem_create(xdrs, inbuf, (uint_t)t_rdata.udata.len,
457*0Sstevel@tonic-gate 								XDR_DECODE);
458*0Sstevel@tonic-gate 		if (xdr_replymsg(xdrs, &msg)) {
459*0Sstevel@tonic-gate 		    if (msg.rm_reply.rp_stat == MSG_ACCEPTED &&
460*0Sstevel@tonic-gate 			(msg.rm_xid & ~0xFF) == xid) {
461*0Sstevel@tonic-gate 			struct addrs *curr_addr;
462*0Sstevel@tonic-gate 
463*0Sstevel@tonic-gate 			i = msg.rm_xid & 0xFF;
464*0Sstevel@tonic-gate 			for (curr_addr = trans->tr_addrs; curr_addr;
465*0Sstevel@tonic-gate 			    curr_addr = curr_addr->addr_next) {
466*0Sstevel@tonic-gate 			    for (ts = curr_addr->addr_if_tstamps; ts;
467*0Sstevel@tonic-gate 				ts = ts->ts_next)
468*0Sstevel@tonic-gate 				if (ts->ts_inx == i && !ts->ts_rcvd) {
469*0Sstevel@tonic-gate 					ts->ts_rcvd = 1;
470*0Sstevel@tonic-gate 					calc_resp_time(&ts->ts_timeval);
471*0Sstevel@tonic-gate 					stat = RPC_SUCCESS;
472*0Sstevel@tonic-gate 					rcvd++;
473*0Sstevel@tonic-gate 					break;
474*0Sstevel@tonic-gate 				}
475*0Sstevel@tonic-gate 			}
476*0Sstevel@tonic-gate 		    } /* otherwise, we just ignore the errors ... */
477*0Sstevel@tonic-gate 		}
478*0Sstevel@tonic-gate 		xdrs->x_op = XDR_FREE;
479*0Sstevel@tonic-gate 		msg.acpted_rply.ar_results.proc = xdr_void;
480*0Sstevel@tonic-gate 		(void) xdr_replymsg(xdrs, &msg);
481*0Sstevel@tonic-gate 		XDR_DESTROY(xdrs);
482*0Sstevel@tonic-gate 		if (rcvd == sent)
483*0Sstevel@tonic-gate 			goto done_broad;
484*0Sstevel@tonic-gate 		else
485*0Sstevel@tonic-gate 			goto recv_again;
486*0Sstevel@tonic-gate 	}
487*0Sstevel@tonic-gate 	if (!rcvd)
488*0Sstevel@tonic-gate 		stat = RPC_TIMEDOUT;
489*0Sstevel@tonic-gate 
490*0Sstevel@tonic-gate done_broad:
491*0Sstevel@tonic-gate 	if (rcvd) {
492*0Sstevel@tonic-gate 		*mfs_out = sort_responses(tr_head);
493*0Sstevel@tonic-gate 		stat = RPC_SUCCESS;
494*0Sstevel@tonic-gate 	}
495*0Sstevel@tonic-gate 	if (nc)
496*0Sstevel@tonic-gate 		endnetconfig(nc);
497*0Sstevel@tonic-gate 	free_transports(tr_head);
498*0Sstevel@tonic-gate 	AUTH_DESTROY(sys_auth);
499*0Sstevel@tonic-gate 	return (stat);
500*0Sstevel@tonic-gate }
501*0Sstevel@tonic-gate 
502*0Sstevel@tonic-gate /*
503*0Sstevel@tonic-gate  * Go through all the responses and sort fastest to slowest.
504*0Sstevel@tonic-gate  * Note that any penalty is added to the response time - so the
505*0Sstevel@tonic-gate  * fastest response isn't necessarily the one that arrived first.
506*0Sstevel@tonic-gate  */
507*0Sstevel@tonic-gate static struct mapfs *
508*0Sstevel@tonic-gate sort_responses(trans)
509*0Sstevel@tonic-gate 	struct transp *trans;
510*0Sstevel@tonic-gate {
511*0Sstevel@tonic-gate 	struct transp *t;
512*0Sstevel@tonic-gate 	struct addrs *a;
513*0Sstevel@tonic-gate 	struct tstamps *ti;
514*0Sstevel@tonic-gate 	int i, size = 0, allocsize = 10;
515*0Sstevel@tonic-gate 	struct mapfs *p, *mfs_head = NULL, *mfs_tail = NULL;
516*0Sstevel@tonic-gate 	struct sm *buffer;
517*0Sstevel@tonic-gate 
518*0Sstevel@tonic-gate 	buffer = (struct sm *)malloc(allocsize * sizeof (struct sm));
519*0Sstevel@tonic-gate 	if (!buffer) {
520*0Sstevel@tonic-gate 		syslog(LOG_ERR, "sort_responses: malloc error.\n");
521*0Sstevel@tonic-gate 		return (NULL);
522*0Sstevel@tonic-gate 	}
523*0Sstevel@tonic-gate 
524*0Sstevel@tonic-gate 	for (t = trans; t; t = t->tr_next) {
525*0Sstevel@tonic-gate 		for (a = t->tr_addrs; a; a = a->addr_next) {
526*0Sstevel@tonic-gate 			for (ti = a->addr_if_tstamps;
527*0Sstevel@tonic-gate 				ti; ti = ti->ts_next) {
528*0Sstevel@tonic-gate 				if (!ti->ts_rcvd)
529*0Sstevel@tonic-gate 					continue;
530*0Sstevel@tonic-gate 				ti->ts_timeval.tv_usec +=
531*0Sstevel@tonic-gate 					(ti->ts_penalty * PENALTY_WEIGHT);
532*0Sstevel@tonic-gate 				if (ti->ts_timeval.tv_usec >= 1000000) {
533*0Sstevel@tonic-gate 					ti->ts_timeval.tv_sec +=
534*0Sstevel@tonic-gate 					(ti->ts_timeval.tv_usec / 1000000);
535*0Sstevel@tonic-gate 					ti->ts_timeval.tv_usec =
536*0Sstevel@tonic-gate 					(ti->ts_timeval.tv_usec % 1000000);
537*0Sstevel@tonic-gate 				}
538*0Sstevel@tonic-gate 
539*0Sstevel@tonic-gate 				if (size >= allocsize) {
540*0Sstevel@tonic-gate 					allocsize += 10;
541*0Sstevel@tonic-gate 					buffer = (struct sm *)realloc(buffer,
542*0Sstevel@tonic-gate 					    allocsize * sizeof (struct sm));
543*0Sstevel@tonic-gate 					if (!buffer) {
544*0Sstevel@tonic-gate 						syslog(LOG_ERR,
545*0Sstevel@tonic-gate 					    "sort_responses: malloc error.\n");
546*0Sstevel@tonic-gate 						return (NULL);
547*0Sstevel@tonic-gate 					}
548*0Sstevel@tonic-gate 				}
549*0Sstevel@tonic-gate 				buffer[size].timeval = ti->ts_timeval;
550*0Sstevel@tonic-gate 				buffer[size].mfs = a->addr_mfs;
551*0Sstevel@tonic-gate 				size++;
552*0Sstevel@tonic-gate 			}
553*0Sstevel@tonic-gate 		}
554*0Sstevel@tonic-gate 	}
555*0Sstevel@tonic-gate 
556*0Sstevel@tonic-gate #ifdef DEBUG
557*0Sstevel@tonic-gate 	if (trace > 3) {
558*0Sstevel@tonic-gate 		trace_prt(1, "  sort_responses: before host sort:\n");
559*0Sstevel@tonic-gate 		for (i = 0; i < size; i++)
560*0Sstevel@tonic-gate 			trace_prt(1, "    %s %d.%d\n", buffer[i].mfs->mfs_host,
561*0Sstevel@tonic-gate 			buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
562*0Sstevel@tonic-gate 		trace_prt(0, "\n");
563*0Sstevel@tonic-gate 	}
564*0Sstevel@tonic-gate #endif
565*0Sstevel@tonic-gate 
566*0Sstevel@tonic-gate 	qsort((void *)buffer, size, sizeof (struct sm), host_sm);
567*0Sstevel@tonic-gate 
568*0Sstevel@tonic-gate 	/*
569*0Sstevel@tonic-gate 	 * Cope with multiply listed hosts  by choosing first time
570*0Sstevel@tonic-gate 	 */
571*0Sstevel@tonic-gate 	for (i = 1; i < size; i++) {
572*0Sstevel@tonic-gate #ifdef DEBUG
573*0Sstevel@tonic-gate 		if (trace > 3) {
574*0Sstevel@tonic-gate 			trace_prt(1, "  sort_responses: comparing %s and %s\n",
575*0Sstevel@tonic-gate 				buffer[i-1].mfs->mfs_host,
576*0Sstevel@tonic-gate 				buffer[i].mfs->mfs_host);
577*0Sstevel@tonic-gate 		}
578*0Sstevel@tonic-gate #endif
579*0Sstevel@tonic-gate 		if (strcmp(buffer[i-1].mfs->mfs_host,
580*0Sstevel@tonic-gate 		    buffer[i].mfs->mfs_host) == 0)
581*0Sstevel@tonic-gate 			memcpy(&buffer[i].timeval, &buffer[i-1].timeval,
582*0Sstevel@tonic-gate 				sizeof (struct timeval));
583*0Sstevel@tonic-gate 	}
584*0Sstevel@tonic-gate 	if (trace > 3)
585*0Sstevel@tonic-gate 		trace_prt(0, "\n");
586*0Sstevel@tonic-gate 
587*0Sstevel@tonic-gate #ifdef DEBUG
588*0Sstevel@tonic-gate 	if (trace > 3) {
589*0Sstevel@tonic-gate 		trace_prt(1, "  sort_responses: before time sort:\n");
590*0Sstevel@tonic-gate 		for (i = 0; i < size; i++)
591*0Sstevel@tonic-gate 			trace_prt(1, "    %s %d.%d\n", buffer[i].mfs->mfs_host,
592*0Sstevel@tonic-gate 			buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
593*0Sstevel@tonic-gate 		trace_prt(0, "\n");
594*0Sstevel@tonic-gate 	}
595*0Sstevel@tonic-gate #endif
596*0Sstevel@tonic-gate 
597*0Sstevel@tonic-gate 	qsort((void *)buffer, size, sizeof (struct sm), time_sm);
598*0Sstevel@tonic-gate 
599*0Sstevel@tonic-gate #ifdef DEBUG
600*0Sstevel@tonic-gate 	if (trace > 3) {
601*0Sstevel@tonic-gate 		trace_prt(1, "  sort_responses: after sort:\n");
602*0Sstevel@tonic-gate 		for (i = 0; i < size; i++)
603*0Sstevel@tonic-gate 			trace_prt(1, "    %s %d.%d\n", buffer[i].mfs->mfs_host,
604*0Sstevel@tonic-gate 			buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
605*0Sstevel@tonic-gate 		trace_prt(0, "\n");
606*0Sstevel@tonic-gate 	}
607*0Sstevel@tonic-gate #endif
608*0Sstevel@tonic-gate 
609*0Sstevel@tonic-gate 	for (i = 0; i < size; i++) {
610*0Sstevel@tonic-gate #ifdef DEBUG
611*0Sstevel@tonic-gate 		if (trace > 3) {
612*0Sstevel@tonic-gate 			trace_prt(1, "  sort_responses: adding %s\n",
613*0Sstevel@tonic-gate 				buffer[i].mfs->mfs_host);
614*0Sstevel@tonic-gate 		}
615*0Sstevel@tonic-gate #endif
616*0Sstevel@tonic-gate 		p = add_mfs(buffer[i].mfs, 0, &mfs_head, &mfs_tail);
617*0Sstevel@tonic-gate 		if (!p)
618*0Sstevel@tonic-gate 			return (NULL);
619*0Sstevel@tonic-gate 	}
620*0Sstevel@tonic-gate 	free(buffer);
621*0Sstevel@tonic-gate 
622*0Sstevel@tonic-gate 	return (mfs_head);
623*0Sstevel@tonic-gate }
624*0Sstevel@tonic-gate 
625*0Sstevel@tonic-gate 
626*0Sstevel@tonic-gate /*
627*0Sstevel@tonic-gate  * Comparison routines called by qsort(3).
628*0Sstevel@tonic-gate  */
629*0Sstevel@tonic-gate static int host_sm(const void *a, const void *b)
630*0Sstevel@tonic-gate {
631*0Sstevel@tonic-gate 	return (strcmp(((struct sm *)a)->mfs->mfs_host,
632*0Sstevel@tonic-gate 			((struct sm *)b)->mfs->mfs_host));
633*0Sstevel@tonic-gate }
634*0Sstevel@tonic-gate 
635*0Sstevel@tonic-gate static int time_sm(const void *a, const void *b)
636*0Sstevel@tonic-gate {
637*0Sstevel@tonic-gate 	if (timercmp(&(((struct sm *)a)->timeval),
638*0Sstevel@tonic-gate 	    &(((struct sm *)b)->timeval), < /* cstyle */))
639*0Sstevel@tonic-gate 		return (-1);
640*0Sstevel@tonic-gate 	else if (timercmp(&(((struct sm *)a)->timeval),
641*0Sstevel@tonic-gate 	    &(((struct sm *)b)->timeval), > /* cstyle */))
642*0Sstevel@tonic-gate 		return (1);
643*0Sstevel@tonic-gate 	else
644*0Sstevel@tonic-gate 		return (0);
645*0Sstevel@tonic-gate }
646*0Sstevel@tonic-gate 
/*
 * Convert *send_time, the time at which a request was transmitted,
 * into the time elapsed between then and "now".  The result is
 * written back through send_time, with the microsecond field
 * normalized by borrowing one second when it would go negative.
 */
static void
calc_resp_time(struct timeval *send_time)
{
	struct timeval now;
	struct timeval elapsed;

	(void) gettimeofday(&now, (struct timezone *)0);

	elapsed.tv_sec = now.tv_sec - send_time->tv_sec;
	elapsed.tv_usec = now.tv_usec - send_time->tv_usec;
	if (elapsed.tv_usec < 0) {
		/* borrow a second to keep tv_usec non-negative */
		elapsed.tv_sec--;
		elapsed.tv_usec += 1000000;
	}

	*send_time = elapsed;
}
667*0Sstevel@tonic-gate 
668*0Sstevel@tonic-gate static void
669*0Sstevel@tonic-gate free_transports(trans)
670*0Sstevel@tonic-gate 	struct transp *trans;
671*0Sstevel@tonic-gate {
672*0Sstevel@tonic-gate 	struct transp *t, *tmpt = NULL;
673*0Sstevel@tonic-gate 	struct addrs *a, *tmpa = NULL;
674*0Sstevel@tonic-gate 	struct tstamps *ts, *tmpts = NULL;
675*0Sstevel@tonic-gate 
676*0Sstevel@tonic-gate 	for (t = trans; t; t = tmpt) {
677*0Sstevel@tonic-gate 		if (t->tr_taddr)
678*0Sstevel@tonic-gate 			(void) t_free((char *)t->tr_taddr, T_BIND);
679*0Sstevel@tonic-gate 		if (t->tr_fd > 0)
680*0Sstevel@tonic-gate 			(void) t_close(t->tr_fd);
681*0Sstevel@tonic-gate 		for (a = t->tr_addrs; a; a = tmpa) {
682*0Sstevel@tonic-gate 			for (ts = a->addr_if_tstamps; ts; ts = tmpts) {
683*0Sstevel@tonic-gate 				tmpts = ts->ts_next;
684*0Sstevel@tonic-gate 				free(ts);
685*0Sstevel@tonic-gate 			}
686*0Sstevel@tonic-gate 			(void) netdir_free((char *)a->addr_addrs, ND_ADDRLIST);
687*0Sstevel@tonic-gate 			tmpa = a->addr_next;
688*0Sstevel@tonic-gate 			free(a);
689*0Sstevel@tonic-gate 		}
690*0Sstevel@tonic-gate 		tmpt = t->tr_next;
691*0Sstevel@tonic-gate 		free(t);
692*0Sstevel@tonic-gate 	}
693*0Sstevel@tonic-gate }
694