/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * nfs_cast.c : broadcast to a specific group of NFS servers
 *
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

290Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
300Sstevel@tonic-gate
310Sstevel@tonic-gate #include <stdio.h>
320Sstevel@tonic-gate #include <syslog.h>
330Sstevel@tonic-gate #include <errno.h>
340Sstevel@tonic-gate #include <string.h>
350Sstevel@tonic-gate #include <sys/types.h>
360Sstevel@tonic-gate #include <sys/time.h>
370Sstevel@tonic-gate #include <sys/resource.h>
380Sstevel@tonic-gate #include <unistd.h>
390Sstevel@tonic-gate #include <stdlib.h>
400Sstevel@tonic-gate #include <rpc/rpc.h>
410Sstevel@tonic-gate #include <rpc/clnt_soc.h>
420Sstevel@tonic-gate #include <rpc/nettype.h>
430Sstevel@tonic-gate #include <rpc/pmap_prot.h>
440Sstevel@tonic-gate #include <netconfig.h>
450Sstevel@tonic-gate #include <netdir.h>
460Sstevel@tonic-gate #include <nfs/nfs.h>
470Sstevel@tonic-gate #define NFSCLIENT
480Sstevel@tonic-gate #include <locale.h>
490Sstevel@tonic-gate #include "automount.h"
500Sstevel@tonic-gate
510Sstevel@tonic-gate #define PENALTY_WEIGHT 100000
520Sstevel@tonic-gate
/*
 * One entry per network address (interface) of a host.
 */
struct tstamps {
	struct tstamps	*ts_next;	/* next interface of the same host */
	int		ts_penalty;	/* copied from the map entry's mfs_penalty */
	int		ts_inx;		/* index added to the base xid for this address */
	int		ts_rcvd;	/* non-zero once a matching reply was seen */
	/* send time; turned into elapsed time when the reply arrives */
	struct timeval	ts_timeval;
};

/* A list of addresses - all belonging to the same transport */

struct addrs {
	struct addrs		*addr_next;	/* next host on this transport */
	struct mapfs		*addr_mfs;	/* map entry the addresses belong to */
	struct nd_addrlist	*addr_addrs;	/* result of netdir_getbyname() */
	struct tstamps		*addr_if_tstamps; /* one tstamps per address */
};

/* A list of connectionless transports */

struct transp {
	struct transp	*tr_next;	/* next transport endpoint */
	int		tr_fd;		/* descriptor returned by t_open() */
	char		*tr_device;	/* device name, used in log messages */
	struct t_bind	*tr_taddr;	/* t_alloc()ed buffer for reply addresses */
	struct addrs	*tr_addrs;	/* hosts pinged through this endpoint */
};

/* A list of map entries and their roundtrip times, for sorting */

struct sm {
	struct mapfs	*mfs;		/* map entry that answered */
	struct timeval	timeval;	/* its response time, penalty included */
};

/* Helpers defined further down in this file. */
static void free_transports(struct transp *);
static void calc_resp_time(struct timeval *);
static struct mapfs *sort_responses(struct transp *);
static int host_sm(const void *a, const void *b);
static int time_sm(const void *a, const void *b);
/* Declared here, defined outside this file. */
extern struct mapfs *add_mfs(struct mapfs *, int, struct mapfs **,
				struct mapfs **);

950Sstevel@tonic-gate /*
960Sstevel@tonic-gate * This routine is designed to be able to "ping"
970Sstevel@tonic-gate * a list of hosts and create a list of responding
980Sstevel@tonic-gate * hosts sorted by response time.
990Sstevel@tonic-gate * This must be done without any prior
1000Sstevel@tonic-gate * contact with the host - therefore the "ping"
1010Sstevel@tonic-gate * must be to a "well-known" address. The outstanding
1020Sstevel@tonic-gate * candidate here is the address of "rpcbind".
1030Sstevel@tonic-gate *
1040Sstevel@tonic-gate * A response to a ping is no guarantee that the host
1050Sstevel@tonic-gate * is running NFS, has a mount daemon, or exports
1060Sstevel@tonic-gate * the required filesystem. If the subsequent
1070Sstevel@tonic-gate * mount attempt fails then the host will be marked
1080Sstevel@tonic-gate * "ignore" and the host list will be re-pinged
1090Sstevel@tonic-gate * (sans the bad host). This process continues
1100Sstevel@tonic-gate * until a successful mount is achieved or until
1110Sstevel@tonic-gate * there are no hosts left to try.
1120Sstevel@tonic-gate */
1130Sstevel@tonic-gate enum clnt_stat
nfs_cast(struct mapfs * mfs_in,struct mapfs ** mfs_out,int timeout)1140Sstevel@tonic-gate nfs_cast(struct mapfs *mfs_in, struct mapfs **mfs_out, int timeout)
1150Sstevel@tonic-gate {
1160Sstevel@tonic-gate enum clnt_stat stat;
1170Sstevel@tonic-gate AUTH *sys_auth = authsys_create_default();
1180Sstevel@tonic-gate XDR xdr_stream;
1190Sstevel@tonic-gate register XDR *xdrs = &xdr_stream;
1200Sstevel@tonic-gate int outlen;
1210Sstevel@tonic-gate int if_inx;
1220Sstevel@tonic-gate int tsec;
1230Sstevel@tonic-gate int flag;
1240Sstevel@tonic-gate int sent, addr_cnt, rcvd, if_cnt;
1250Sstevel@tonic-gate fd_set readfds, mask;
1260Sstevel@tonic-gate register ulong_t xid; /* xid - unique per addr */
1270Sstevel@tonic-gate register int i;
1280Sstevel@tonic-gate struct rpc_msg msg;
1290Sstevel@tonic-gate struct timeval t, rcv_timeout;
1300Sstevel@tonic-gate char outbuf[UDPMSGSIZE], inbuf[UDPMSGSIZE];
1310Sstevel@tonic-gate struct t_unitdata t_udata, t_rdata;
1320Sstevel@tonic-gate struct nd_hostserv hs;
1330Sstevel@tonic-gate struct nd_addrlist *retaddrs;
1340Sstevel@tonic-gate struct transp *tr_head;
1350Sstevel@tonic-gate struct transp *trans, *prev_trans;
1360Sstevel@tonic-gate struct addrs *a, *prev_addr;
1370Sstevel@tonic-gate struct tstamps *ts, *prev_ts;
1380Sstevel@tonic-gate NCONF_HANDLE *nc = NULL;
1390Sstevel@tonic-gate struct netconfig *nconf;
1400Sstevel@tonic-gate struct rlimit rl;
1410Sstevel@tonic-gate int dtbsize;
1420Sstevel@tonic-gate struct mapfs *mfs;
1430Sstevel@tonic-gate
1440Sstevel@tonic-gate /*
1450Sstevel@tonic-gate * For each connectionless transport get a list of
1460Sstevel@tonic-gate * host addresses. Any single host may have
1470Sstevel@tonic-gate * addresses on several transports.
1480Sstevel@tonic-gate */
1490Sstevel@tonic-gate addr_cnt = sent = rcvd = 0;
1500Sstevel@tonic-gate tr_head = NULL;
1510Sstevel@tonic-gate FD_ZERO(&mask);
1520Sstevel@tonic-gate
1530Sstevel@tonic-gate /*
1540Sstevel@tonic-gate * Set the default select size to be the maximum FD_SETSIZE, unless
1550Sstevel@tonic-gate * the current rlimit is lower.
1560Sstevel@tonic-gate */
1570Sstevel@tonic-gate dtbsize = FD_SETSIZE;
1580Sstevel@tonic-gate if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
1590Sstevel@tonic-gate if (rl.rlim_cur < FD_SETSIZE)
1600Sstevel@tonic-gate dtbsize = rl.rlim_cur;
1610Sstevel@tonic-gate }
1620Sstevel@tonic-gate
1630Sstevel@tonic-gate prev_trans = NULL;
1640Sstevel@tonic-gate prev_addr = NULL;
1650Sstevel@tonic-gate prev_ts = NULL;
1660Sstevel@tonic-gate for (mfs = mfs_in; mfs; mfs = mfs->mfs_next) {
1670Sstevel@tonic-gate
1680Sstevel@tonic-gate if (trace > 2)
1690Sstevel@tonic-gate trace_prt(1, "nfs_cast: host=%s\n", mfs->mfs_host);
1700Sstevel@tonic-gate
1710Sstevel@tonic-gate nc = setnetconfig();
1720Sstevel@tonic-gate if (nc == NULL) {
1730Sstevel@tonic-gate stat = RPC_CANTSEND;
1740Sstevel@tonic-gate goto done_broad;
1750Sstevel@tonic-gate }
1760Sstevel@tonic-gate while (nconf = getnetconfig(nc)) {
1770Sstevel@tonic-gate if (!(nconf->nc_flag & NC_VISIBLE) ||
1780Sstevel@tonic-gate nconf->nc_semantics != NC_TPI_CLTS ||
1790Sstevel@tonic-gate (strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0))
1800Sstevel@tonic-gate continue;
1810Sstevel@tonic-gate trans = (struct transp *)malloc(sizeof (*trans));
1820Sstevel@tonic-gate if (trans == NULL) {
1830Sstevel@tonic-gate syslog(LOG_ERR, "no memory");
1840Sstevel@tonic-gate stat = RPC_CANTSEND;
1850Sstevel@tonic-gate goto done_broad;
1860Sstevel@tonic-gate }
1870Sstevel@tonic-gate (void) memset(trans, 0, sizeof (*trans));
1880Sstevel@tonic-gate if (tr_head == NULL)
1890Sstevel@tonic-gate tr_head = trans;
1900Sstevel@tonic-gate else
1910Sstevel@tonic-gate prev_trans->tr_next = trans;
1920Sstevel@tonic-gate prev_trans = trans;
1930Sstevel@tonic-gate
1940Sstevel@tonic-gate trans->tr_fd = t_open(nconf->nc_device, O_RDWR, NULL);
1950Sstevel@tonic-gate if (trans->tr_fd < 0) {
1960Sstevel@tonic-gate syslog(LOG_ERR, "nfscast: t_open: %s:%m",
1970Sstevel@tonic-gate nconf->nc_device);
1980Sstevel@tonic-gate stat = RPC_CANTSEND;
1990Sstevel@tonic-gate goto done_broad;
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate if (t_bind(trans->tr_fd, (struct t_bind *)NULL,
2020Sstevel@tonic-gate (struct t_bind *)NULL) < 0) {
2030Sstevel@tonic-gate syslog(LOG_ERR, "nfscast: t_bind: %m");
2040Sstevel@tonic-gate stat = RPC_CANTSEND;
2050Sstevel@tonic-gate goto done_broad;
2060Sstevel@tonic-gate }
2070Sstevel@tonic-gate trans->tr_taddr =
2080Sstevel@tonic-gate /* LINTED pointer alignment */
2090Sstevel@tonic-gate (struct t_bind *)t_alloc(trans->tr_fd, T_BIND, T_ADDR);
2100Sstevel@tonic-gate if (trans->tr_taddr == (struct t_bind *)NULL) {
2110Sstevel@tonic-gate syslog(LOG_ERR, "nfscast: t_alloc: %m");
2120Sstevel@tonic-gate stat = RPC_SYSTEMERROR;
2130Sstevel@tonic-gate goto done_broad;
2140Sstevel@tonic-gate }
2150Sstevel@tonic-gate
2160Sstevel@tonic-gate trans->tr_device = nconf->nc_device;
2170Sstevel@tonic-gate FD_SET(trans->tr_fd, &mask);
2180Sstevel@tonic-gate
2190Sstevel@tonic-gate if_inx = 0;
2200Sstevel@tonic-gate hs.h_host = mfs->mfs_host;
2210Sstevel@tonic-gate hs.h_serv = "rpcbind";
2220Sstevel@tonic-gate if (netdir_getbyname(nconf, &hs, &retaddrs) == ND_OK) {
2230Sstevel@tonic-gate
2240Sstevel@tonic-gate /*
2250Sstevel@tonic-gate * If mfs->ignore is previously set for
2260Sstevel@tonic-gate * this map, clear it. Because a host can
2270Sstevel@tonic-gate * have either v6 or v4 address
2280Sstevel@tonic-gate */
2290Sstevel@tonic-gate if (mfs->mfs_ignore == 1)
2300Sstevel@tonic-gate mfs->mfs_ignore = 0;
2310Sstevel@tonic-gate
2320Sstevel@tonic-gate a = (struct addrs *)malloc(sizeof (*a));
2330Sstevel@tonic-gate if (a == NULL) {
2340Sstevel@tonic-gate syslog(LOG_ERR, "no memory");
2350Sstevel@tonic-gate stat = RPC_CANTSEND;
2360Sstevel@tonic-gate goto done_broad;
2370Sstevel@tonic-gate }
2380Sstevel@tonic-gate (void) memset(a, 0, sizeof (*a));
2390Sstevel@tonic-gate if (trans->tr_addrs == NULL)
2400Sstevel@tonic-gate trans->tr_addrs = a;
2410Sstevel@tonic-gate else
2420Sstevel@tonic-gate prev_addr->addr_next = a;
2430Sstevel@tonic-gate prev_addr = a;
2440Sstevel@tonic-gate a->addr_if_tstamps = NULL;
2450Sstevel@tonic-gate a->addr_mfs = mfs;
2460Sstevel@tonic-gate a->addr_addrs = retaddrs;
2470Sstevel@tonic-gate if_cnt = retaddrs->n_cnt;
2480Sstevel@tonic-gate while (if_cnt--) {
2490Sstevel@tonic-gate ts = (struct tstamps *)
2500Sstevel@tonic-gate malloc(sizeof (*ts));
2510Sstevel@tonic-gate if (ts == NULL) {
2520Sstevel@tonic-gate syslog(LOG_ERR, "no memory");
2530Sstevel@tonic-gate stat = RPC_CANTSEND;
2540Sstevel@tonic-gate goto done_broad;
2550Sstevel@tonic-gate }
2560Sstevel@tonic-gate (void) memset(ts, 0, sizeof (*ts));
2570Sstevel@tonic-gate ts->ts_penalty = mfs->mfs_penalty;
2580Sstevel@tonic-gate if (a->addr_if_tstamps == NULL)
2590Sstevel@tonic-gate a->addr_if_tstamps = ts;
2600Sstevel@tonic-gate else
2610Sstevel@tonic-gate prev_ts->ts_next = ts;
2620Sstevel@tonic-gate prev_ts = ts;
2630Sstevel@tonic-gate ts->ts_inx = if_inx++;
2640Sstevel@tonic-gate addr_cnt++;
2650Sstevel@tonic-gate }
2660Sstevel@tonic-gate break;
2670Sstevel@tonic-gate } else {
2680Sstevel@tonic-gate mfs->mfs_ignore = 1;
2690Sstevel@tonic-gate if (verbose)
2700Sstevel@tonic-gate syslog(LOG_ERR,
2710Sstevel@tonic-gate "%s:%s address not known",
2720Sstevel@tonic-gate mfs->mfs_host,
2730Sstevel@tonic-gate strcmp(nconf->nc_proto, NC_INET)?"IPv6":"IPv4");
2740Sstevel@tonic-gate }
2750Sstevel@tonic-gate } /* while */
2760Sstevel@tonic-gate
2770Sstevel@tonic-gate endnetconfig(nc);
2780Sstevel@tonic-gate nc = NULL;
2790Sstevel@tonic-gate } /* for */
2800Sstevel@tonic-gate if (addr_cnt == 0) {
2810Sstevel@tonic-gate syslog(LOG_ERR, "nfscast: couldn't find addresses");
2820Sstevel@tonic-gate stat = RPC_CANTSEND;
2830Sstevel@tonic-gate goto done_broad;
2840Sstevel@tonic-gate }
2850Sstevel@tonic-gate
2860Sstevel@tonic-gate (void) gettimeofday(&t, (struct timezone *)0);
2870Sstevel@tonic-gate xid = (getpid() ^ t.tv_sec ^ t.tv_usec) & ~0xFF;
2880Sstevel@tonic-gate t.tv_usec = 0;
2890Sstevel@tonic-gate
2900Sstevel@tonic-gate /* serialize the RPC header */
2910Sstevel@tonic-gate
2920Sstevel@tonic-gate msg.rm_direction = CALL;
2930Sstevel@tonic-gate msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
2940Sstevel@tonic-gate msg.rm_call.cb_prog = RPCBPROG;
2950Sstevel@tonic-gate /*
2960Sstevel@tonic-gate * we can not use RPCBVERS here since it doesn't exist in 4.X,
2970Sstevel@tonic-gate * the fix to bug 1139883 has made the 4.X portmapper silent to
2980Sstevel@tonic-gate * version mismatches. This causes the RPC call to the remote
2990Sstevel@tonic-gate * portmapper to simply be ignored if it's not Version 2.
3000Sstevel@tonic-gate */
3010Sstevel@tonic-gate msg.rm_call.cb_vers = PMAPVERS;
3020Sstevel@tonic-gate msg.rm_call.cb_proc = NULLPROC;
3030Sstevel@tonic-gate if (sys_auth == (AUTH *)NULL) {
3040Sstevel@tonic-gate stat = RPC_SYSTEMERROR;
3050Sstevel@tonic-gate goto done_broad;
3060Sstevel@tonic-gate }
3070Sstevel@tonic-gate msg.rm_call.cb_cred = sys_auth->ah_cred;
3080Sstevel@tonic-gate msg.rm_call.cb_verf = sys_auth->ah_verf;
3090Sstevel@tonic-gate xdrmem_create(xdrs, outbuf, sizeof (outbuf), XDR_ENCODE);
3100Sstevel@tonic-gate if (! xdr_callmsg(xdrs, &msg)) {
3110Sstevel@tonic-gate stat = RPC_CANTENCODEARGS;
3120Sstevel@tonic-gate goto done_broad;
3130Sstevel@tonic-gate }
3140Sstevel@tonic-gate outlen = (int)xdr_getpos(xdrs);
3150Sstevel@tonic-gate xdr_destroy(xdrs);
3160Sstevel@tonic-gate
3170Sstevel@tonic-gate t_udata.opt.len = 0;
3180Sstevel@tonic-gate t_udata.udata.buf = outbuf;
3190Sstevel@tonic-gate t_udata.udata.len = outlen;
3200Sstevel@tonic-gate
3210Sstevel@tonic-gate /*
3220Sstevel@tonic-gate * Basic loop: send packet to all hosts and wait for response(s).
3230Sstevel@tonic-gate * The response timeout grows larger per iteration.
3240Sstevel@tonic-gate * A unique xid is assigned to each address in order to
3250Sstevel@tonic-gate * correctly match the replies.
3260Sstevel@tonic-gate */
3270Sstevel@tonic-gate for (tsec = 4; timeout > 0; tsec *= 2) {
3280Sstevel@tonic-gate
3290Sstevel@tonic-gate timeout -= tsec;
3300Sstevel@tonic-gate if (timeout <= 0)
3310Sstevel@tonic-gate tsec += timeout;
3320Sstevel@tonic-gate
3330Sstevel@tonic-gate rcv_timeout.tv_sec = tsec;
3340Sstevel@tonic-gate rcv_timeout.tv_usec = 0;
3350Sstevel@tonic-gate
3360Sstevel@tonic-gate sent = 0;
3370Sstevel@tonic-gate for (trans = tr_head; trans; trans = trans->tr_next) {
3380Sstevel@tonic-gate for (a = trans->tr_addrs; a; a = a->addr_next) {
3390Sstevel@tonic-gate struct netbuf *if_netbuf =
3400Sstevel@tonic-gate a->addr_addrs->n_addrs;
3410Sstevel@tonic-gate ts = a->addr_if_tstamps;
3420Sstevel@tonic-gate if_cnt = a->addr_addrs->n_cnt;
3430Sstevel@tonic-gate while (if_cnt--) {
3440Sstevel@tonic-gate
3450Sstevel@tonic-gate /*
3460Sstevel@tonic-gate * xid is the first thing in
3470Sstevel@tonic-gate * preserialized buffer
3480Sstevel@tonic-gate */
3490Sstevel@tonic-gate /* LINTED pointer alignment */
3500Sstevel@tonic-gate *((ulong_t *)outbuf) =
3510Sstevel@tonic-gate htonl(xid + ts->ts_inx);
3520Sstevel@tonic-gate (void) gettimeofday(&(ts->ts_timeval),
3530Sstevel@tonic-gate (struct timezone *)0);
3540Sstevel@tonic-gate /*
3550Sstevel@tonic-gate * Check if already received
3560Sstevel@tonic-gate * from a previous iteration.
3570Sstevel@tonic-gate */
3580Sstevel@tonic-gate if (ts->ts_rcvd) {
3590Sstevel@tonic-gate sent++;
3600Sstevel@tonic-gate ts = ts->ts_next;
3610Sstevel@tonic-gate continue;
3620Sstevel@tonic-gate }
3630Sstevel@tonic-gate
3640Sstevel@tonic-gate t_udata.addr = *if_netbuf++;
3650Sstevel@tonic-gate
3660Sstevel@tonic-gate if (t_sndudata(trans->tr_fd,
3670Sstevel@tonic-gate &t_udata) == 0) {
3680Sstevel@tonic-gate sent++;
3690Sstevel@tonic-gate }
3700Sstevel@tonic-gate
3710Sstevel@tonic-gate ts = ts->ts_next;
3720Sstevel@tonic-gate }
3730Sstevel@tonic-gate }
3740Sstevel@tonic-gate }
3750Sstevel@tonic-gate if (sent == 0) { /* no packets sent ? */
3760Sstevel@tonic-gate stat = RPC_CANTSEND;
3770Sstevel@tonic-gate goto done_broad;
3780Sstevel@tonic-gate }
3790Sstevel@tonic-gate
3800Sstevel@tonic-gate /*
3810Sstevel@tonic-gate * Have sent all the packets. Now collect the responses...
3820Sstevel@tonic-gate */
3830Sstevel@tonic-gate rcvd = 0;
3840Sstevel@tonic-gate recv_again:
3850Sstevel@tonic-gate msg.acpted_rply.ar_verf = _null_auth;
3860Sstevel@tonic-gate msg.acpted_rply.ar_results.proc = xdr_void;
3870Sstevel@tonic-gate readfds = mask;
3880Sstevel@tonic-gate
3890Sstevel@tonic-gate switch (select(dtbsize, &readfds,
3900Sstevel@tonic-gate (fd_set *)NULL, (fd_set *)NULL, &rcv_timeout)) {
3910Sstevel@tonic-gate
3920Sstevel@tonic-gate case 0: /* Timed out */
3930Sstevel@tonic-gate /*
3940Sstevel@tonic-gate * If we got at least one response in the
3950Sstevel@tonic-gate * last interval, then don't wait for any
3960Sstevel@tonic-gate * more. In theory we should wait for
3970Sstevel@tonic-gate * the max weighting (penalty) value so
3980Sstevel@tonic-gate * that a very slow server has a chance to
3990Sstevel@tonic-gate * respond but this could take a long time
4000Sstevel@tonic-gate * if the admin has set a high weighting
4010Sstevel@tonic-gate * value.
4020Sstevel@tonic-gate */
4030Sstevel@tonic-gate if (rcvd > 0)
4040Sstevel@tonic-gate goto done_broad;
4050Sstevel@tonic-gate
4060Sstevel@tonic-gate stat = RPC_TIMEDOUT;
4070Sstevel@tonic-gate continue;
4080Sstevel@tonic-gate
4090Sstevel@tonic-gate case -1: /* some kind of error */
4100Sstevel@tonic-gate if (errno == EINTR)
4110Sstevel@tonic-gate goto recv_again;
4120Sstevel@tonic-gate syslog(LOG_ERR, "nfscast: select: %m");
4130Sstevel@tonic-gate if (rcvd == 0)
4140Sstevel@tonic-gate stat = RPC_CANTRECV;
4150Sstevel@tonic-gate goto done_broad;
4160Sstevel@tonic-gate
4170Sstevel@tonic-gate } /* end of select results switch */
4180Sstevel@tonic-gate
4190Sstevel@tonic-gate for (trans = tr_head; trans; trans = trans->tr_next) {
4200Sstevel@tonic-gate if (FD_ISSET(trans->tr_fd, &readfds))
4210Sstevel@tonic-gate break;
4220Sstevel@tonic-gate }
4230Sstevel@tonic-gate if (trans == NULL)
4240Sstevel@tonic-gate goto recv_again;
4250Sstevel@tonic-gate
4260Sstevel@tonic-gate try_again:
4270Sstevel@tonic-gate t_rdata.addr = trans->tr_taddr->addr;
4280Sstevel@tonic-gate t_rdata.udata.buf = inbuf;
4290Sstevel@tonic-gate t_rdata.udata.maxlen = sizeof (inbuf);
4300Sstevel@tonic-gate t_rdata.udata.len = 0;
4310Sstevel@tonic-gate t_rdata.opt.len = 0;
4320Sstevel@tonic-gate if (t_rcvudata(trans->tr_fd, &t_rdata, &flag) < 0) {
4330Sstevel@tonic-gate if (errno == EINTR)
4340Sstevel@tonic-gate goto try_again;
4350Sstevel@tonic-gate syslog(LOG_ERR, "nfscast: t_rcvudata: %s:%m",
4360Sstevel@tonic-gate trans->tr_device);
4370Sstevel@tonic-gate stat = RPC_CANTRECV;
4380Sstevel@tonic-gate continue;
4390Sstevel@tonic-gate }
4400Sstevel@tonic-gate if (t_rdata.udata.len < sizeof (ulong_t))
4410Sstevel@tonic-gate goto recv_again;
4420Sstevel@tonic-gate if (flag & T_MORE) {
4430Sstevel@tonic-gate syslog(LOG_ERR,
4440Sstevel@tonic-gate "nfscast: t_rcvudata: %s: buffer overflow",
4450Sstevel@tonic-gate trans->tr_device);
4460Sstevel@tonic-gate goto recv_again;
4470Sstevel@tonic-gate }
4480Sstevel@tonic-gate
4490Sstevel@tonic-gate /*
4500Sstevel@tonic-gate * see if reply transaction id matches sent id.
4510Sstevel@tonic-gate * If so, decode the results.
4520Sstevel@tonic-gate * Note: received addr is ignored, it could be
4530Sstevel@tonic-gate * different from the send addr if the host has
4540Sstevel@tonic-gate * more than one addr.
4550Sstevel@tonic-gate */
4560Sstevel@tonic-gate xdrmem_create(xdrs, inbuf, (uint_t)t_rdata.udata.len,
4570Sstevel@tonic-gate XDR_DECODE);
4580Sstevel@tonic-gate if (xdr_replymsg(xdrs, &msg)) {
4590Sstevel@tonic-gate if (msg.rm_reply.rp_stat == MSG_ACCEPTED &&
4600Sstevel@tonic-gate (msg.rm_xid & ~0xFF) == xid) {
4610Sstevel@tonic-gate struct addrs *curr_addr;
4620Sstevel@tonic-gate
4630Sstevel@tonic-gate i = msg.rm_xid & 0xFF;
4640Sstevel@tonic-gate for (curr_addr = trans->tr_addrs; curr_addr;
4650Sstevel@tonic-gate curr_addr = curr_addr->addr_next) {
4660Sstevel@tonic-gate for (ts = curr_addr->addr_if_tstamps; ts;
4670Sstevel@tonic-gate ts = ts->ts_next)
4680Sstevel@tonic-gate if (ts->ts_inx == i && !ts->ts_rcvd) {
4690Sstevel@tonic-gate ts->ts_rcvd = 1;
4700Sstevel@tonic-gate calc_resp_time(&ts->ts_timeval);
4710Sstevel@tonic-gate stat = RPC_SUCCESS;
4720Sstevel@tonic-gate rcvd++;
4730Sstevel@tonic-gate break;
4740Sstevel@tonic-gate }
4750Sstevel@tonic-gate }
4760Sstevel@tonic-gate } /* otherwise, we just ignore the errors ... */
4770Sstevel@tonic-gate }
4780Sstevel@tonic-gate xdrs->x_op = XDR_FREE;
4790Sstevel@tonic-gate msg.acpted_rply.ar_results.proc = xdr_void;
4800Sstevel@tonic-gate (void) xdr_replymsg(xdrs, &msg);
4810Sstevel@tonic-gate XDR_DESTROY(xdrs);
4820Sstevel@tonic-gate if (rcvd == sent)
4830Sstevel@tonic-gate goto done_broad;
4840Sstevel@tonic-gate else
4850Sstevel@tonic-gate goto recv_again;
4860Sstevel@tonic-gate }
4870Sstevel@tonic-gate if (!rcvd)
4880Sstevel@tonic-gate stat = RPC_TIMEDOUT;
4890Sstevel@tonic-gate
4900Sstevel@tonic-gate done_broad:
4910Sstevel@tonic-gate if (rcvd) {
4920Sstevel@tonic-gate *mfs_out = sort_responses(tr_head);
4930Sstevel@tonic-gate stat = RPC_SUCCESS;
4940Sstevel@tonic-gate }
4950Sstevel@tonic-gate if (nc)
4960Sstevel@tonic-gate endnetconfig(nc);
4970Sstevel@tonic-gate free_transports(tr_head);
4980Sstevel@tonic-gate AUTH_DESTROY(sys_auth);
4990Sstevel@tonic-gate return (stat);
5000Sstevel@tonic-gate }
5010Sstevel@tonic-gate
5020Sstevel@tonic-gate /*
5030Sstevel@tonic-gate * Go through all the responses and sort fastest to slowest.
5040Sstevel@tonic-gate * Note that any penalty is added to the response time - so the
5050Sstevel@tonic-gate * fastest response isn't necessarily the one that arrived first.
5060Sstevel@tonic-gate */
5070Sstevel@tonic-gate static struct mapfs *
sort_responses(trans)5080Sstevel@tonic-gate sort_responses(trans)
5090Sstevel@tonic-gate struct transp *trans;
5100Sstevel@tonic-gate {
5110Sstevel@tonic-gate struct transp *t;
5120Sstevel@tonic-gate struct addrs *a;
5130Sstevel@tonic-gate struct tstamps *ti;
5140Sstevel@tonic-gate int i, size = 0, allocsize = 10;
5150Sstevel@tonic-gate struct mapfs *p, *mfs_head = NULL, *mfs_tail = NULL;
5160Sstevel@tonic-gate struct sm *buffer;
5170Sstevel@tonic-gate
5180Sstevel@tonic-gate buffer = (struct sm *)malloc(allocsize * sizeof (struct sm));
5190Sstevel@tonic-gate if (!buffer) {
5200Sstevel@tonic-gate syslog(LOG_ERR, "sort_responses: malloc error.\n");
5210Sstevel@tonic-gate return (NULL);
5220Sstevel@tonic-gate }
5230Sstevel@tonic-gate
5240Sstevel@tonic-gate for (t = trans; t; t = t->tr_next) {
5250Sstevel@tonic-gate for (a = t->tr_addrs; a; a = a->addr_next) {
5260Sstevel@tonic-gate for (ti = a->addr_if_tstamps;
5270Sstevel@tonic-gate ti; ti = ti->ts_next) {
5280Sstevel@tonic-gate if (!ti->ts_rcvd)
5290Sstevel@tonic-gate continue;
5300Sstevel@tonic-gate ti->ts_timeval.tv_usec +=
5310Sstevel@tonic-gate (ti->ts_penalty * PENALTY_WEIGHT);
5320Sstevel@tonic-gate if (ti->ts_timeval.tv_usec >= 1000000) {
5330Sstevel@tonic-gate ti->ts_timeval.tv_sec +=
5340Sstevel@tonic-gate (ti->ts_timeval.tv_usec / 1000000);
5350Sstevel@tonic-gate ti->ts_timeval.tv_usec =
5360Sstevel@tonic-gate (ti->ts_timeval.tv_usec % 1000000);
5370Sstevel@tonic-gate }
5380Sstevel@tonic-gate
5390Sstevel@tonic-gate if (size >= allocsize) {
5400Sstevel@tonic-gate allocsize += 10;
5410Sstevel@tonic-gate buffer = (struct sm *)realloc(buffer,
5420Sstevel@tonic-gate allocsize * sizeof (struct sm));
5430Sstevel@tonic-gate if (!buffer) {
5440Sstevel@tonic-gate syslog(LOG_ERR,
5450Sstevel@tonic-gate "sort_responses: malloc error.\n");
5460Sstevel@tonic-gate return (NULL);
5470Sstevel@tonic-gate }
5480Sstevel@tonic-gate }
5490Sstevel@tonic-gate buffer[size].timeval = ti->ts_timeval;
5500Sstevel@tonic-gate buffer[size].mfs = a->addr_mfs;
5510Sstevel@tonic-gate size++;
5520Sstevel@tonic-gate }
5530Sstevel@tonic-gate }
5540Sstevel@tonic-gate }
5550Sstevel@tonic-gate
5560Sstevel@tonic-gate #ifdef DEBUG
5570Sstevel@tonic-gate if (trace > 3) {
5580Sstevel@tonic-gate trace_prt(1, " sort_responses: before host sort:\n");
5590Sstevel@tonic-gate for (i = 0; i < size; i++)
5600Sstevel@tonic-gate trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host,
5610Sstevel@tonic-gate buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
5620Sstevel@tonic-gate trace_prt(0, "\n");
5630Sstevel@tonic-gate }
5640Sstevel@tonic-gate #endif
5650Sstevel@tonic-gate
5660Sstevel@tonic-gate qsort((void *)buffer, size, sizeof (struct sm), host_sm);
5670Sstevel@tonic-gate
5680Sstevel@tonic-gate /*
5690Sstevel@tonic-gate * Cope with multiply listed hosts by choosing first time
5700Sstevel@tonic-gate */
5710Sstevel@tonic-gate for (i = 1; i < size; i++) {
5720Sstevel@tonic-gate #ifdef DEBUG
5730Sstevel@tonic-gate if (trace > 3) {
5740Sstevel@tonic-gate trace_prt(1, " sort_responses: comparing %s and %s\n",
5750Sstevel@tonic-gate buffer[i-1].mfs->mfs_host,
5760Sstevel@tonic-gate buffer[i].mfs->mfs_host);
5770Sstevel@tonic-gate }
5780Sstevel@tonic-gate #endif
5790Sstevel@tonic-gate if (strcmp(buffer[i-1].mfs->mfs_host,
5800Sstevel@tonic-gate buffer[i].mfs->mfs_host) == 0)
5810Sstevel@tonic-gate memcpy(&buffer[i].timeval, &buffer[i-1].timeval,
5820Sstevel@tonic-gate sizeof (struct timeval));
5830Sstevel@tonic-gate }
5840Sstevel@tonic-gate if (trace > 3)
5850Sstevel@tonic-gate trace_prt(0, "\n");
5860Sstevel@tonic-gate
5870Sstevel@tonic-gate #ifdef DEBUG
5880Sstevel@tonic-gate if (trace > 3) {
5890Sstevel@tonic-gate trace_prt(1, " sort_responses: before time sort:\n");
5900Sstevel@tonic-gate for (i = 0; i < size; i++)
5910Sstevel@tonic-gate trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host,
5920Sstevel@tonic-gate buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
5930Sstevel@tonic-gate trace_prt(0, "\n");
5940Sstevel@tonic-gate }
5950Sstevel@tonic-gate #endif
5960Sstevel@tonic-gate
5970Sstevel@tonic-gate qsort((void *)buffer, size, sizeof (struct sm), time_sm);
5980Sstevel@tonic-gate
5990Sstevel@tonic-gate #ifdef DEBUG
6000Sstevel@tonic-gate if (trace > 3) {
6010Sstevel@tonic-gate trace_prt(1, " sort_responses: after sort:\n");
6020Sstevel@tonic-gate for (i = 0; i < size; i++)
6030Sstevel@tonic-gate trace_prt(1, " %s %d.%d\n", buffer[i].mfs->mfs_host,
6040Sstevel@tonic-gate buffer[i].timeval.tv_sec, buffer[i].timeval.tv_usec);
6050Sstevel@tonic-gate trace_prt(0, "\n");
6060Sstevel@tonic-gate }
6070Sstevel@tonic-gate #endif
6080Sstevel@tonic-gate
6090Sstevel@tonic-gate for (i = 0; i < size; i++) {
6100Sstevel@tonic-gate #ifdef DEBUG
6110Sstevel@tonic-gate if (trace > 3) {
6120Sstevel@tonic-gate trace_prt(1, " sort_responses: adding %s\n",
6130Sstevel@tonic-gate buffer[i].mfs->mfs_host);
6140Sstevel@tonic-gate }
6150Sstevel@tonic-gate #endif
6160Sstevel@tonic-gate p = add_mfs(buffer[i].mfs, 0, &mfs_head, &mfs_tail);
6170Sstevel@tonic-gate if (!p)
6180Sstevel@tonic-gate return (NULL);
6190Sstevel@tonic-gate }
6200Sstevel@tonic-gate free(buffer);
6210Sstevel@tonic-gate
6220Sstevel@tonic-gate return (mfs_head);
6230Sstevel@tonic-gate }
6240Sstevel@tonic-gate
6250Sstevel@tonic-gate
6260Sstevel@tonic-gate /*
6270Sstevel@tonic-gate * Comparison routines called by qsort(3).
6280Sstevel@tonic-gate */
host_sm(const void * a,const void * b)6290Sstevel@tonic-gate static int host_sm(const void *a, const void *b)
6300Sstevel@tonic-gate {
6310Sstevel@tonic-gate return (strcmp(((struct sm *)a)->mfs->mfs_host,
6320Sstevel@tonic-gate ((struct sm *)b)->mfs->mfs_host));
6330Sstevel@tonic-gate }
6340Sstevel@tonic-gate
time_sm(const void * a,const void * b)6350Sstevel@tonic-gate static int time_sm(const void *a, const void *b)
6360Sstevel@tonic-gate {
6370Sstevel@tonic-gate if (timercmp(&(((struct sm *)a)->timeval),
6380Sstevel@tonic-gate &(((struct sm *)b)->timeval), < /* cstyle */))
6390Sstevel@tonic-gate return (-1);
6400Sstevel@tonic-gate else if (timercmp(&(((struct sm *)a)->timeval),
6410Sstevel@tonic-gate &(((struct sm *)b)->timeval), > /* cstyle */))
6420Sstevel@tonic-gate return (1);
6430Sstevel@tonic-gate else
6440Sstevel@tonic-gate return (0);
6450Sstevel@tonic-gate }
6460Sstevel@tonic-gate
/*
 * Given send_time which is the time a request
 * was transmitted to a server, subtract it
 * from the time "now" thereby converting it
 * to an elapsed time.
 *
 * send_time is overwritten in place with the difference.
 */
static void
calc_resp_time(struct timeval *send_time)
{
	struct timeval time_now;

	(void) gettimeofday(&time_now, NULL);
	/* borrow one second when the microsecond part would go negative */
	if (time_now.tv_usec < send_time->tv_usec) {
		time_now.tv_sec--;
		time_now.tv_usec += 1000000;
	}
	send_time->tv_sec = time_now.tv_sec - send_time->tv_sec;
	send_time->tv_usec = time_now.tv_usec - send_time->tv_usec;
}

6680Sstevel@tonic-gate static void
free_transports(trans)6690Sstevel@tonic-gate free_transports(trans)
6700Sstevel@tonic-gate struct transp *trans;
6710Sstevel@tonic-gate {
6720Sstevel@tonic-gate struct transp *t, *tmpt = NULL;
6730Sstevel@tonic-gate struct addrs *a, *tmpa = NULL;
6740Sstevel@tonic-gate struct tstamps *ts, *tmpts = NULL;
6750Sstevel@tonic-gate
6760Sstevel@tonic-gate for (t = trans; t; t = tmpt) {
6770Sstevel@tonic-gate if (t->tr_taddr)
6780Sstevel@tonic-gate (void) t_free((char *)t->tr_taddr, T_BIND);
6790Sstevel@tonic-gate if (t->tr_fd > 0)
6800Sstevel@tonic-gate (void) t_close(t->tr_fd);
6810Sstevel@tonic-gate for (a = t->tr_addrs; a; a = tmpa) {
6820Sstevel@tonic-gate for (ts = a->addr_if_tstamps; ts; ts = tmpts) {
6830Sstevel@tonic-gate tmpts = ts->ts_next;
6840Sstevel@tonic-gate free(ts);
6850Sstevel@tonic-gate }
6860Sstevel@tonic-gate (void) netdir_free((char *)a->addr_addrs, ND_ADDRLIST);
6870Sstevel@tonic-gate tmpa = a->addr_next;
6880Sstevel@tonic-gate free(a);
6890Sstevel@tonic-gate }
6900Sstevel@tonic-gate tmpt = t->tr_next;
6910Sstevel@tonic-gate free(t);
6920Sstevel@tonic-gate }
6930Sstevel@tonic-gate }
694