xref: /onnv-gate/usr/src/lib/libc/port/gen/select_large_fdset.c (revision 1778:6357a59054f7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*	Copyright (c) 1988 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 
32 /*
33  * Emulation of select() system call using _pollsys() system call.
34  *
35  * Assumptions:
36  *	polling for input only is most common.
37  *	polling for exceptional conditions is very rare.
38  *
39  * Note that it is not feasible to emulate all error conditions,
40  * in particular conditions that would return EFAULT are far too
41  * difficult to check for in a library routine.
42  *
43  * This is the alternate large fd_set select.
44  *
45  */
46 
47 /*
48  * Must precede any include files
49  */
50 #ifdef FD_SETSIZE
51 #undef FD_SETSIZE
52 #endif
53 #define	FD_SETSIZE 65536
54 
55 /*
56  * We do not #redefine the name since the only users of this
57  * are external to the libraries and commands.
58  *
59  *  #pragma weak pselect_large_fdset = _pselect_large_fdset
60  *  #pragma weak select_large_fdset = _select_large_fdset
61  */
62 
63 #include "synonyms.h"
64 #include <values.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <errno.h>
68 #include <sys/time.h>
69 #include <sys/types.h>
70 #include <sys/poll.h>
71 #include <string.h>
72 #include <stdlib.h>
73 #include "libc.h"
74 
75 #define	DEFAULT_POLL_SIZE 64
76 
77 static struct pollfd *realloc_fds(int *, struct pollfd **, struct pollfd *);
78 
79 int
80 pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
81 	const timespec_t *tsp, const sigset_t *sigmask)
82 {
83 	long *in, *out, *ex;
84 	ulong_t m;	/* bit mask */
85 	int j;		/* loop counter */
86 	ulong_t b;	/* bits to test */
87 	int n, rv;
88 	int lastj = -1;
89 	int nused;
90 
91 	/*
92 	 * Rather than have a mammoth pollfd (65K) list on the stack
93 	 * we start with a small one and then malloc larger chunks
94 	 * on the heap if necessary.
95 	 */
96 
97 	struct pollfd pfd[DEFAULT_POLL_SIZE];
98 	struct pollfd *p;
99 	struct pollfd *pfd_list;
100 	int nfds_on_list;
101 
102 	fd_set zero;
103 
104 	/*
105 	 * Check for invalid conditions at outset.
106 	 * Required for spec1170.
107 	 * SUSV3: We must behave as a cancellation point even if we fail early.
108 	 */
109 	if (nfds >= 0 && nfds <= FD_SETSIZE) {
110 		if (tsp != NULL) {
111 			if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
112 			    tsp->tv_sec < 0) {
113 				_private_testcancel();
114 				errno = EINVAL;
115 				return (-1);
116 			}
117 		}
118 	} else {
119 		_private_testcancel();
120 		errno = EINVAL;
121 		return (-1);
122 	}
123 
124 	/*
125 	 * If any input args are null, point them at the null array.
126 	 */
127 	(void) memset(&zero, 0, sizeof (fd_set));
128 	if (in0 == NULL)
129 		in0 = &zero;
130 	if (out0 == NULL)
131 		out0 = &zero;
132 	if (ex0 == NULL)
133 		ex0 = &zero;
134 
135 	nfds_on_list = DEFAULT_POLL_SIZE;
136 	pfd_list = pfd;
137 	p = pfd_list;
138 	(void) memset(pfd, 0, sizeof (pfd));
139 	/*
140 	 * For each fd, if any bits are set convert them into
141 	 * the appropriate pollfd struct.
142 	 */
143 	in = (long *)in0->fds_bits;
144 	out = (long *)out0->fds_bits;
145 	ex = (long *)ex0->fds_bits;
146 	nused = 0;
147 	/*
148 	 * nused reflects the number of pollfd structs currently used
149 	 * less one. If realloc_fds returns 0 it is because malloc
150 	 * failed. We expect malloc() to have done the proper
151 	 * thing with errno.
152 	 */
153 	for (n = 0; n < nfds; n += NFDBITS) {
154 		b = (ulong_t)(*in | *out | *ex);
155 		for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
156 			if (b & 1) {
157 				p->fd = n + j;
158 				if (p->fd < nfds) {
159 					p->events = 0;
160 					if (*in & m)
161 						p->events |= POLLRDNORM;
162 					if (*out & m)
163 						p->events |= POLLWRNORM;
164 					if (*ex & m)
165 						p->events |= POLLRDBAND;
166 					if (nused < (nfds_on_list - 1)) {
167 						p++;
168 					} else {
169 						p = realloc_fds(
170 						    &nfds_on_list,
171 						    &pfd_list, pfd);
172 						if (p == 0) {
173 						    if (pfd_list != pfd)
174 							(void) free(pfd_list);
175 						    _private_testcancel();
176 						    return (-1);
177 						}
178 					}
179 					nused++;
180 				} else
181 					goto done;
182 			}
183 		}
184 		in++;
185 		out++;
186 		ex++;
187 	}
188 done:
189 	/*
190 	 * Now do the poll.
191 	 */
192 	do {
193 		rv = _pollsys(pfd_list, (nfds_t)nused, tsp, sigmask);
194 	} while (rv < 0 && errno == EAGAIN);
195 
196 	if (rv < 0) {		/* no need to set bit masks */
197 		if (pfd_list != pfd)
198 			(void) free(pfd_list);
199 		return (rv);
200 	} else if (rv == 0) {
201 		/*
202 		 * Clear out bit masks, just in case.
203 		 * On the assumption that usually only
204 		 * one bit mask is set, use three loops.
205 		 */
206 		if (in0 != &zero) {
207 			in = (long *)in0->fds_bits;
208 			for (n = 0; n < nfds; n += NFDBITS)
209 				*in++ = 0;
210 		}
211 		if (out0 != &zero) {
212 			out = (long *)out0->fds_bits;
213 			for (n = 0; n < nfds; n += NFDBITS)
214 				*out++ = 0;
215 		}
216 		if (ex0 != &zero) {
217 			ex = (long *)ex0->fds_bits;
218 			for (n = 0; n < nfds; n += NFDBITS)
219 				*ex++ = 0;
220 		}
221 		if (pfd_list != pfd)
222 			(void) free(pfd_list);
223 		return (0);
224 	}
225 
226 	/*
227 	 * Check for EINVAL error case first to avoid changing any bits
228 	 * if we're going to return an error.
229 	 */
230 	for (p = pfd_list, j = nused; j-- > 0; p++) {
231 		/*
232 		 * select will return EBADF immediately if any fd's
233 		 * are bad.  poll will complete the poll on the
234 		 * rest of the fd's and include the error indication
235 		 * in the returned bits.  This is a rare case so we
236 		 * accept this difference and return the error after
237 		 * doing more work than select would've done.
238 		 */
239 		if (p->revents & POLLNVAL) {
240 			errno = EBADF;
241 			if (pfd_list != pfd)
242 				(void) free(pfd_list);
243 			return (-1);
244 		}
245 		/*
246 		 * We would like to make POLLHUP available to select,
247 		 * checking to see if we have pending data to be read.
248 		 * BUT until we figure out how not to break Xsun's
249 		 * dependencies on select's existing features...
250 		 * This is what we _thought_ would work ... sigh!
251 		 */
252 		/*
253 		 * if ((p->revents & POLLHUP) &&
254 		 *	!(p->revents & (POLLRDNORM|POLLRDBAND))) {
255 		 *	errno = EINTR;
256 		 *	return (-1);
257 		 * }
258 		 */
259 	}
260 
261 	/*
262 	 * Convert results of poll back into bits
263 	 * in the argument arrays.
264 	 *
265 	 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
266 	 * on return from poll if they were set on input, thus we don't
267 	 * worry about accidentally setting the corresponding bits in the
268 	 * zero array if the input bit masks were null.
269 	 *
270 	 * Must return number of bits set, not number of ready descriptors
271 	 * (as the man page says, and as poll() does).
272 	 */
273 	rv = 0;
274 	for (p = pfd_list; nused-- > 0; p++) {
275 		j = (int)(p->fd / NFDBITS);
276 		/* have we moved into another word of the bit mask yet? */
277 		if (j != lastj) {
278 			/* clear all output bits to start with */
279 			in = (long *)&in0->fds_bits[j];
280 			out = (long *)&out0->fds_bits[j];
281 			ex = (long *)&ex0->fds_bits[j];
282 			/*
283 			 * In case we made "zero" read-only (e.g., with
284 			 * cc -R), avoid actually storing into it.
285 			 */
286 			if (in0 != &zero)
287 				*in = 0;
288 			if (out0 != &zero)
289 				*out = 0;
290 			if (ex0 != &zero)
291 				*ex = 0;
292 			lastj = j;
293 		}
294 		if (p->revents) {
295 			m = 1L << (p->fd % NFDBITS);
296 			if (p->revents & POLLRDNORM) {
297 				*in |= m;
298 				rv++;
299 			}
300 			if (p->revents & POLLWRNORM) {
301 				*out |= m;
302 				rv++;
303 			}
304 			if (p->revents & POLLRDBAND) {
305 				*ex |= m;
306 				rv++;
307 			}
308 			/*
309 			 * Only set this bit on return if we asked about
310 			 * input conditions.
311 			 */
312 			if ((p->revents & (POLLHUP|POLLERR)) &&
313 			    (p->events & POLLRDNORM)) {
314 				if ((*in & m) == 0)
315 					rv++;	/* wasn't already set */
316 				*in |= m;
317 			}
318 			/*
319 			 * Only set this bit on return if we asked about
320 			 * output conditions.
321 			 */
322 			if ((p->revents & (POLLHUP|POLLERR)) &&
323 			    (p->events & POLLWRNORM)) {
324 				if ((*out & m) == 0)
325 					rv++;	/* wasn't already set */
326 				*out |= m;
327 			}
328 			/*
329 			 * Only set this bit on return if we asked about
330 			 * output conditions.
331 			 */
332 			if ((p->revents & (POLLHUP|POLLERR)) &&
333 			    (p->events & POLLRDBAND)) {
334 				if ((*ex & m) == 0)
335 					rv++;   /* wasn't already set */
336 				*ex |= m;
337 			}
338 		}
339 	}
340 	if (pfd_list != pfd)
341 		(void) free(pfd_list);
342 	return (rv);
343 }
344 
345 int
346 select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
347 	struct timeval *tv)
348 {
349 	timespec_t ts;
350 	timespec_t *tsp;
351 
352 	if (tv == NULL)
353 		tsp = NULL;
354 	else {
355 		/* check timeval validity */
356 		if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
357 			errno = EINVAL;
358 			return (-1);
359 		}
360 		/*
361 		 * Convert timeval to timespec.
362 		 * To preserve compatibility with past behavior,
363 		 * when select was built upon poll(2), which has a
364 		 * minimum non-zero timeout of 1 millisecond, force
365 		 * a minimum non-zero timeout of 500 microseconds.
366 		 */
367 		ts.tv_sec = tv->tv_sec;
368 		ts.tv_nsec = tv->tv_usec * 1000;
369 		if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
370 			ts.tv_nsec = 500000;
371 		tsp = &ts;
372 	}
373 
374 	return (pselect_large_fdset(nfds, in0, out0, ex0, tsp, NULL));
375 }
376 
/*
 * Double the capacity of the pollfd list.
 *
 *	num		in: current capacity in entries; out: new capacity
 *	list_head	in: current list; out: the newly allocated list
 *	orig		buffer that must never be freed here (the caller's
 *			initial array)
 *
 * A new buffer is malloc'ed, the existing entries are copied to its
 * front and the newly added entries are zeroed.  The old buffer is
 * freed unless it is orig.
 *
 * Returns a pointer to the first unused entry of the new list.  If
 * malloc() fails, returns NULL and leaves *num and *list_head
 * untouched; the old list still belongs to the caller.
 */
static struct pollfd *
realloc_fds(int *num, struct pollfd **list_head, struct pollfd *orig)
{
	size_t old_bytes = (size_t)*num * sizeof (struct pollfd);
	struct pollfd *new_list;
	struct pollfd *first_free;

	new_list = malloc(old_bytes * 2);
	if (new_list == NULL)
		return (NULL);

	/* carry over the live entries, zero only the newly added tail */
	(void) memcpy(new_list, *list_head, old_bytes);
	(void) memset(new_list + *num, 0, old_bytes);

	if (*list_head != orig)
		free(*list_head);

	*list_head = new_list;
	first_free = new_list + *num;
	*num *= 2;
	return (first_free);
}
403