/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * posix_aio.c implements the POSIX async. I/O functions.
 *
 *	aio_read
 *	aio_write
 *	aio_error
 *	aio_return
 *	aio_suspend
 *	lio_listio
 *	aio_fsync
 *	aio_cancel
 */
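
/*
 * Illustrative sketch (not part of this file's code): a typical caller
 * submits a request with aio_read(), blocks in aio_suspend() until it
 * completes, and reaps the status with aio_error()/aio_return().  The
 * descriptor "fd" and buffer "buf" are assumed to be set up by a caller
 * that includes <aio.h>; error handling is elided.
 *
 *	aiocb_t cb;
 *	const aiocb_t *list[1];
 *	ssize_t nread;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	if (aio_read(&cb) == 0) {
 *		list[0] = &cb;
 *		(void) aio_suspend(list, 1, NULL);
 *		if (aio_error(&cb) == 0)
 *			nread = aio_return(&cb);
 *	}
 */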

#include "lint.h"
#include "thr_uberdata.h"
#include "libc.h"
#include "asyncio.h"
#include <atomic.h>
#include <sys/file.h>
#include <sys/port.h>

cond_t	_aio_waitn_cv = DEFAULTCV;	/* wait for end of aio_waitn */

static int _aio_check_timeout(const timespec_t *, timespec_t *, int *);

/* defines for timedwait in __aio_waitn() and __aio_suspend() */
#define	AIO_TIMEOUT_INDEF	-1
#define	AIO_TIMEOUT_POLL	0
#define	AIO_TIMEOUT_WAIT	1
#define	AIO_TIMEOUT_UNDEF	2

/*
 * List I/O stuff
 */
static void _lio_list_decr(aio_lio_t *);
static long aio_list_max = 0;

int
aio_read(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_READ;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAREAD,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

int
aio_write(aiocb_t *aiocbp)
{
	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);
	aiocbp->aio_lio_opcode = LIO_WRITE;
	return (_aio_rw(aiocbp, NULL, &__nextworker_rw, AIOAWRITE,
	    (AIO_KAIO | AIO_NO_DUPS)));
}

/*
 * __lio_listio() cancellation handler.
 */
/* ARGSUSED */
static void
_lio_listio_cleanup(aio_lio_t *head)
{
	int freeit = 0;

	ASSERT(MUTEX_HELD(&head->lio_mutex));
	if (head->lio_refcnt == 0) {
		ASSERT(head->lio_nent == 0);
		freeit = 1;
	}
	head->lio_waiting = 0;
	sig_mutex_unlock(&head->lio_mutex);
	if (freeit)
		_aio_lio_free(head);
}

int
lio_listio(int mode, aiocb_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
    int nent, struct sigevent *_RESTRICT_KYWD sigevp)
{
	int		aio_ufs = 0;
	int		oerrno = 0;
	aio_lio_t	*head = NULL;
	aiocb_t		*aiocbp;
	int		state = 0;
	int		EIOflg = 0;
	int		rw;
	int		do_kaio = 0;
	int		error;
	int		i;

	if (!_kaio_ok)
		_kaio_init();

	if (aio_list_max == 0)
		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);

	if (nent <= 0 || nent > aio_list_max) {
		errno = EINVAL;
		return (-1);
	}

	switch (mode) {
	case LIO_WAIT:
		state = NOCHECK;
		break;
	case LIO_NOWAIT:
		state = CHECK;
		break;
	default:
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < nent; i++) {
		if ((aiocbp = list[i]) == NULL)
			continue;
		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
			errno = EBUSY;
			return (-1);
		}
		if (_aio_sigev_thread(aiocbp) != 0)
			return (-1);
		if (aiocbp->aio_lio_opcode == LIO_NOP)
			aiocbp->aio_state = NOCHECK;
		else {
			aiocbp->aio_state = state;
			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
				do_kaio++;
			else
				aiocbp->aio_resultp.aio_errno = ENOTSUP;
		}
	}
	if (_aio_sigev_thread_init(sigevp) != 0)
		return (-1);

	if (do_kaio) {
		error = (int)_kaio(AIOLIO, mode, list, nent, sigevp);
		if (error == 0)
			return (0);
		oerrno = errno;
	} else {
		oerrno = errno = ENOTSUP;
		error = -1;
	}

	if (error == -1 && errno == ENOTSUP) {
		error = errno = 0;
		/*
		 * If LIO_WAIT, or notification required, allocate a list head.
		 */
		if (mode == LIO_WAIT ||
		    (sigevp != NULL &&
		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
		    sigevp->sigev_notify == SIGEV_THREAD ||
		    sigevp->sigev_notify == SIGEV_PORT)))
			head = _aio_lio_alloc();
		if (head) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = mode;
			head->lio_largefile = 0;
			if (mode == LIO_NOWAIT && sigevp != NULL) {
				if (sigevp->sigev_notify == SIGEV_THREAD) {
					head->lio_port = sigevp->sigev_signo;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				} else if (sigevp->sigev_notify == SIGEV_PORT) {
					port_notify_t *pn =
					    sigevp->sigev_value.sival_ptr;
					head->lio_port = pn->portnfy_port;
					head->lio_event = AIOLIO;
					head->lio_sigevent = sigevp;
					head->lio_sigval.sival_ptr =
					    pn->portnfy_user;
				} else {	/* SIGEV_SIGNAL */
					head->lio_signo = sigevp->sigev_signo;
					head->lio_sigval.sival_ptr =
					    sigevp->sigev_value.sival_ptr;
				}
			}
			head->lio_nent = head->lio_refcnt = nent;
			sig_mutex_unlock(&head->lio_mutex);
		}
		/*
		 * find UFS requests, errno == ENOTSUP/EBADFD,
		 */
		for (i = 0; i < nent; i++) {
			if ((aiocbp = list[i]) == NULL ||
			    aiocbp->aio_lio_opcode == LIO_NOP ||
			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
				if (head)
					_lio_list_decr(head);
				continue;
			}
			if (aiocbp->aio_resultp.aio_errno == EBADFD)
				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
			if (aiocbp->aio_reqprio != 0) {
				aiocbp->aio_resultp.aio_errno = EINVAL;
				aiocbp->aio_resultp.aio_return = -1;
				EIOflg = 1;
				if (head)
					_lio_list_decr(head);
				continue;
			}
			/*
			 * submit an AIO request with flags AIO_NO_KAIO
			 * to avoid the kaio() syscall in _aio_rw()
			 */
			switch (aiocbp->aio_lio_opcode) {
			case LIO_READ:
				rw = AIOAREAD;
				break;
			case LIO_WRITE:
				rw = AIOAWRITE;
				break;
			}
			error = _aio_rw(aiocbp, head, &__nextworker_rw, rw,
			    (AIO_NO_KAIO | AIO_NO_DUPS));
			if (error == 0)
				aio_ufs++;
			else {
				if (head)
					_lio_list_decr(head);
				aiocbp->aio_resultp.aio_errno = error;
				EIOflg = 1;
			}
		}
	}
	if (EIOflg) {
		errno = EIO;
		return (-1);
	}
	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
		/*
		 * call kaio(AIOLIOWAIT) to get all outstanding
		 * kernel AIO requests
		 */
		if ((nent - aio_ufs) > 0)
			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
		if (head != NULL && head->lio_nent > 0) {
			sig_mutex_lock(&head->lio_mutex);
			while (head->lio_refcnt > 0) {
				int err;
				head->lio_waiting = 1;
				pthread_cleanup_push(_lio_listio_cleanup, head);
				err = sig_cond_wait(&head->lio_cond_cv,
				    &head->lio_mutex);
				pthread_cleanup_pop(0);
				head->lio_waiting = 0;
				if (err && head->lio_nent > 0) {
					sig_mutex_unlock(&head->lio_mutex);
					errno = err;
					return (-1);
				}
			}
			sig_mutex_unlock(&head->lio_mutex);
			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
			_aio_lio_free(head);
			for (i = 0; i < nent; i++) {
				if ((aiocbp = list[i]) != NULL &&
				    aiocbp->aio_resultp.aio_errno) {
					errno = EIO;
					return (-1);
				}
			}
		}
		return (0);
	}
	return (error);
}
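
/*
 * Illustrative sketch (not part of this file's code): lio_listio() can
 * batch several requests in one call.  With LIO_WAIT it returns only
 * after all requests have finished and sets errno to EIO if any one of
 * them failed; the per-request status is then available through
 * aio_error()/aio_return().  "fd", "buf1" and "buf2" are assumed to be
 * set up by the caller.
 *
 *	aiocb_t cb1, cb2;
 *	aiocb_t *list[2];
 *
 *	(void) memset(&cb1, 0, sizeof (cb1));
 *	cb1.aio_fildes = fd;
 *	cb1.aio_buf = buf1;
 *	cb1.aio_nbytes = sizeof (buf1);
 *	cb1.aio_offset = 0;
 *	cb1.aio_lio_opcode = LIO_READ;
 *	cb2 = cb1;
 *	cb2.aio_buf = buf2;
 *	cb2.aio_nbytes = sizeof (buf2);
 *	cb2.aio_offset = sizeof (buf1);
 *	list[0] = &cb1;
 *	list[1] = &cb2;
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) != 0 && errno == EIO) {
 *		... inspect each aiocb with aio_error()/aio_return() ...
 *	}
 */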

static void
_lio_list_decr(aio_lio_t *head)
{
	sig_mutex_lock(&head->lio_mutex);
	head->lio_nent--;
	head->lio_refcnt--;
	sig_mutex_unlock(&head->lio_mutex);
}

/*
 * __aio_suspend() cancellation handler.
 */
/* ARGSUSED */
static void
_aio_suspend_cleanup(int *counter)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));
	(*counter)--;		/* _aio_kernel_suspend or _aio_suscv_cnt */
	sig_mutex_unlock(&__aio_mutex);
}

static int
__aio_suspend(void **list, int nent, const timespec_t *timo, int largefile)
{
	int		cv_err;	/* error code from cond_xxx() */
	int		kerr;	/* error code from _kaio(AIOSUSPEND) */
	int		i;
	timespec_t	twait;	/* copy of timo for internal calculations */
	timespec_t	*wait = NULL;
	int		timedwait;
	int		req_outstanding;
	aiocb_t		**listp;
	aiocb_t		*aiocbp;
#if !defined(_LP64)
	aiocb64_t	**listp64;
	aiocb64_t	*aiocbp64;
#endif
	hrtime_t	hrtstart;
	hrtime_t	hrtend;
	hrtime_t	hrtres;

#if defined(_LP64)
	if (largefile)
		aio_panic("__aio_suspend: largefile set when _LP64 defined");
#endif

	if (nent <= 0) {
		errno = EINVAL;
		return (-1);
	}

	if (timo) {
		if (timo->tv_sec < 0 || timo->tv_nsec < 0 ||
		    timo->tv_nsec >= NANOSEC) {
			errno = EINVAL;
			return (-1);
		}
		/* Initialize start time if time monitoring desired */
		if (timo->tv_sec > 0 || timo->tv_nsec > 0) {
			timedwait = AIO_TIMEOUT_WAIT;
			hrtstart = gethrtime();
		} else {
			/* content of timeout = 0 : polling */
			timedwait = AIO_TIMEOUT_POLL;
		}
	} else {
		/* timeout pointer = NULL : wait indefinitely */
		timedwait = AIO_TIMEOUT_INDEF;
	}

#if !defined(_LP64)
	if (largefile) {
		listp64 = (aiocb64_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp64 = listp64[i]) != NULL &&
			    aiocbp64->aio_state == CHECK)
				aiocbp64->aio_state = CHECKED;
		}
	} else
#endif	/* !_LP64 */
	{
		listp = (aiocb_t **)list;
		for (i = 0; i < nent; i++) {
			if ((aiocbp = listp[i]) != NULL &&
			    aiocbp->aio_state == CHECK)
				aiocbp->aio_state = CHECKED;
		}
	}

	sig_mutex_lock(&__aio_mutex);

	/*
	 * The next "if" case is required to accelerate the
	 * access to completed RAW-IO requests.
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		/* Only kernel requests pending */

		/*
		 * _aio_kernel_suspend is used to detect completed non RAW-IO
		 * requests.
		 * As long as this thread resides in the kernel (_kaio) further
		 * asynchronous non RAW-IO requests could be submitted.
		 */
		_aio_kernel_suspend++;

		/*
		 * Always do the kaio() call without using the KAIO_SUPPORTED()
		 * checks because it is not mandatory to have a valid fd
		 * set in the list entries, only the resultp must be set.
		 *
		 * _kaio(AIOSUSPEND ...) return values :
		 *  0:  everything ok, completed request found
		 * -1:  error
		 *  1:  no error : _aiodone awakened the _kaio(AIOSUSPEND,,)
		 *	system call using _kaio(AIONOTIFY). It means that some
		 *	non RAW-IOs completed in between.
		 */

		pthread_cleanup_push(_aio_suspend_cleanup,
		    &_aio_kernel_suspend);
		pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
		sig_mutex_unlock(&__aio_mutex);
		_cancel_prologue();
		kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
		    list, nent, timo, -1);
		_cancel_epilogue();
		pthread_cleanup_pop(1);	/* sig_mutex_lock(&__aio_mutex) */
		pthread_cleanup_pop(0);

		_aio_kernel_suspend--;

		if (!kerr) {
			sig_mutex_unlock(&__aio_mutex);
			return (0);
		}
	} else {
		kerr = 1;	/* simulation: _kaio detected AIONOTIFY */
	}

	/*
	 * Return kernel error code if no other IOs are outstanding.
	 */
	req_outstanding = _aio_doneq_cnt + _aio_outstand_cnt;

	sig_mutex_unlock(&__aio_mutex);

	if (req_outstanding == 0) {
		/* no IOs outstanding in the thread pool */
		if (kerr == 1)
			/* return "no IOs completed" */
			errno = EAGAIN;
		return (-1);
	}

	/*
	 * IOs using the thread pool are outstanding.
	 */
	if (timedwait == AIO_TIMEOUT_WAIT) {
		/* time monitoring */
		hrtend = hrtstart + (hrtime_t)timo->tv_sec * (hrtime_t)NANOSEC +
		    (hrtime_t)timo->tv_nsec;
		hrtres = hrtend - gethrtime();
		if (hrtres <= 0)
			hrtres = 1;
		twait.tv_sec = hrtres / (hrtime_t)NANOSEC;
		twait.tv_nsec = hrtres % (hrtime_t)NANOSEC;
		wait = &twait;
	} else if (timedwait == AIO_TIMEOUT_POLL) {
		twait = *timo;	/* content of timo = 0 : polling */
		wait = &twait;
	}

	for (;;) {
		int	error;
		int	inprogress;

		/* first scan file system requests */
		inprogress = 0;
		for (i = 0; i < nent; i++) {
#if !defined(_LP64)
			if (largefile) {
				if ((aiocbp64 = listp64[i]) == NULL)
					continue;
				error = aiocbp64->aio_resultp.aio_errno;
			} else
#endif
			{
				if ((aiocbp = listp[i]) == NULL)
					continue;
				error = aiocbp->aio_resultp.aio_errno;
			}
			if (error == EINPROGRESS)
				inprogress = 1;
			else if (error != ECANCELED) {
				errno = 0;
				return (0);
			}
		}

		sig_mutex_lock(&__aio_mutex);

		/*
		 * If there aren't outstanding I/Os in the thread pool then
		 * we have to return here, provided that all kernel RAW-IOs
		 * also completed.
		 * If the kernel was notified to return, then we have to check
		 * possible pending RAW-IOs.
		 */
		if (_aio_outstand_cnt == 0 && inprogress == 0 && kerr != 1) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		/*
		 * There are outstanding IOs in the thread pool or the kernel
		 * was notified to return.
		 * Check pending RAW-IOs first.
		 */
		if (kerr == 1) {
			/*
			 * _aiodone just notified the kernel about
			 * completed non RAW-IOs (AIONOTIFY was detected).
			 */
			if (timedwait == AIO_TIMEOUT_WAIT) {
				/* Update remaining timeout for the kernel */
				hrtres = hrtend - gethrtime();
				if (hrtres <= 0) {
					/* timer expired */
					sig_mutex_unlock(&__aio_mutex);
					errno = EAGAIN;
					break;
				}
				wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
				wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
			}
			_aio_kernel_suspend++;

			pthread_cleanup_push(_aio_suspend_cleanup,
			    &_aio_kernel_suspend);
			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			kerr = (int)_kaio(largefile ? AIOSUSPEND64 : AIOSUSPEND,
			    list, nent, wait, -1);
			_cancel_epilogue();
			pthread_cleanup_pop(1);
			pthread_cleanup_pop(0);

			_aio_kernel_suspend--;

			if (!kerr) {
				sig_mutex_unlock(&__aio_mutex);
				return (0);
			}
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			sig_mutex_unlock(&__aio_mutex);
			errno = EAGAIN;
			break;
		}

		if (timedwait == AIO_TIMEOUT_WAIT) {
			/* Update remaining timeout */
			hrtres = hrtend - gethrtime();
			if (hrtres <= 0) {
				/* timer expired */
				sig_mutex_unlock(&__aio_mutex);
				errno = EAGAIN;
				break;
			}
			wait->tv_sec = hrtres / (hrtime_t)NANOSEC;
			wait->tv_nsec = hrtres % (hrtime_t)NANOSEC;
		}

		if (_aio_outstand_cnt == 0) {
			sig_mutex_unlock(&__aio_mutex);
			continue;
		}

		_aio_suscv_cnt++;	/* ID for _aiodone (wake up) */

		pthread_cleanup_push(_aio_suspend_cleanup, &_aio_suscv_cnt);
		if (timedwait == AIO_TIMEOUT_WAIT) {
			cv_err = sig_cond_reltimedwait(&_aio_iowait_cv,
			    &__aio_mutex, wait);
			if (cv_err == ETIME)
				cv_err = EAGAIN;
		} else {
			/* wait indefinitely */
			cv_err = sig_cond_wait(&_aio_iowait_cv, &__aio_mutex);
		}
		/* this decrements _aio_suscv_cnt and drops __aio_mutex */
		pthread_cleanup_pop(1);

		if (cv_err) {
			errno = cv_err;
			break;
		}
	}
	return (-1);
}

int
aio_suspend(const aiocb_t * const list[], int nent,
    const timespec_t *timeout)
{
	return (__aio_suspend((void **)list, nent, timeout, 0));
}
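
/*
 * Illustrative sketch (not part of this file's code): with a zero-valued
 * timeout aio_suspend() merely polls, while a non-NULL, non-zero timeout
 * bounds the wait; errno is then EAGAIN if none of the listed requests
 * completed in time.  "cb" is assumed to be an aiocb_t that was already
 * submitted with aio_read() or aio_write().
 *
 *	const aiocb_t *list[1];
 *	timespec_t ts;
 *
 *	list[0] = &cb;
 *	ts.tv_sec = 5;
 *	ts.tv_nsec = 0;
 *	if (aio_suspend(list, 1, &ts) != 0 && errno == EAGAIN) {
 *		... five seconds passed, request still in progress ...
 *	}
 */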

int
aio_error(const aiocb_t *aiocbp)
{
	const aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;

	if ((error = resultp->aio_errno) == EINPROGRESS) {
		if (aiocbp->aio_state == CHECK) {
			/*
			 * Always do the kaio() call without using the
			 * KAIO_SUPPORTED() checks because it is not
			 * mandatory to have a valid fd set in the
			 * aiocb, only the resultp must be set.
			 */
			if ((int)_kaio(AIOERROR, aiocbp) == EINVAL) {
				errno = EINVAL;
				return (-1);
			}
			error = resultp->aio_errno;
		} else if (aiocbp->aio_state == CHECKED) {
			((aiocb_t *)aiocbp)->aio_state = CHECK;
		}
	} else if (aiocbp->aio_state == USERAIO) {
		sig_mutex_lock(&__aio_mutex);
		if ((reqp = _aio_hash_del((aio_result_t *)resultp)) == NULL) {
			sig_mutex_unlock(&__aio_mutex);
			((aiocb_t *)aiocbp)->aio_state = CHECKED;
		} else {
			((aiocb_t *)aiocbp)->aio_state = NOCHECK;
			ASSERT(reqp->req_head == NULL);
			(void) _aio_req_remove(reqp);
			sig_mutex_unlock(&__aio_mutex);
			_aio_req_free(reqp);
		}
	}
	return (error);
}

ssize_t
aio_return(aiocb_t *aiocbp)
{
	aio_result_t *resultp = &aiocbp->aio_resultp;
	aio_req_t *reqp;
	int error;
	ssize_t retval;

	/*
	 * The _aiodone() function stores resultp->aio_return before
	 * storing resultp->aio_errno (with a membar_producer() in
	 * between).  We use membar_consumer() below to ensure proper
	 * memory ordering between _aiodone() and ourselves.
	 */
	error = resultp->aio_errno;
	membar_consumer();
	retval = resultp->aio_return;

	/*
	 * We use this condition to indicate either that
	 * aio_return() has been called before or should
	 * not have been called yet.
	 */
	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
		errno = error;
		return (-1);
	}

	/*
	 * Before we return, mark the result as being returned so that later
	 * calls to aio_return() will return the fact that the result has
	 * already been returned.
	 */
	sig_mutex_lock(&__aio_mutex);
	/* retest, in case more than one thread actually got in here */
	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
		sig_mutex_unlock(&__aio_mutex);
		errno = EINVAL;
		return (-1);
	}
	resultp->aio_return = -1;
	resultp->aio_errno = EINVAL;
	if ((reqp = _aio_hash_del(resultp)) == NULL)
		sig_mutex_unlock(&__aio_mutex);
	else {
		aiocbp->aio_state = NOCHECK;
		ASSERT(reqp->req_head == NULL);
		(void) _aio_req_remove(reqp);
		sig_mutex_unlock(&__aio_mutex);
		_aio_req_free(reqp);
	}

	if (retval == -1)
		errno = error;
	return (retval);
}
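
/*
 * Illustrative sketch (not part of this file's code): aio_error()
 * reports EINPROGRESS until the request finishes, and aio_return() may
 * be called exactly once afterwards; as arranged above, a second
 * aio_return() call yields -1 with errno set to EINVAL.  "cb" is
 * assumed to be an already-submitted aiocb_t.
 *
 *	int err;
 *	ssize_t nbytes;
 *
 *	while ((err = aio_error(&cb)) == EINPROGRESS) {
 *		... do other useful work, or sleep ...
 *	}
 *	nbytes = aio_return(&cb);
 *	if (nbytes == -1) {
 *		... errno holds the request's error code ...
 *	}
 */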

void
_lio_remove(aio_req_t *reqp)
{
	aio_lio_t *head;
	int refcnt;

	if ((head = reqp->req_head) != NULL) {
		sig_mutex_lock(&head->lio_mutex);
		ASSERT(head->lio_refcnt == head->lio_nent);
		refcnt = --head->lio_nent;
		head->lio_refcnt--;
		sig_mutex_unlock(&head->lio_mutex);
		if (refcnt == 0)
			_aio_lio_free(head);
		reqp->req_head = NULL;
	}
}

/*
 * This function returns the number of asynchronous I/O requests submitted.
 */
static int
__aio_fsync_bar(aiocb_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
    int workerscnt)
{
	int i;
	int error;
	aio_worker_t *next = aiowp;

	for (i = 0; i < workerscnt; i++) {
		error = _aio_rw(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
		if (error != 0) {
			sig_mutex_lock(&head->lio_mutex);
			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
			head->lio_nent -= workerscnt - i;
			head->lio_refcnt -= workerscnt - i;
			sig_mutex_unlock(&head->lio_mutex);
			errno = EAGAIN;
			return (i);
		}
		next = next->work_forw;
	}
	return (i);
}

int
aio_fsync(int op, aiocb_t *aiocbp)
{
	aio_lio_t *head;
	struct stat statb;
	int fret;

	if (aiocbp == NULL)
		return (0);
	if (op != O_DSYNC && op != O_SYNC) {
		errno = EINVAL;
		return (-1);
	}
	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
		errno = EBUSY;
		return (-1);
	}
	if (fstat(aiocbp->aio_fildes, &statb) < 0)
		return (-1);
	if (_aio_sigev_thread(aiocbp) != 0)
		return (-1);

	/*
	 * Kernel aio_fsync() is not supported.
	 * We force user-level aio_fsync() just
	 * for the notification side-effect.
	 */
	if (!__uaio_ok && __uaio_init() == -1)
		return (-1);

	/*
	 * The first asynchronous I/O request in the current process will
	 * create a bunch of workers (via __uaio_init()).  If the number
	 * of workers is zero then the number of pending asynchronous I/O
	 * requests is zero.  In such a case only execute the standard
	 * fsync(3C) or fdatasync(3C) as appropriate.
	 */
	if (__rw_workerscnt == 0) {
		if (op == O_DSYNC)
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_DATA));
		else
			return (__fdsync(aiocbp->aio_fildes, FDSYNC_FILE));
	}

	/*
	 * re-use aio_offset as the op field.
	 *	O_DSYNC - fdatasync()
	 *	O_SYNC - fsync()
	 */
	aiocbp->aio_offset = op;
	aiocbp->aio_lio_opcode = AIOFSYNC;

	/*
	 * Create a list of fsync requests.  The worker that
	 * gets the last request will do the fsync request.
	 */
	head = _aio_lio_alloc();
	if (head == NULL) {
		errno = EAGAIN;
		return (-1);
	}
	head->lio_mode = LIO_FSYNC;
	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
	head->lio_largefile = 0;

	/*
	 * Insert an fsync request on every worker's queue.
	 */
	fret = __aio_fsync_bar(aiocbp, head, __workers_rw, __rw_workerscnt);
	if (fret != __rw_workerscnt) {
		/*
		 * Fewer fsync requests than workers means that it was
		 * not possible to submit fsync requests to all workers.
		 * Actions:
		 * a) number of fsync requests submitted is 0:
		 *    => free allocated memory (aio_lio_t).
		 * b) number of fsync requests submitted is > 0:
		 *    => the last worker executing the fsync request
		 *	 will free the aio_lio_t struct.
		 */
		if (fret == 0)
			_aio_lio_free(head);
		return (-1);
	}
	return (0);
}
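
/*
 * Illustrative sketch (not part of this file's code): aio_fsync()
 * queues a sync for the aiocb's file descriptor and completes
 * asynchronously; only aio_fildes and the sigevent fields of the aiocb
 * matter here.  "fd" is assumed to be an open descriptor and <fcntl.h>
 * is assumed to supply O_DSYNC.
 *
 *	aiocb_t cb;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_sigevent.sigev_notify = SIGEV_NONE;
 *	if (aio_fsync(O_DSYNC, &cb) == 0) {
 *		... completion can be polled with aio_error(&cb) ...
 *	}
 */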

int
aio_cancel(int fd, aiocb_t *aiocbp)
{
	aio_req_t *reqp;
	aio_worker_t *aiowp;
	int done = 0;
	int canceled = 0;
	struct stat buf;

	if (fstat(fd, &buf) < 0)
		return (-1);

	if (aiocbp != NULL) {
		if (fd != aiocbp->aio_fildes) {
			errno = EINVAL;
			return (-1);
		}
		if (aiocbp->aio_state == USERAIO) {
			sig_mutex_lock(&__aio_mutex);
			reqp = _aio_hash_find(&aiocbp->aio_resultp);
			if (reqp == NULL) {
				sig_mutex_unlock(&__aio_mutex);
				return (AIO_ALLDONE);
			}
			aiowp = reqp->req_worker;
			sig_mutex_lock(&aiowp->work_qlock1);
			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
			sig_mutex_unlock(&aiowp->work_qlock1);
			sig_mutex_unlock(&__aio_mutex);
			if (done)
				return (AIO_ALLDONE);
			if (canceled)
				return (AIO_CANCELED);
			return (AIO_NOTCANCELED);
		}
		if (aiocbp->aio_state == USERAIO_DONE)
			return (AIO_ALLDONE);
		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
	}

	return (aiocancel_all(fd));
}
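
/*
 * Illustrative sketch (not part of this file's code): aio_cancel()
 * returns AIO_CANCELED, AIO_NOTCANCELED or AIO_ALLDONE; a canceled
 * request should still be reaped with aio_return().  "cb" is assumed
 * to be an already-submitted aiocb_t.
 *
 *	switch (aio_cancel(cb.aio_fildes, &cb)) {
 *	case AIO_CANCELED:
 *		(void) aio_return(&cb);
 *		break;
 *	case AIO_NOTCANCELED:
 *		... still in progress; wait with aio_suspend() ...
 *		break;
 *	case AIO_ALLDONE:
 *		... already completed before the cancel ...
 *		break;
 *	}
 */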

/*
 * __aio_waitn() cancellation handler.
 */
static void
_aio_waitn_cleanup(void *arg __unused)
{
	ASSERT(MUTEX_HELD(&__aio_mutex));

	/* check for pending aio_waitn() calls */
	_aio_flags &= ~(AIO_LIB_WAITN | AIO_WAIT_INPROGRESS | AIO_IO_WAITING);
	if (_aio_flags & AIO_LIB_WAITN_PENDING) {
		_aio_flags &= ~AIO_LIB_WAITN_PENDING;
		(void) cond_signal(&_aio_waitn_cv);
	}

	sig_mutex_unlock(&__aio_mutex);
}

/*
 * aio_waitn can be used to reap the results of several I/O operations that
 * were submitted asynchronously. The submission of I/Os can be done using
 * existing POSIX interfaces: lio_listio, aio_write or aio_read.
 * aio_waitn waits until "nwait" I/Os (supplied as a parameter) have
 * completed and it returns the descriptors for these I/Os in "list". The
 * maximum size of this list is given by "nent" and the actual number of I/Os
 * completed is returned in "nwait". aio_waitn might also return early
 * if the timeout expires. Additionally, aio_waitn returns 0 if
 * successful or -1 if an error occurred.
 */
static int
__aio_waitn(void **list, uint_t nent, uint_t *nwait, const timespec_t *utimo)
{
	int error = 0;
	uint_t dnwait = 0;	/* number of requests in the waitn-done list */
	uint_t kwaitcnt;	/* expected "done" requests from kernel */
	uint_t knentcnt;	/* max. expected "done" requests from kernel */
	int uerrno = 0;
	int kerrno = 0;		/* save errno from _kaio() call */
	int timedwait = AIO_TIMEOUT_UNDEF;
	aio_req_t *reqp;
	timespec_t end;
	timespec_t twait;	/* copy of utimo for internal calculations */
	timespec_t *wait = NULL;

	if (nent == 0 || *nwait == 0 || *nwait > nent) {
		errno = EINVAL;
		return (-1);
	}

	/*
	 * Only one running aio_waitn call per process is allowed.
	 * Further calls will be blocked here until the running
	 * call finishes.
	 */

	sig_mutex_lock(&__aio_mutex);

	while (_aio_flags & AIO_LIB_WAITN) {
		if (utimo && utimo->tv_sec == 0 && utimo->tv_nsec == 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			return (0);
		}
		_aio_flags |= AIO_LIB_WAITN_PENDING;
		pthread_cleanup_push(sig_mutex_unlock, &__aio_mutex);
		error = sig_cond_wait(&_aio_waitn_cv, &__aio_mutex);
		pthread_cleanup_pop(0);
		if (error != 0) {
			sig_mutex_unlock(&__aio_mutex);
			*nwait = 0;
			errno = error;
			return (-1);
		}
	}

	pthread_cleanup_push(_aio_waitn_cleanup, NULL);

	_aio_flags |= AIO_LIB_WAITN;

	if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
		error = -1;
		dnwait = 0;
		goto out;
	}
	if (timedwait != AIO_TIMEOUT_INDEF) {
		twait = *utimo;
		wait = &twait;
	}

	/*
	 * If both counters are still set to zero, then only
	 * kernel requests are currently outstanding (raw-I/Os).
	 */
	if ((_aio_doneq_cnt + _aio_outstand_cnt) == 0) {
		for (;;) {
			kwaitcnt = *nwait - dnwait;
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			if (error == 0) {
				dnwait += kwaitcnt;
				if (dnwait >= *nwait ||
				    *nwait < AIO_WAITN_MAXIOCBS)
					break;
				if (timedwait == AIO_TIMEOUT_WAIT) {
					error = _aio_get_timedelta(&end, wait);
					if (error == -1) {
						/* timer expired */
						errno = ETIME;
						break;
					}
				}
				continue;
			}
			if (errno == EAGAIN) {
				if (dnwait > 0)
					error = 0;
				break;
			}
			if (errno == ETIME || errno == EINTR) {
				dnwait += kwaitcnt;
				break;
			}
			/* fatal error */
			break;
		}

		goto out;
	}

	/* File system I/Os outstanding ... */

	if (timedwait == AIO_TIMEOUT_UNDEF) {
		if (_aio_check_timeout(utimo, &end, &timedwait) != 0) {
			error = -1;
			dnwait = 0;
			goto out;
		}
		if (timedwait != AIO_TIMEOUT_INDEF) {
			twait = *utimo;
			wait = &twait;
		}
	}

	for (;;) {
		uint_t	sum_reqs;

		/*
		 * Calculate sum of active non RAW-IO requests (sum_reqs).
		 * If the expected number of completed requests (*nwait) is
		 * greater than the calculated sum (sum_reqs) then
		 * use _kaio to check pending RAW-IO requests.
		 */
		sum_reqs = _aio_doneq_cnt + dnwait + _aio_outstand_cnt;
		kwaitcnt = (*nwait > sum_reqs) ? *nwait - sum_reqs : 0;

		if (kwaitcnt != 0) {
			/* possibly some kernel I/Os outstanding */
			knentcnt = nent - dnwait;
			if (knentcnt > AIO_WAITN_MAXIOCBS)
				knentcnt = AIO_WAITN_MAXIOCBS;
			kwaitcnt = (kwaitcnt > knentcnt) ? knentcnt : kwaitcnt;

			_aio_flags |= AIO_WAIT_INPROGRESS;

			pthread_cleanup_push(sig_mutex_lock, &__aio_mutex);
			sig_mutex_unlock(&__aio_mutex);
			_cancel_prologue();
			error = (int)_kaio(AIOWAITN, &list[dnwait], knentcnt,
			    &kwaitcnt, wait);
			_cancel_epilogue();
			pthread_cleanup_pop(1);

			_aio_flags &= ~AIO_WAIT_INPROGRESS;

			if (error == 0) {
				dnwait += kwaitcnt;
			} else {
				switch (errno) {
				case EINVAL:
				case EAGAIN:
					/* don't wait for kernel I/Os */
					kerrno = 0; /* ignore _kaio() errno */
					*nwait = _aio_doneq_cnt +
					    _aio_outstand_cnt + dnwait;
					error = 0;
					break;
				case EINTR:
				case ETIME:
					/* just scan for completed LIB I/Os */
					dnwait += kwaitcnt;
					timedwait = AIO_TIMEOUT_POLL;
					kerrno = errno;	/* save _kaio() errno */
					error = 0;
					break;
				default:
					kerrno = errno;	/* save _kaio() errno */
					break;
				}
			}
			if (error)
				break;		/* fatal kernel error */
		}

		/* check completed FS requests in the "done" queue */

		while (_aio_doneq_cnt && dnwait < nent) {
			/* get done requests */
			if ((reqp = _aio_req_remove(NULL)) != NULL) {
				(void) _aio_hash_del(reqp->req_resultp);
				list[dnwait++] = reqp->req_aiocbp;
				_aio_req_mark_done(reqp);
				_lio_remove(reqp);
				_aio_req_free(reqp);
			}
		}

		if (dnwait >= *nwait) {
			/* min. requested number of completed I/Os satisfied */
			break;
		}
		if (timedwait == AIO_TIMEOUT_WAIT &&
		    (error = _aio_get_timedelta(&end, wait)) == -1) {
			/* timer expired */
			uerrno = ETIME;
			break;
		}

		/*
		 * If some I/Os are outstanding and we have to wait for them,
		 * then sleep here.  _aiodone() will call _aio_waitn_wakeup()
		 * to wake up this thread as soon as the required number of
		 * completed I/Os is done.
		 */
		if (_aio_outstand_cnt > 0 && timedwait != AIO_TIMEOUT_POLL) {
			/*
			 * _aio_waitn_wakeup() will wake up this thread when:
			 * - _aio_waitncnt requests are completed or
			 * - _aio_outstand_cnt becomes zero.
			 * sig_cond_reltimedwait() could also return with
			 * a timeout error (ETIME).
			 */
			if (*nwait < _aio_outstand_cnt)
				_aio_waitncnt = *nwait;
			else
				_aio_waitncnt = _aio_outstand_cnt;

			_aio_flags |= AIO_IO_WAITING;

			if (wait)
				uerrno = sig_cond_reltimedwait(&_aio_iowait_cv,
				    &__aio_mutex, wait);
			else
				uerrno = sig_cond_wait(&_aio_iowait_cv,
				    &__aio_mutex);

			_aio_flags &= ~AIO_IO_WAITING;

			if (uerrno == ETIME) {
				timedwait = AIO_TIMEOUT_POLL;
				continue;
			}
			if (uerrno != 0)
				timedwait = AIO_TIMEOUT_POLL;
		}

		if (timedwait == AIO_TIMEOUT_POLL) {
			/* polling or timer expired */
			break;
		}
	}

	errno = uerrno == 0 ? kerrno : uerrno;
	if (errno)
		error = -1;
	else
		error = 0;

out:
	*nwait = dnwait;

	pthread_cleanup_pop(1);		/* drops __aio_mutex */

	return (error);
}

int
aio_waitn(aiocb_t *list[], uint_t nent, uint_t *nwait,
    const timespec_t *timeout)
{
	return (__aio_waitn((void **)list, nent, nwait, timeout));
}
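
/*
 * Illustrative sketch (not part of this file's code): aio_waitn()
 * blocks until at least *nwait of the submitted requests have completed
 * and stores the matching aiocb pointers in "list"; on return *nwait
 * holds the number actually reaped.  Eight requests are assumed to have
 * been submitted already.
 *
 *	aiocb_t *done[8];
 *	uint_t nwait = 4;
 *	uint_t i;
 *
 *	if (aio_waitn(done, 8, &nwait, NULL) == 0) {
 *		for (i = 0; i < nwait; i++)
 *			(void) aio_return(done[i]);
 *	}
 */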
1203f841f6adSraf 
1204f841f6adSraf void
_aio_waitn_wakeup(void)1205f841f6adSraf _aio_waitn_wakeup(void)
1206f841f6adSraf {
1207f841f6adSraf 	/*
1208f841f6adSraf 	 * __aio_waitn() sets AIO_IO_WAITING to notify _aiodone() that
1209f841f6adSraf 	 * it is waiting for completed I/Os. The number of required
1210f841f6adSraf 	 * completed I/Os is stored into "_aio_waitncnt".
1211f841f6adSraf 	 * aio_waitn() is woken up when
1212f841f6adSraf 	 * - there are no further outstanding I/Os
1213f841f6adSraf 	 *   (_aio_outstand_cnt == 0) or
1214f841f6adSraf 	 * - the expected number of I/Os has completed.
1215f841f6adSraf 	 * Only one __aio_waitn() function waits for completed I/Os at
1216f841f6adSraf 	 * a time.
1217f841f6adSraf 	 *
1218f841f6adSraf 	 * __aio_suspend() increments "_aio_suscv_cnt" to notify
1219f841f6adSraf 	 * _aiodone() that at least one __aio_suspend() call is
1220f841f6adSraf 	 * waiting for completed I/Os.
1221f841f6adSraf 	 * There could be more than one __aio_suspend() function
1222f841f6adSraf 	 * waiting for completed I/Os. Because every function should
1223f841f6adSraf 	 * be waiting for different I/Os, _aiodone() has to wake up all
1224f841f6adSraf 	 * __aio_suspend() functions each time.
1225f841f6adSraf 	 * Every __aio_suspend() function will compare the recently
1226f841f6adSraf 	 * completed I/O with its own list.
1227f841f6adSraf 	 */
1228f841f6adSraf 	ASSERT(MUTEX_HELD(&__aio_mutex));
1229f841f6adSraf 	if (_aio_flags & AIO_IO_WAITING) {
1230f841f6adSraf 		if (_aio_waitncnt > 0)
1231f841f6adSraf 			_aio_waitncnt--;
1232f841f6adSraf 		if (_aio_outstand_cnt == 0 || _aio_waitncnt == 0 ||
1233f841f6adSraf 		    _aio_suscv_cnt > 0)
1234f841f6adSraf 			(void) cond_broadcast(&_aio_iowait_cv);
1235f841f6adSraf 	} else {
1236f841f6adSraf 		/* Wake up waiting aio_suspend calls */
1237f841f6adSraf 		if (_aio_suscv_cnt > 0)
1238f841f6adSraf 			(void) cond_broadcast(&_aio_iowait_cv);
1239f841f6adSraf 	}
1240f841f6adSraf }
1241f841f6adSraf 
1242f841f6adSraf /*
1243f841f6adSraf  * timedwait values :
1244f841f6adSraf  * AIO_TIMEOUT_POLL	: polling
1245f841f6adSraf  * AIO_TIMEOUT_WAIT	: timeout
1246f841f6adSraf  * AIO_TIMEOUT_INDEF	: wait indefinitely
1247f841f6adSraf  */
1248f841f6adSraf static int
_aio_check_timeout(const timespec_t * utimo,timespec_t * end,int * timedwait)1249f841f6adSraf _aio_check_timeout(const timespec_t *utimo, timespec_t *end, int *timedwait)
1250f841f6adSraf {
1251f841f6adSraf 	struct	timeval	curtime;
1252f841f6adSraf 
1253f841f6adSraf 	if (utimo) {
1254f841f6adSraf 		if (utimo->tv_sec < 0 || utimo->tv_nsec < 0 ||
1255f841f6adSraf 		    utimo->tv_nsec >= NANOSEC) {
1256f841f6adSraf 			errno = EINVAL;
1257f841f6adSraf 			return (-1);
1258f841f6adSraf 		}
1259f841f6adSraf 		if (utimo->tv_sec > 0 || utimo->tv_nsec > 0) {
1260f841f6adSraf 			(void) gettimeofday(&curtime, NULL);
1261f841f6adSraf 			end->tv_sec = utimo->tv_sec + curtime.tv_sec;
1262f841f6adSraf 			end->tv_nsec = utimo->tv_nsec + 1000 * curtime.tv_usec;
1263f841f6adSraf 			if (end->tv_nsec >= NANOSEC) {
1264f841f6adSraf 				end->tv_nsec -= NANOSEC;
1265f841f6adSraf 				end->tv_sec += 1;
1266f841f6adSraf 			}
1267f841f6adSraf 			*timedwait = AIO_TIMEOUT_WAIT;
1268f841f6adSraf 		} else {
1269f841f6adSraf 			/* polling */
1270f841f6adSraf 			*timedwait = AIO_TIMEOUT_POLL;
1271f841f6adSraf 		}
1272f841f6adSraf 	} else {
1273f841f6adSraf 		*timedwait = AIO_TIMEOUT_INDEF;		/* wait indefinitely */
1274f841f6adSraf 	}
1275f841f6adSraf 	return (0);
1276f841f6adSraf }
1277f841f6adSraf 
1278f841f6adSraf #if !defined(_LP64)
1279f841f6adSraf 
1280f841f6adSraf int
aio_read64(aiocb64_t * aiocbp)1281f841f6adSraf aio_read64(aiocb64_t *aiocbp)
1282f841f6adSraf {
12836e628f27Sraf 	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
1284f841f6adSraf 		errno = EINVAL;
1285f841f6adSraf 		return (-1);
1286f841f6adSraf 	}
1287f841f6adSraf 	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1288f841f6adSraf 		errno = EBUSY;
1289f841f6adSraf 		return (-1);
1290f841f6adSraf 	}
1291f841f6adSraf 	if (_aio_sigev_thread64(aiocbp) != 0)
1292f841f6adSraf 		return (-1);
1293f841f6adSraf 	aiocbp->aio_lio_opcode = LIO_READ;
1294f841f6adSraf 	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAREAD64,
1295f841f6adSraf 	    (AIO_KAIO | AIO_NO_DUPS)));
1296f841f6adSraf }
1297f841f6adSraf 
1298f841f6adSraf int
aio_write64(aiocb64_t * aiocbp)1299f841f6adSraf aio_write64(aiocb64_t *aiocbp)
1300f841f6adSraf {
13016e628f27Sraf 	if (aiocbp == NULL || aiocbp->aio_reqprio != 0) {
1302f841f6adSraf 		errno = EINVAL;
1303f841f6adSraf 		return (-1);
1304f841f6adSraf 	}
1305f841f6adSraf 	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1306f841f6adSraf 		errno = EBUSY;
1307f841f6adSraf 		return (-1);
1308f841f6adSraf 	}
1309f841f6adSraf 	if (_aio_sigev_thread64(aiocbp) != 0)
1310f841f6adSraf 		return (-1);
1311f841f6adSraf 	aiocbp->aio_lio_opcode = LIO_WRITE;
1312f841f6adSraf 	return (_aio_rw64(aiocbp, NULL, &__nextworker_rw, AIOAWRITE64,
1313f841f6adSraf 	    (AIO_KAIO | AIO_NO_DUPS)));
1314f841f6adSraf }
1315f841f6adSraf 
1316f841f6adSraf int
lio_listio64(int mode,aiocb64_t * _RESTRICT_KYWD const * _RESTRICT_KYWD list,int nent,struct sigevent * _RESTRICT_KYWD sigevp)1317f841f6adSraf lio_listio64(int mode, aiocb64_t *_RESTRICT_KYWD const *_RESTRICT_KYWD list,
1318f841f6adSraf     int nent, struct sigevent *_RESTRICT_KYWD sigevp)
1319f841f6adSraf {
1320f841f6adSraf 	int		aio_ufs = 0;
1321f841f6adSraf 	int		oerrno = 0;
1322f841f6adSraf 	aio_lio_t	*head = NULL;
1323f841f6adSraf 	aiocb64_t	*aiocbp;
1324f841f6adSraf 	int		state = 0;
1325f841f6adSraf 	int		EIOflg = 0;
1326f841f6adSraf 	int		rw;
1327f841f6adSraf 	int		do_kaio = 0;
1328f841f6adSraf 	int		error;
1329f841f6adSraf 	int		i;
1330f841f6adSraf 
1331f841f6adSraf 	if (!_kaio_ok)
1332f841f6adSraf 		_kaio_init();
1333f841f6adSraf 
1334f841f6adSraf 	if (aio_list_max == 0)
1335f841f6adSraf 		aio_list_max = sysconf(_SC_AIO_LISTIO_MAX);
1336f841f6adSraf 
1337f841f6adSraf 	if (nent <= 0 || nent > aio_list_max) {
1338f841f6adSraf 		errno = EINVAL;
1339f841f6adSraf 		return (-1);
1340f841f6adSraf 	}
1341f841f6adSraf 
1342f841f6adSraf 	switch (mode) {
1343f841f6adSraf 	case LIO_WAIT:
1344f841f6adSraf 		state = NOCHECK;
1345f841f6adSraf 		break;
1346f841f6adSraf 	case LIO_NOWAIT:
1347f841f6adSraf 		state = CHECK;
1348f841f6adSraf 		break;
1349f841f6adSraf 	default:
1350f841f6adSraf 		errno = EINVAL;
1351f841f6adSraf 		return (-1);
1352f841f6adSraf 	}
1353f841f6adSraf 
1354f841f6adSraf 	for (i = 0; i < nent; i++) {
1355f841f6adSraf 		if ((aiocbp = list[i]) == NULL)
1356f841f6adSraf 			continue;
1357f841f6adSraf 		if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1358f841f6adSraf 			errno = EBUSY;
1359f841f6adSraf 			return (-1);
1360f841f6adSraf 		}
1361f841f6adSraf 		if (_aio_sigev_thread64(aiocbp) != 0)
1362f841f6adSraf 			return (-1);
1363f841f6adSraf 		if (aiocbp->aio_lio_opcode == LIO_NOP)
1364f841f6adSraf 			aiocbp->aio_state = NOCHECK;
1365f841f6adSraf 		else {
1366f841f6adSraf 			aiocbp->aio_state = state;
1367f841f6adSraf 			if (KAIO_SUPPORTED(aiocbp->aio_fildes))
1368f841f6adSraf 				do_kaio++;
1369f841f6adSraf 			else
1370f841f6adSraf 				aiocbp->aio_resultp.aio_errno = ENOTSUP;
1371f841f6adSraf 		}
1372f841f6adSraf 	}
1373f841f6adSraf 	if (_aio_sigev_thread_init(sigevp) != 0)
1374f841f6adSraf 		return (-1);
1375f841f6adSraf 
1376f841f6adSraf 	if (do_kaio) {
1377f841f6adSraf 		error = (int)_kaio(AIOLIO64, mode, list, nent, sigevp);
1378f841f6adSraf 		if (error == 0)
1379f841f6adSraf 			return (0);
1380f841f6adSraf 		oerrno = errno;
1381f841f6adSraf 	} else {
1382f841f6adSraf 		oerrno = errno = ENOTSUP;
1383f841f6adSraf 		error = -1;
1384f841f6adSraf 	}
1385f841f6adSraf 
1386f841f6adSraf 	if (error == -1 && errno == ENOTSUP) {
1387f841f6adSraf 		error = errno = 0;
1388f841f6adSraf 		/*
1389f841f6adSraf 		 * If LIO_WAIT, or notification required, allocate a list head.
1390f841f6adSraf 		 */
1391f841f6adSraf 		if (mode == LIO_WAIT ||
1392f841f6adSraf 		    (sigevp != NULL &&
1393f841f6adSraf 		    (sigevp->sigev_notify == SIGEV_SIGNAL ||
1394f841f6adSraf 		    sigevp->sigev_notify == SIGEV_THREAD ||
1395f841f6adSraf 		    sigevp->sigev_notify == SIGEV_PORT)))
1396f841f6adSraf 			head = _aio_lio_alloc();
1397f841f6adSraf 		if (head) {
1398f841f6adSraf 			sig_mutex_lock(&head->lio_mutex);
1399f841f6adSraf 			head->lio_mode = mode;
1400f841f6adSraf 			head->lio_largefile = 1;
1401f841f6adSraf 			if (mode == LIO_NOWAIT && sigevp != NULL) {
1402f841f6adSraf 				if (sigevp->sigev_notify == SIGEV_THREAD) {
1403f841f6adSraf 					head->lio_port = sigevp->sigev_signo;
1404f841f6adSraf 					head->lio_event = AIOLIO64;
1405f841f6adSraf 					head->lio_sigevent = sigevp;
1406f841f6adSraf 					head->lio_sigval.sival_ptr =
1407f841f6adSraf 					    sigevp->sigev_value.sival_ptr;
1408f841f6adSraf 				} else if (sigevp->sigev_notify == SIGEV_PORT) {
1409f841f6adSraf 					port_notify_t *pn =
1410f841f6adSraf 					    sigevp->sigev_value.sival_ptr;
1411f841f6adSraf 					head->lio_port = pn->portnfy_port;
1412f841f6adSraf 					head->lio_event = AIOLIO64;
1413f841f6adSraf 					head->lio_sigevent = sigevp;
1414f841f6adSraf 					head->lio_sigval.sival_ptr =
1415f841f6adSraf 					    pn->portnfy_user;
1416f841f6adSraf 				} else {	/* SIGEV_SIGNAL */
1417f841f6adSraf 					head->lio_signo = sigevp->sigev_signo;
1418f841f6adSraf 					head->lio_sigval.sival_ptr =
1419f841f6adSraf 					    sigevp->sigev_value.sival_ptr;
1420f841f6adSraf 				}
1421f841f6adSraf 			}
1422f841f6adSraf 			head->lio_nent = head->lio_refcnt = nent;
1423f841f6adSraf 			sig_mutex_unlock(&head->lio_mutex);
1424f841f6adSraf 		}
1425f841f6adSraf 		/*
1426f841f6adSraf 		 * Find requests kaio rejected (aio_errno == ENOTSUP/EBADFD,
1427f841f6adSraf 		 * e.g. UFS files) and resubmit them at user level.
1427f841f6adSraf 		 */
1428f841f6adSraf 		for (i = 0; i < nent; i++) {
1429f841f6adSraf 			if ((aiocbp = list[i]) == NULL ||
1430f841f6adSraf 			    aiocbp->aio_lio_opcode == LIO_NOP ||
1431f841f6adSraf 			    (aiocbp->aio_resultp.aio_errno != ENOTSUP &&
1432f841f6adSraf 			    aiocbp->aio_resultp.aio_errno != EBADFD)) {
1433f841f6adSraf 				if (head)
1434f841f6adSraf 					_lio_list_decr(head);
1435f841f6adSraf 				continue;
1436f841f6adSraf 			}
1437f841f6adSraf 			if (aiocbp->aio_resultp.aio_errno == EBADFD)
1438f841f6adSraf 				SET_KAIO_NOT_SUPPORTED(aiocbp->aio_fildes);
14396e628f27Sraf 			if (aiocbp->aio_reqprio != 0) {
1440f841f6adSraf 				aiocbp->aio_resultp.aio_errno = EINVAL;
1441f841f6adSraf 				aiocbp->aio_resultp.aio_return = -1;
1442f841f6adSraf 				EIOflg = 1;
1443f841f6adSraf 				if (head)
1444f841f6adSraf 					_lio_list_decr(head);
1445f841f6adSraf 				continue;
1446f841f6adSraf 			}
1447f841f6adSraf 			/*
1448f841f6adSraf 			 * Submit the request with the AIO_NO_KAIO flag
1449f841f6adSraf 			 * to avoid the kaio() syscall in _aio_rw64().
1450f841f6adSraf 			 */
1451f841f6adSraf 			switch (aiocbp->aio_lio_opcode) {
1452f841f6adSraf 			case LIO_READ:
1453f841f6adSraf 				rw = AIOAREAD64;
1454f841f6adSraf 				break;
1455f841f6adSraf 			case LIO_WRITE:
1456f841f6adSraf 				rw = AIOAWRITE64;
1457f841f6adSraf 				break;
1458f841f6adSraf 			}
1459f841f6adSraf 			error = _aio_rw64(aiocbp, head, &__nextworker_rw, rw,
1460f841f6adSraf 			    (AIO_NO_KAIO | AIO_NO_DUPS));
1461f841f6adSraf 			if (error == 0)
1462f841f6adSraf 				aio_ufs++;
1463f841f6adSraf 			else {
1464f841f6adSraf 				if (head)
1465f841f6adSraf 					_lio_list_decr(head);
1466f841f6adSraf 				aiocbp->aio_resultp.aio_errno = error;
1467f841f6adSraf 				EIOflg = 1;
1468f841f6adSraf 			}
1469f841f6adSraf 		}
1470f841f6adSraf 	}
1471f841f6adSraf 	if (EIOflg) {
1472f841f6adSraf 		errno = EIO;
1473f841f6adSraf 		return (-1);
1474f841f6adSraf 	}
1475f841f6adSraf 	if (mode == LIO_WAIT && oerrno == ENOTSUP) {
1476f841f6adSraf 		/*
1477f841f6adSraf 		 * Call kaio(AIOLIOWAIT) to wait for all outstanding
1478f841f6adSraf 		 * kernel AIO requests.
1479f841f6adSraf 		 */
1480f841f6adSraf 		if ((nent - aio_ufs) > 0)
1481f841f6adSraf 			(void) _kaio(AIOLIOWAIT, mode, list, nent, sigevp);
1482f841f6adSraf 		if (head != NULL && head->lio_nent > 0) {
1483f841f6adSraf 			sig_mutex_lock(&head->lio_mutex);
1484f841f6adSraf 			while (head->lio_refcnt > 0) {
1485f841f6adSraf 				int err;
1486f841f6adSraf 				head->lio_waiting = 1;
1487f841f6adSraf 				pthread_cleanup_push(_lio_listio_cleanup, head);
1488f841f6adSraf 				err = sig_cond_wait(&head->lio_cond_cv,
1489f841f6adSraf 				    &head->lio_mutex);
1490f841f6adSraf 				pthread_cleanup_pop(0);
1491f841f6adSraf 				head->lio_waiting = 0;
1492f841f6adSraf 				if (err && head->lio_nent > 0) {
1493f841f6adSraf 					sig_mutex_unlock(&head->lio_mutex);
1494f841f6adSraf 					errno = err;
1495f841f6adSraf 					return (-1);
1496f841f6adSraf 				}
1497f841f6adSraf 			}
1498f841f6adSraf 			sig_mutex_unlock(&head->lio_mutex);
1499f841f6adSraf 			ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
1500f841f6adSraf 			_aio_lio_free(head);
1501f841f6adSraf 			for (i = 0; i < nent; i++) {
1502f841f6adSraf 				if ((aiocbp = list[i]) != NULL &&
1503f841f6adSraf 				    aiocbp->aio_resultp.aio_errno) {
1504f841f6adSraf 					errno = EIO;
1505f841f6adSraf 					return (-1);
1506f841f6adSraf 				}
1507f841f6adSraf 			}
1508f841f6adSraf 		}
1509f841f6adSraf 		return (0);
1510f841f6adSraf 	}
1511f841f6adSraf 	return (error);
1512f841f6adSraf }
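
/*
 * Usage sketch (illustrative only): issue a batch of largefile
 * requests and wait for all of them with LIO_WAIT.  The infd, outfd
 * and buffer variables are hypothetical.
 *
 *	aiocb64_t rd = { 0 }, wr = { 0 };
 *	aiocb64_t *batch[2] = { &rd, &wr };
 *	rd.aio_fildes = infd;
 *	rd.aio_buf = inbuf;
 *	rd.aio_nbytes = sizeof (inbuf);
 *	rd.aio_lio_opcode = LIO_READ;
 *	wr.aio_fildes = outfd;
 *	wr.aio_buf = outbuf;
 *	wr.aio_nbytes = sizeof (outbuf);
 *	wr.aio_lio_opcode = LIO_WRITE;
 *	if (lio_listio64(LIO_WAIT, batch, 2, NULL) != 0) {
 *		// On EIO, check aio_error64() on each list element.
 *	}
 */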
1513f841f6adSraf 
1514f841f6adSraf int
1515f841f6adSraf aio_suspend64(const aiocb64_t * const list[], int nent,
1516f841f6adSraf     const timespec_t *timeout)
1517f841f6adSraf {
1518f841f6adSraf 	return (__aio_suspend((void **)list, nent, timeout, 1));
1519f841f6adSraf }
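
/*
 * Usage sketch (illustrative only): block until at least one of two
 * outstanding requests (the hypothetical rd/wr from above) completes,
 * with a five-second timeout.
 *
 *	const aiocb64_t *waitlist[2] = { &rd, &wr };
 *	timespec_t ts = { 5, 0 };
 *	if (aio_suspend64(waitlist, 2, &ts) != 0 && errno == EAGAIN) {
 *		// Timed out; the requests are still in progress.
 *	}
 */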
1520f841f6adSraf 
1521f841f6adSraf int
1522f841f6adSraf aio_error64(const aiocb64_t *aiocbp)
1523f841f6adSraf {
1524f841f6adSraf 	const aio_result_t *resultp = &aiocbp->aio_resultp;
1525f841f6adSraf 	int error;
1526f841f6adSraf 
1527f841f6adSraf 	if ((error = resultp->aio_errno) == EINPROGRESS) {
1528f841f6adSraf 		if (aiocbp->aio_state == CHECK) {
1529f841f6adSraf 			/*
1530f841f6adSraf 			 * Always do the kaio() call without the
1531f841f6adSraf 			 * KAIO_SUPPORTED() checks because a valid fd
1532f841f6adSraf 			 * need not be set in the aiocb; only the
1533f841f6adSraf 			 * resultp must be set.
1534f841f6adSraf 			 */
1535f841f6adSraf 			if ((int)_kaio(AIOERROR64, aiocbp) == EINVAL) {
1536f841f6adSraf 				errno = EINVAL;
1537f841f6adSraf 				return (-1);
1538f841f6adSraf 			}
1539f841f6adSraf 			error = resultp->aio_errno;
1540f841f6adSraf 		} else if (aiocbp->aio_state == CHECKED) {
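			/*
			 * Revert the state to CHECK so that the next
			 * aio_error64() call queries the kernel again.
			 */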
1541f841f6adSraf 			((aiocb64_t *)aiocbp)->aio_state = CHECK;
1542f841f6adSraf 		}
1543f841f6adSraf 	}
1544f841f6adSraf 	return (error);
1545f841f6adSraf }
1546f841f6adSraf 
1547f841f6adSraf ssize_t
1548f841f6adSraf aio_return64(aiocb64_t *aiocbp)
1549f841f6adSraf {
1550f841f6adSraf 	aio_result_t *resultp = &aiocbp->aio_resultp;
1551f841f6adSraf 	aio_req_t *reqp;
1552f841f6adSraf 	int error;
1553f841f6adSraf 	ssize_t retval;
1554f841f6adSraf 
1555f841f6adSraf 	/*
1556f841f6adSraf 	 * The _aiodone() function stores resultp->aio_return before
1557f841f6adSraf 	 * storing resultp->aio_errno (with a membar_producer() in
1558f841f6adSraf 	 * between).  We use membar_consumer() below to ensure proper
1559f841f6adSraf 	 * memory ordering between _aiodone() and ourselves.
1560f841f6adSraf 	 */
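	/*
	 * Producer-side ordering in _aiodone(), sketched here only for
	 * reference; see _aiodone() for the authoritative code:
	 *
	 *	resultp->aio_return = retval;
	 *	membar_producer();
	 *	resultp->aio_errno = error;
	 */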
1561f841f6adSraf 	error = resultp->aio_errno;
1562f841f6adSraf 	membar_consumer();
1563f841f6adSraf 	retval = resultp->aio_return;
1564f841f6adSraf 
1565f841f6adSraf 	/*
1566f841f6adSraf 	 * We use this condition to indicate either that
1567f841f6adSraf 	 * aio_return() has already been called or that it
1568f841f6adSraf 	 * should not have been called yet.
1569f841f6adSraf 	 */
1570f841f6adSraf 	if ((retval == -1 && error == EINVAL) || error == EINPROGRESS) {
1571f841f6adSraf 		errno = error;
1572f841f6adSraf 		return (-1);
1573f841f6adSraf 	}
1574f841f6adSraf 
1575f841f6adSraf 	/*
1576f841f6adSraf 	 * Before we return, mark the result as retrieved so that later
1577f841f6adSraf 	 * calls to aio_return() will report that the result has
1578f841f6adSraf 	 * already been returned.
1579f841f6adSraf 	 */
1580f841f6adSraf 	sig_mutex_lock(&__aio_mutex);
1581f841f6adSraf 	/* retest, in case more than one thread actually got in here */
1582f841f6adSraf 	if (resultp->aio_return == -1 && resultp->aio_errno == EINVAL) {
1583f841f6adSraf 		sig_mutex_unlock(&__aio_mutex);
1584f841f6adSraf 		errno = EINVAL;
1585f841f6adSraf 		return (-1);
1586f841f6adSraf 	}
1587f841f6adSraf 	resultp->aio_return = -1;
1588f841f6adSraf 	resultp->aio_errno = EINVAL;
1589f841f6adSraf 	if ((reqp = _aio_hash_del(resultp)) == NULL)
1590f841f6adSraf 		sig_mutex_unlock(&__aio_mutex);
1591f841f6adSraf 	else {
1592f841f6adSraf 		aiocbp->aio_state = NOCHECK;
1593f841f6adSraf 		ASSERT(reqp->req_head == NULL);
1594f841f6adSraf 		(void) _aio_req_remove(reqp);
1595f841f6adSraf 		sig_mutex_unlock(&__aio_mutex);
1596f841f6adSraf 		_aio_req_free(reqp);
1597f841f6adSraf 	}
1598f841f6adSraf 
1599f841f6adSraf 	if (retval == -1)
1600f841f6adSraf 		errno = error;
1601f841f6adSraf 	return (retval);
1602f841f6adSraf }
1603f841f6adSraf 
1604f841f6adSraf static int
1605f841f6adSraf __aio_fsync_bar64(aiocb64_t *aiocbp, aio_lio_t *head, aio_worker_t *aiowp,
1606f841f6adSraf     int workerscnt)
1607f841f6adSraf {
1608f841f6adSraf 	int i;
1609f841f6adSraf 	int error;
1610f841f6adSraf 	aio_worker_t *next = aiowp;
1611f841f6adSraf 
1612f841f6adSraf 	for (i = 0; i < workerscnt; i++) {
1613f841f6adSraf 		error = _aio_rw64(aiocbp, head, &next, AIOFSYNC, AIO_NO_KAIO);
1614f841f6adSraf 		if (error != 0) {
1615f841f6adSraf 			sig_mutex_lock(&head->lio_mutex);
1616f841f6adSraf 			head->lio_mode = LIO_DESTROY;	/* ignore fsync */
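			/*
			 * Discount the workers that will never be handed
			 * an fsync request.
			 */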
1617f841f6adSraf 			head->lio_nent -= workerscnt - i;
1618f841f6adSraf 			head->lio_refcnt -= workerscnt - i;
1619f841f6adSraf 			sig_mutex_unlock(&head->lio_mutex);
1620f841f6adSraf 			errno = EAGAIN;
1621f841f6adSraf 			return (i);
1622f841f6adSraf 		}
1623f841f6adSraf 		next = next->work_forw;
1624f841f6adSraf 	}
1625f841f6adSraf 	return (i);
1626f841f6adSraf }
1627f841f6adSraf 
1628f841f6adSraf int
1629f841f6adSraf aio_fsync64(int op, aiocb64_t *aiocbp)
1630f841f6adSraf {
1631f841f6adSraf 	aio_lio_t *head;
16327e65cb05SArindam Sarkar 	struct stat64 statb;
1633f841f6adSraf 	int fret;
1634f841f6adSraf 
1635f841f6adSraf 	if (aiocbp == NULL)
1636f841f6adSraf 		return (0);
16376e628f27Sraf 	if (op != O_DSYNC && op != O_SYNC) {
1638f841f6adSraf 		errno = EINVAL;
1639f841f6adSraf 		return (-1);
1640f841f6adSraf 	}
1641f841f6adSraf 	if (_aio_hash_find(&aiocbp->aio_resultp) != NULL) {
1642f841f6adSraf 		errno = EBUSY;
1643f841f6adSraf 		return (-1);
1644f841f6adSraf 	}
16457e65cb05SArindam Sarkar 	if (fstat64(aiocbp->aio_fildes, &statb) < 0)
1646f841f6adSraf 		return (-1);
1647f841f6adSraf 	if (_aio_sigev_thread64(aiocbp) != 0)
1648f841f6adSraf 		return (-1);
1649f841f6adSraf 
1650f841f6adSraf 	/*
1651f841f6adSraf 	 * Kernel aio_fsync() is not supported.
1652f841f6adSraf 	 * We force user-level aio_fsync() just
1653f841f6adSraf 	 * for the notification side-effect.
1654f841f6adSraf 	 */
1655f841f6adSraf 	if (!__uaio_ok && __uaio_init() == -1)
1656f841f6adSraf 		return (-1);
1657f841f6adSraf 
1658f841f6adSraf 	/*
1659f841f6adSraf 	 * The first asynchronous I/O request in the current process will
1660f841f6adSraf 	 * create a bunch of workers (via __uaio_init()).  If the number
1661f841f6adSraf 	 * of workers is zero then the number of pending asynchronous I/O
1662f841f6adSraf 	 * requests is zero.  In such a case only execute the standard
1663*4b9db4f6SChris Fraire 	 * fsync(3C) or fdatasync(3C) as appropriate.
1664f841f6adSraf 	 */
1665f841f6adSraf 	if (__rw_workerscnt == 0) {
1666f841f6adSraf 		if (op == O_DSYNC)
16674763305eSRobert Mustacchi 			return (__fdsync(aiocbp->aio_fildes, FDSYNC_DATA));
1668f841f6adSraf 		else
16694763305eSRobert Mustacchi 			return (__fdsync(aiocbp->aio_fildes, FDSYNC_FILE));
1670f841f6adSraf 	}
1671f841f6adSraf 
1672f841f6adSraf 	/*
1673f841f6adSraf 	 * Re-use aio_offset as the op field:
1674f841f6adSraf 	 *	O_DSYNC - fdatasync()
1675f841f6adSraf 	 *	O_SYNC - fsync()
1676f841f6adSraf 	 */
1677f841f6adSraf 	aiocbp->aio_offset = op;
1678f841f6adSraf 	aiocbp->aio_lio_opcode = AIOFSYNC;
1679f841f6adSraf 
1680f841f6adSraf 	/*
1681f841f6adSraf 	 * Create a list of fsync requests.  The worker that
1682f841f6adSraf 	 * handles the last of these requests performs the actual fsync.
1683f841f6adSraf 	 */
1684f841f6adSraf 	head = _aio_lio_alloc();
1685f841f6adSraf 	if (head == NULL) {
1686f841f6adSraf 		errno = EAGAIN;
1687f841f6adSraf 		return (-1);
1688f841f6adSraf 	}
1689f841f6adSraf 	head->lio_mode = LIO_FSYNC;
1690f841f6adSraf 	head->lio_nent = head->lio_refcnt = __rw_workerscnt;
1691f841f6adSraf 	head->lio_largefile = 1;
1692f841f6adSraf 
1693f841f6adSraf 	/*
1694f841f6adSraf 	 * Insert an fsync request on every worker's queue.
1695f841f6adSraf 	 */
1696f841f6adSraf 	fret = __aio_fsync_bar64(aiocbp, head, __workers_rw, __rw_workerscnt);
1697f841f6adSraf 	if (fret != __rw_workerscnt) {
1698f841f6adSraf 		/*
1699f841f6adSraf 		 * Fewer fsync requests than workers means that it was
1700f841f6adSraf 		 * not possible to submit fsync requests to all workers.
1701f841f6adSraf 		 * Actions:
1702f841f6adSraf 		 * a) number of fsync requests submitted is 0:
1703f841f6adSraf 		 *    => free allocated memory (aio_lio_t).
1704f841f6adSraf 		 * b) number of fsync requests submitted is > 0:
1705f841f6adSraf 		 *    => the last worker executing the fsync request
1706f841f6adSraf 		 *	 will free the aio_lio_t struct.
1707f841f6adSraf 		 */
1708f841f6adSraf 		if (fret == 0)
1709f841f6adSraf 			_aio_lio_free(head);
1710f841f6adSraf 		return (-1);
1711f841f6adSraf 	}
1712f841f6adSraf 	return (0);
1713f841f6adSraf }
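
/*
 * Usage sketch (illustrative only): request the equivalent of an
 * asynchronous fdatasync() and poll for the notification.  The fd
 * variable is hypothetical.
 *
 *	aiocb64_t cb = { 0 };
 *	cb.aio_fildes = fd;
 *	cb.aio_sigevent.sigev_notify = SIGEV_NONE;
 *	if (aio_fsync64(O_DSYNC, &cb) != 0)
 *		return (-1);			// EINVAL, EBUSY, EAGAIN, ...
 *	while (aio_error64(&cb) == EINPROGRESS)
 *		;				// or block in aio_suspend64()
 *	(void) aio_return64(&cb);
 */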
1714f841f6adSraf 
1715f841f6adSraf int
1716f841f6adSraf aio_cancel64(int fd, aiocb64_t *aiocbp)
1717f841f6adSraf {
1718f841f6adSraf 	aio_req_t *reqp;
1719f841f6adSraf 	aio_worker_t *aiowp;
1720f841f6adSraf 	int done = 0;
1721f841f6adSraf 	int canceled = 0;
17227e65cb05SArindam Sarkar 	struct stat64 buf;
1723f841f6adSraf 
17247e65cb05SArindam Sarkar 	if (fstat64(fd, &buf) < 0)
1725f841f6adSraf 		return (-1);
1726f841f6adSraf 
1727f841f6adSraf 	if (aiocbp != NULL) {
1728f841f6adSraf 		if (fd != aiocbp->aio_fildes) {
1729f841f6adSraf 			errno = EINVAL;
1730f841f6adSraf 			return (-1);
1731f841f6adSraf 		}
1732f841f6adSraf 		if (aiocbp->aio_state == USERAIO) {
1733f841f6adSraf 			sig_mutex_lock(&__aio_mutex);
1734f841f6adSraf 			reqp = _aio_hash_find(&aiocbp->aio_resultp);
1735f841f6adSraf 			if (reqp == NULL) {
1736f841f6adSraf 				sig_mutex_unlock(&__aio_mutex);
1737f841f6adSraf 				return (AIO_ALLDONE);
1738f841f6adSraf 			}
1739f841f6adSraf 			aiowp = reqp->req_worker;
1740f841f6adSraf 			sig_mutex_lock(&aiowp->work_qlock1);
1741f841f6adSraf 			(void) _aio_cancel_req(aiowp, reqp, &canceled, &done);
1742f841f6adSraf 			sig_mutex_unlock(&aiowp->work_qlock1);
1743f841f6adSraf 			sig_mutex_unlock(&__aio_mutex);
1744f841f6adSraf 			if (done)
1745f841f6adSraf 				return (AIO_ALLDONE);
1746f841f6adSraf 			if (canceled)
1747f841f6adSraf 				return (AIO_CANCELED);
1748f841f6adSraf 			return (AIO_NOTCANCELED);
1749f841f6adSraf 		}
1750f841f6adSraf 		if (aiocbp->aio_state == USERAIO_DONE)
1751f841f6adSraf 			return (AIO_ALLDONE);
1752f841f6adSraf 		return ((int)_kaio(AIOCANCEL, fd, aiocbp));
1753f841f6adSraf 	}
1754f841f6adSraf 
1755f841f6adSraf 	return (aiocancel_all(fd));
1756f841f6adSraf }
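
/*
 * Usage sketch (illustrative only): cancel a single outstanding
 * request (hypothetical cb) and handle the three possible results.
 *
 *	switch (aio_cancel64(fd, &cb)) {
 *	case AIO_NOTCANCELED:
 *		while (aio_error64(&cb) == EINPROGRESS)
 *			;			// in flight; wait it out
 *		// FALLTHROUGH
 *	case AIO_CANCELED:			// aio_error64() == ECANCELED
 *	case AIO_ALLDONE:
 *		(void) aio_return64(&cb);	// reap the request
 *		break;
 *	}
 */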
1757f841f6adSraf 
1758f841f6adSraf int
1759f841f6adSraf aio_waitn64(aiocb64_t *list[], uint_t nent, uint_t *nwait,
1760f841f6adSraf     const timespec_t *timeout)
1761f841f6adSraf {
1762f841f6adSraf 	return (__aio_waitn((void **)list, nent, nwait, timeout));
1763f841f6adSraf }
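
/*
 * Usage sketch (illustrative only), assuming the aio_waitn(3C)
 * convention that *nwait is in/out: the desired count on entry and,
 * on exit, the number of completed requests returned in list[].
 *
 *	aiocb64_t *done[8];
 *	uint_t cnt = 1;				// wait for at least one
 *	if (aio_waitn64(done, 8, &cnt, NULL) == 0) {
 *		// done[0 .. cnt-1] point to completed requests.
 *	}
 */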
1764f841f6adSraf 
1765f841f6adSraf #endif /* !defined(_LP64) */
1766