xref: /onnv-gate/usr/src/uts/common/os/aio.c (revision 4377:eca587d3ab4b)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51885Sraf  * Common Development and Distribution License (the "License").
61885Sraf  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211885Sraf 
220Sstevel@tonic-gate /*
23*4377Sraf  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * Kernel asynchronous I/O.
310Sstevel@tonic-gate  * This is only for raw devices now (as of Nov. 1993).
320Sstevel@tonic-gate  */
330Sstevel@tonic-gate 
340Sstevel@tonic-gate #include <sys/types.h>
350Sstevel@tonic-gate #include <sys/errno.h>
360Sstevel@tonic-gate #include <sys/conf.h>
370Sstevel@tonic-gate #include <sys/file.h>
380Sstevel@tonic-gate #include <sys/fs/snode.h>
390Sstevel@tonic-gate #include <sys/unistd.h>
400Sstevel@tonic-gate #include <sys/cmn_err.h>
410Sstevel@tonic-gate #include <vm/as.h>
420Sstevel@tonic-gate #include <vm/faultcode.h>
430Sstevel@tonic-gate #include <sys/sysmacros.h>
440Sstevel@tonic-gate #include <sys/procfs.h>
450Sstevel@tonic-gate #include <sys/kmem.h>
460Sstevel@tonic-gate #include <sys/autoconf.h>
470Sstevel@tonic-gate #include <sys/ddi_impldefs.h>
480Sstevel@tonic-gate #include <sys/sunddi.h>
490Sstevel@tonic-gate #include <sys/aio_impl.h>
500Sstevel@tonic-gate #include <sys/debug.h>
510Sstevel@tonic-gate #include <sys/param.h>
520Sstevel@tonic-gate #include <sys/systm.h>
530Sstevel@tonic-gate #include <sys/vmsystm.h>
540Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h>
550Sstevel@tonic-gate #include <sys/contract/process_impl.h>
560Sstevel@tonic-gate 
570Sstevel@tonic-gate /*
580Sstevel@tonic-gate  * external entry point.
590Sstevel@tonic-gate  */
600Sstevel@tonic-gate #ifdef _LP64
610Sstevel@tonic-gate static int64_t kaioc(long, long, long, long, long, long);
620Sstevel@tonic-gate #endif
630Sstevel@tonic-gate static int kaio(ulong_t *, rval_t *);
640Sstevel@tonic-gate 
650Sstevel@tonic-gate 
660Sstevel@tonic-gate #define	AIO_64	0
670Sstevel@tonic-gate #define	AIO_32	1
680Sstevel@tonic-gate #define	AIO_LARGEFILE	2
690Sstevel@tonic-gate 
700Sstevel@tonic-gate /*
710Sstevel@tonic-gate  * implementation specific functions (private)
720Sstevel@tonic-gate  */
730Sstevel@tonic-gate #ifdef _LP64
741885Sraf static int alio(int, aiocb_t **, int, struct sigevent *);
750Sstevel@tonic-gate #endif
760Sstevel@tonic-gate static int aionotify(void);
770Sstevel@tonic-gate static int aioinit(void);
780Sstevel@tonic-gate static int aiostart(void);
790Sstevel@tonic-gate static void alio_cleanup(aio_t *, aiocb_t **, int, int);
800Sstevel@tonic-gate static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
810Sstevel@tonic-gate     cred_t *);
820Sstevel@tonic-gate static void lio_set_error(aio_req_t *);
830Sstevel@tonic-gate static aio_t *aio_aiop_alloc();
840Sstevel@tonic-gate static int aio_req_alloc(aio_req_t **, aio_result_t *);
850Sstevel@tonic-gate static int aio_lio_alloc(aio_lio_t **);
860Sstevel@tonic-gate static aio_req_t *aio_req_done(void *);
870Sstevel@tonic-gate static aio_req_t *aio_req_remove(aio_req_t *);
880Sstevel@tonic-gate static int aio_req_find(aio_result_t *, aio_req_t **);
890Sstevel@tonic-gate static int aio_hash_insert(struct aio_req_t *, aio_t *);
900Sstevel@tonic-gate static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
911885Sraf     aio_result_t *, vnode_t *);
920Sstevel@tonic-gate static int aio_cleanup_thread(aio_t *);
930Sstevel@tonic-gate static aio_lio_t *aio_list_get(aio_result_t *);
940Sstevel@tonic-gate static void lio_set_uerror(void *, int);
950Sstevel@tonic-gate extern void aio_zerolen(aio_req_t *);
960Sstevel@tonic-gate static int aiowait(struct timeval *, int, long	*);
970Sstevel@tonic-gate static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
980Sstevel@tonic-gate static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
990Sstevel@tonic-gate     aio_req_t *reqlist, aio_t *aiop, model_t model);
1000Sstevel@tonic-gate static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
1010Sstevel@tonic-gate static int aiosuspend(void *, int, struct  timespec *, int,
1020Sstevel@tonic-gate     long	*, int);
1030Sstevel@tonic-gate static int aliowait(int, void *, int, void *, int);
1040Sstevel@tonic-gate static int aioerror(void *, int);
1050Sstevel@tonic-gate static int aio_cancel(int, void *, long	*, int);
1060Sstevel@tonic-gate static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
1070Sstevel@tonic-gate static int aiorw(int, void *, int, int);
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate static int alioLF(int, void *, int, void *);
1101885Sraf static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
1111885Sraf     aio_result_t *, vnode_t *);
1120Sstevel@tonic-gate static int alio32(int, void *, int, void *);
1130Sstevel@tonic-gate static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1140Sstevel@tonic-gate static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1170Sstevel@tonic-gate static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
1180Sstevel@tonic-gate void	aiocb_32ton(aiocb32_t *, aiocb_t *);
1190Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
1200Sstevel@tonic-gate 
1210Sstevel@tonic-gate /*
1220Sstevel@tonic-gate  * implementation specific functions (external)
1230Sstevel@tonic-gate  */
1240Sstevel@tonic-gate void aio_req_free(aio_t *, aio_req_t *);
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate /*
1270Sstevel@tonic-gate  * Event Port framework
1280Sstevel@tonic-gate  */
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate void aio_req_free_port(aio_t *, aio_req_t *);
1310Sstevel@tonic-gate static int aio_port_callback(void *, int *, pid_t, int, void *);
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate /*
1340Sstevel@tonic-gate  * This is the loadable module wrapper.
1350Sstevel@tonic-gate  */
1360Sstevel@tonic-gate #include <sys/modctl.h>
1370Sstevel@tonic-gate #include <sys/syscall.h>
1380Sstevel@tonic-gate 
#ifdef _LP64

/*
 * System call table entry for the native (64-bit) kaio syscall:
 * { number of arguments, flags, handler }.
 */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/* entry used by 32-bit applications running on a 64-bit kernel */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

/* entry for the 32-bit kernel; all callers go through kaio() */
static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef  _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate int
1940Sstevel@tonic-gate _init(void)
1950Sstevel@tonic-gate {
1960Sstevel@tonic-gate 	int retval;
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 	if ((retval = mod_install(&modlinkage)) != 0)
1990Sstevel@tonic-gate 		return (retval);
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	return (0);
2020Sstevel@tonic-gate }
2030Sstevel@tonic-gate 
2040Sstevel@tonic-gate int
2050Sstevel@tonic-gate _fini(void)
2060Sstevel@tonic-gate {
2070Sstevel@tonic-gate 	int retval;
2080Sstevel@tonic-gate 
2090Sstevel@tonic-gate 	retval = mod_remove(&modlinkage);
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate 	return (retval);
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate int
2150Sstevel@tonic-gate _info(struct modinfo *modinfop)
2160Sstevel@tonic-gate {
2170Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
2180Sstevel@tonic-gate }
2190Sstevel@tonic-gate 
#ifdef	_LP64
/*
 * kaioc() - native (64-bit) entry point for the kaio system call.
 * a0 carries the command code (possibly with AIO_POLL_BIT or'ed in);
 * a1..a5 are command-specific arguments.  On success the command's
 * result value is returned; on failure errno is set and -1 is
 * returned via set_errno().
 */
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	/* AIO_POLL_BIT modifies notification, not dispatch; mask it off */
	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related stuff is valid only for
	 * 32 bit kernel and not for 64 bit kernel
	 * On 64 bit kernel we convert large file calls
	 * to regular 64bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate static int
3000Sstevel@tonic-gate kaio(
3010Sstevel@tonic-gate 	ulong_t *uap,
3020Sstevel@tonic-gate 	rval_t *rvp)
3030Sstevel@tonic-gate {
3040Sstevel@tonic-gate 	long rval = 0;
3050Sstevel@tonic-gate 	int	error = 0;
3060Sstevel@tonic-gate 	offset_t	off;
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate 		rvp->r_vals = 0;
3100Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
3110Sstevel@tonic-gate 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
3120Sstevel@tonic-gate #else
3130Sstevel@tonic-gate 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
3140Sstevel@tonic-gate #endif
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	switch (uap[0] & ~AIO_POLL_BIT) {
3170Sstevel@tonic-gate 	/*
3180Sstevel@tonic-gate 	 * It must be the 32 bit system call on 64 bit kernel
3190Sstevel@tonic-gate 	 */
3200Sstevel@tonic-gate 	case AIOREAD:
3210Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3220Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
3230Sstevel@tonic-gate 	case AIOWRITE:
3240Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3250Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
3260Sstevel@tonic-gate 	case AIOWAIT:
3270Sstevel@tonic-gate 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
3280Sstevel@tonic-gate 		    &rval);
3290Sstevel@tonic-gate 		break;
3300Sstevel@tonic-gate 	case AIOWAITN:
3310Sstevel@tonic-gate 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
3320Sstevel@tonic-gate 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
3330Sstevel@tonic-gate 		break;
3340Sstevel@tonic-gate 	case AIONOTIFY:
3350Sstevel@tonic-gate 		return (aionotify());
3360Sstevel@tonic-gate 	case AIOINIT:
3370Sstevel@tonic-gate 		return (aioinit());
3380Sstevel@tonic-gate 	case AIOSTART:
3390Sstevel@tonic-gate 		return (aiostart());
3400Sstevel@tonic-gate 	case AIOLIO:
3410Sstevel@tonic-gate 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
3420Sstevel@tonic-gate 		    (void *)uap[4]));
3430Sstevel@tonic-gate 	case AIOLIOWAIT:
3440Sstevel@tonic-gate 		return (aliowait((int)uap[1], (void *)uap[2],
3450Sstevel@tonic-gate 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
3460Sstevel@tonic-gate 	case AIOSUSPEND:
3470Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3480Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4],
3490Sstevel@tonic-gate 		    &rval, AIO_32);
3500Sstevel@tonic-gate 		break;
3510Sstevel@tonic-gate 	case AIOERROR:
3520Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_32));
3530Sstevel@tonic-gate 	case AIOAREAD:
3540Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3550Sstevel@tonic-gate 		    FREAD, AIO_32));
3560Sstevel@tonic-gate 	case AIOAWRITE:
3570Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3580Sstevel@tonic-gate 		    FWRITE, AIO_32));
3590Sstevel@tonic-gate 	case AIOCANCEL:
3600Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
3610Sstevel@tonic-gate 		    AIO_32));
3620Sstevel@tonic-gate 		break;
3630Sstevel@tonic-gate 	case AIOLIO64:
3640Sstevel@tonic-gate 		return (alioLF((int)uap[1], (void *)uap[2],
3650Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4]));
3660Sstevel@tonic-gate 	case AIOLIOWAIT64:
3670Sstevel@tonic-gate 		return (aliowait(uap[1], (void *)uap[2],
3680Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
3690Sstevel@tonic-gate 	case AIOSUSPEND64:
3700Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3710Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4], &rval,
3720Sstevel@tonic-gate 		    AIO_LARGEFILE);
3730Sstevel@tonic-gate 		break;
3740Sstevel@tonic-gate 	case AIOERROR64:
3750Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
3760Sstevel@tonic-gate 	case AIOAREAD64:
3770Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
3780Sstevel@tonic-gate 		    AIO_LARGEFILE));
3790Sstevel@tonic-gate 	case AIOAWRITE64:
3800Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
3810Sstevel@tonic-gate 		    AIO_LARGEFILE));
3820Sstevel@tonic-gate 	case AIOCANCEL64:
3830Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2],
3840Sstevel@tonic-gate 		    &rval, AIO_LARGEFILE));
3850Sstevel@tonic-gate 		break;
3860Sstevel@tonic-gate 	default:
3870Sstevel@tonic-gate 		return (EINVAL);
3880Sstevel@tonic-gate 	}
3890Sstevel@tonic-gate 
3900Sstevel@tonic-gate 	rvp->r_val1 = rval;
3910Sstevel@tonic-gate 	return (error);
3920Sstevel@tonic-gate }
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate /*
3950Sstevel@tonic-gate  * wake up LWPs in this process that are sleeping in
3960Sstevel@tonic-gate  * aiowait().
3970Sstevel@tonic-gate  */
3980Sstevel@tonic-gate static int
3990Sstevel@tonic-gate aionotify(void)
4000Sstevel@tonic-gate {
4010Sstevel@tonic-gate 	aio_t	*aiop;
4020Sstevel@tonic-gate 
4030Sstevel@tonic-gate 	aiop = curproc->p_aio;
4040Sstevel@tonic-gate 	if (aiop == NULL)
4050Sstevel@tonic-gate 		return (0);
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
4080Sstevel@tonic-gate 	aiop->aio_notifycnt++;
4090Sstevel@tonic-gate 	cv_broadcast(&aiop->aio_waitcv);
4100Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 	return (0);
4130Sstevel@tonic-gate }
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate static int
4160Sstevel@tonic-gate timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
4170Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4180Sstevel@tonic-gate {
4190Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4200Sstevel@tonic-gate 	struct timeval32 wait_time_32;
4210Sstevel@tonic-gate #endif
4220Sstevel@tonic-gate 	struct timeval wait_time;
4230Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4240Sstevel@tonic-gate 
4250Sstevel@tonic-gate 	*rqtp = NULL;
4260Sstevel@tonic-gate 	if (timout == NULL) {		/* wait indefinitely */
4270Sstevel@tonic-gate 		*blocking = 1;
4280Sstevel@tonic-gate 		return (0);
4290Sstevel@tonic-gate 	}
4300Sstevel@tonic-gate 
4310Sstevel@tonic-gate 	/*
4320Sstevel@tonic-gate 	 * Need to correctly compare with the -1 passed in for a user
4330Sstevel@tonic-gate 	 * address pointer, with both 32 bit and 64 bit apps.
4340Sstevel@tonic-gate 	 */
4350Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4360Sstevel@tonic-gate 		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
4370Sstevel@tonic-gate 			*blocking = 0;
4380Sstevel@tonic-gate 			return (0);
4390Sstevel@tonic-gate 		}
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 		if (copyin(timout, &wait_time, sizeof (wait_time)))
4420Sstevel@tonic-gate 			return (EFAULT);
4430Sstevel@tonic-gate 	}
4440Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4450Sstevel@tonic-gate 	else {
4460Sstevel@tonic-gate 		/*
4470Sstevel@tonic-gate 		 * -1 from a 32bit app. It will not get sign extended.
4480Sstevel@tonic-gate 		 * don't wait if -1.
4490Sstevel@tonic-gate 		 */
4500Sstevel@tonic-gate 		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
4510Sstevel@tonic-gate 			*blocking = 0;
4520Sstevel@tonic-gate 			return (0);
4530Sstevel@tonic-gate 		}
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
4560Sstevel@tonic-gate 			return (EFAULT);
4570Sstevel@tonic-gate 		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
4580Sstevel@tonic-gate 	}
4590Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
4600Sstevel@tonic-gate 
4610Sstevel@tonic-gate 	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
4620Sstevel@tonic-gate 		*blocking = 0;
4630Sstevel@tonic-gate 		return (0);
4640Sstevel@tonic-gate 	}
4650Sstevel@tonic-gate 
4660Sstevel@tonic-gate 	if (wait_time.tv_sec < 0 ||
4670Sstevel@tonic-gate 	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
4680Sstevel@tonic-gate 		return (EINVAL);
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 	rqtime->tv_sec = wait_time.tv_sec;
4710Sstevel@tonic-gate 	rqtime->tv_nsec = wait_time.tv_usec * 1000;
4720Sstevel@tonic-gate 	*rqtp = rqtime;
4730Sstevel@tonic-gate 	*blocking = 1;
4740Sstevel@tonic-gate 
4750Sstevel@tonic-gate 	return (0);
4760Sstevel@tonic-gate }
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate static int
4790Sstevel@tonic-gate timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
4800Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4810Sstevel@tonic-gate {
4820Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4830Sstevel@tonic-gate 	timespec32_t wait_time_32;
4840Sstevel@tonic-gate #endif
4850Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 	*rqtp = NULL;
4880Sstevel@tonic-gate 	if (timout == NULL) {
4890Sstevel@tonic-gate 		*blocking = 1;
4900Sstevel@tonic-gate 		return (0);
4910Sstevel@tonic-gate 	}
4920Sstevel@tonic-gate 
4930Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4940Sstevel@tonic-gate 		if (copyin(timout, rqtime, sizeof (*rqtime)))
4950Sstevel@tonic-gate 			return (EFAULT);
4960Sstevel@tonic-gate 	}
4970Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4980Sstevel@tonic-gate 	else {
4990Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
5000Sstevel@tonic-gate 			return (EFAULT);
5010Sstevel@tonic-gate 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
5020Sstevel@tonic-gate 	}
5030Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
5060Sstevel@tonic-gate 		*blocking = 0;
5070Sstevel@tonic-gate 		return (0);
5080Sstevel@tonic-gate 	}
5090Sstevel@tonic-gate 
5100Sstevel@tonic-gate 	if (rqtime->tv_sec < 0 ||
5110Sstevel@tonic-gate 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
5120Sstevel@tonic-gate 		return (EINVAL);
5130Sstevel@tonic-gate 
5140Sstevel@tonic-gate 	*rqtp = rqtime;
5150Sstevel@tonic-gate 	*blocking = 1;
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 	return (0);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
5200Sstevel@tonic-gate /*ARGSUSED*/
5210Sstevel@tonic-gate static int
5220Sstevel@tonic-gate aiowait(
5230Sstevel@tonic-gate 	struct timeval	*timout,
5240Sstevel@tonic-gate 	int	dontblockflg,
5250Sstevel@tonic-gate 	long	*rval)
5260Sstevel@tonic-gate {
5270Sstevel@tonic-gate 	int 		error;
5280Sstevel@tonic-gate 	aio_t		*aiop;
5290Sstevel@tonic-gate 	aio_req_t	*reqp;
5300Sstevel@tonic-gate 	clock_t		status;
5310Sstevel@tonic-gate 	int		blocking;
5324123Sdm120769 	int		timecheck;
5330Sstevel@tonic-gate 	timestruc_t	rqtime;
5340Sstevel@tonic-gate 	timestruc_t	*rqtp;
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate 	aiop = curproc->p_aio;
5370Sstevel@tonic-gate 	if (aiop == NULL)
5380Sstevel@tonic-gate 		return (EINVAL);
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 	/*
5410Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
5420Sstevel@tonic-gate 	 */
5430Sstevel@tonic-gate 	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
5440Sstevel@tonic-gate 	if (error)
5450Sstevel@tonic-gate 		return (error);
5460Sstevel@tonic-gate 	if (rqtp) {
5470Sstevel@tonic-gate 		timestruc_t now;
5484123Sdm120769 		timecheck = timechanged;
5490Sstevel@tonic-gate 		gethrestime(&now);
5500Sstevel@tonic-gate 		timespecadd(rqtp, &now);
5510Sstevel@tonic-gate 	}
5520Sstevel@tonic-gate 
5530Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
5540Sstevel@tonic-gate 	for (;;) {
5550Sstevel@tonic-gate 		/* process requests on poll queue */
5560Sstevel@tonic-gate 		if (aiop->aio_pollq) {
5570Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
5580Sstevel@tonic-gate 			aio_cleanup(0);
5590Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
5600Sstevel@tonic-gate 		}
5610Sstevel@tonic-gate 		if ((reqp = aio_req_remove(NULL)) != NULL) {
5620Sstevel@tonic-gate 			*rval = (long)reqp->aio_req_resultp;
5630Sstevel@tonic-gate 			break;
5640Sstevel@tonic-gate 		}
5650Sstevel@tonic-gate 		/* user-level done queue might not be empty */
5660Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
5670Sstevel@tonic-gate 			aiop->aio_notifycnt--;
5680Sstevel@tonic-gate 			*rval = 1;
5690Sstevel@tonic-gate 			break;
5700Sstevel@tonic-gate 		}
5710Sstevel@tonic-gate 		/* don't block if no outstanding aio */
5720Sstevel@tonic-gate 		if (aiop->aio_outstanding == 0 && dontblockflg) {
5730Sstevel@tonic-gate 			error = EINVAL;
5740Sstevel@tonic-gate 			break;
5750Sstevel@tonic-gate 		}
5760Sstevel@tonic-gate 		if (blocking) {
5770Sstevel@tonic-gate 			status = cv_waituntil_sig(&aiop->aio_waitcv,
5784123Sdm120769 			    &aiop->aio_mutex, rqtp, timecheck);
5790Sstevel@tonic-gate 
5800Sstevel@tonic-gate 			if (status > 0)		/* check done queue again */
5810Sstevel@tonic-gate 				continue;
5820Sstevel@tonic-gate 			if (status == 0) {	/* interrupted by a signal */
5830Sstevel@tonic-gate 				error = EINTR;
5840Sstevel@tonic-gate 				*rval = -1;
5850Sstevel@tonic-gate 			} else {		/* timer expired */
5860Sstevel@tonic-gate 				error = ETIME;
5870Sstevel@tonic-gate 			}
5880Sstevel@tonic-gate 		}
5890Sstevel@tonic-gate 		break;
5900Sstevel@tonic-gate 	}
5910Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
5920Sstevel@tonic-gate 	if (reqp) {
5930Sstevel@tonic-gate 		aphysio_unlock(reqp);
5940Sstevel@tonic-gate 		aio_copyout_result(reqp);
5950Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
5960Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
5970Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
5980Sstevel@tonic-gate 	}
5990Sstevel@tonic-gate 	return (error);
6000Sstevel@tonic-gate }
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate /*
6030Sstevel@tonic-gate  * aiowaitn can be used to reap completed asynchronous requests submitted with
6040Sstevel@tonic-gate  * lio_listio, aio_read or aio_write.
6050Sstevel@tonic-gate  * This function only reaps asynchronous raw I/Os.
6060Sstevel@tonic-gate  */
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate /*ARGSUSED*/
6090Sstevel@tonic-gate static int
6100Sstevel@tonic-gate aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
6110Sstevel@tonic-gate {
6120Sstevel@tonic-gate 	int 		error = 0;
6130Sstevel@tonic-gate 	aio_t		*aiop;
6140Sstevel@tonic-gate 	aio_req_t	*reqlist = NULL;
6150Sstevel@tonic-gate 	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
6160Sstevel@tonic-gate 	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
6170Sstevel@tonic-gate 	size_t		iocbsz;			/* users iocb size */
6180Sstevel@tonic-gate 	size_t		riocbsz;		/* returned iocb size */
6190Sstevel@tonic-gate 	int		iocb_index = 0;
6200Sstevel@tonic-gate 	model_t		model = get_udatamodel();
6210Sstevel@tonic-gate 	int		blocking = 1;
6224123Sdm120769 	int		timecheck;
6230Sstevel@tonic-gate 	timestruc_t	rqtime;
6240Sstevel@tonic-gate 	timestruc_t	*rqtp;
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	aiop = curproc->p_aio;
627*4377Sraf 
628*4377Sraf 	if (aiop == NULL || aiop->aio_outstanding == 0)
6290Sstevel@tonic-gate 		return (EAGAIN);
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate 	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
6320Sstevel@tonic-gate 		return (EFAULT);
6330Sstevel@tonic-gate 
6340Sstevel@tonic-gate 	/* set *nwait to zero, if we must return prematurely */
6350Sstevel@tonic-gate 	if (copyout(&cnt, nwait, sizeof (uint_t)))
6360Sstevel@tonic-gate 		return (EFAULT);
6370Sstevel@tonic-gate 
6380Sstevel@tonic-gate 	if (waitcnt == 0) {
6390Sstevel@tonic-gate 		blocking = 0;
6400Sstevel@tonic-gate 		rqtp = NULL;
6410Sstevel@tonic-gate 		waitcnt = nent;
6420Sstevel@tonic-gate 	} else {
6430Sstevel@tonic-gate 		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
6440Sstevel@tonic-gate 		if (error)
6450Sstevel@tonic-gate 			return (error);
6460Sstevel@tonic-gate 	}
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
6490Sstevel@tonic-gate 		iocbsz = (sizeof (aiocb_t *) * nent);
6500Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
6510Sstevel@tonic-gate 	else
6520Sstevel@tonic-gate 		iocbsz = (sizeof (caddr32_t) * nent);
6530Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
6540Sstevel@tonic-gate 
6550Sstevel@tonic-gate 	/*
6560Sstevel@tonic-gate 	 * Only one aio_waitn call is allowed at a time.
6570Sstevel@tonic-gate 	 * The active aio_waitn will collect all requests
6580Sstevel@tonic-gate 	 * out of the "done" list and if necessary it will wait
6590Sstevel@tonic-gate 	 * for some/all pending requests to fulfill the nwait
6600Sstevel@tonic-gate 	 * parameter.
6610Sstevel@tonic-gate 	 * A second or further aio_waitn calls will sleep here
6620Sstevel@tonic-gate 	 * until the active aio_waitn finishes and leaves the kernel
6630Sstevel@tonic-gate 	 * If the second call does not block (poll), then return
6640Sstevel@tonic-gate 	 * immediately with the error code : EAGAIN.
6650Sstevel@tonic-gate 	 * If the second call should block, then sleep here, but
6660Sstevel@tonic-gate 	 * do not touch the timeout. The timeout starts when this
6670Sstevel@tonic-gate 	 * aio_waitn-call becomes active.
6680Sstevel@tonic-gate 	 */
6690Sstevel@tonic-gate 
6700Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	while (aiop->aio_flags & AIO_WAITN) {
6730Sstevel@tonic-gate 		if (blocking == 0) {
6740Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6750Sstevel@tonic-gate 			return (EAGAIN);
6760Sstevel@tonic-gate 		}
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 		/* block, no timeout */
6790Sstevel@tonic-gate 		aiop->aio_flags |= AIO_WAITN_PENDING;
6800Sstevel@tonic-gate 		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
6810Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6820Sstevel@tonic-gate 			return (EINTR);
6830Sstevel@tonic-gate 		}
6840Sstevel@tonic-gate 	}
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 	/*
6870Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
6880Sstevel@tonic-gate 	 */
6890Sstevel@tonic-gate 	if (rqtp) {
6900Sstevel@tonic-gate 		timestruc_t now;
6914123Sdm120769 		timecheck = timechanged;
6920Sstevel@tonic-gate 		gethrestime(&now);
6930Sstevel@tonic-gate 		timespecadd(rqtp, &now);
6940Sstevel@tonic-gate 	}
6950Sstevel@tonic-gate 
6960Sstevel@tonic-gate 	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
6970Sstevel@tonic-gate 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
6980Sstevel@tonic-gate 		aiop->aio_iocb = NULL;
6990Sstevel@tonic-gate 	}
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 	if (aiop->aio_iocb == NULL) {
7020Sstevel@tonic-gate 		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
7030Sstevel@tonic-gate 		if (iocblist == NULL) {
7040Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7050Sstevel@tonic-gate 			return (ENOMEM);
7060Sstevel@tonic-gate 		}
7070Sstevel@tonic-gate 		aiop->aio_iocb = (aiocb_t **)iocblist;
7080Sstevel@tonic-gate 		aiop->aio_iocbsz = iocbsz;
7090Sstevel@tonic-gate 	} else {
7100Sstevel@tonic-gate 		iocblist = (char *)aiop->aio_iocb;
7110Sstevel@tonic-gate 	}
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 	aiop->aio_waitncnt = waitcnt;
7140Sstevel@tonic-gate 	aiop->aio_flags |= AIO_WAITN;
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 	for (;;) {
7170Sstevel@tonic-gate 		/* push requests on poll queue to done queue */
7180Sstevel@tonic-gate 		if (aiop->aio_pollq) {
7190Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7200Sstevel@tonic-gate 			aio_cleanup(0);
7210Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
7220Sstevel@tonic-gate 		}
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 		/* check for requests on done queue */
7250Sstevel@tonic-gate 		if (aiop->aio_doneq) {
7260Sstevel@tonic-gate 			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
7270Sstevel@tonic-gate 			aiop->aio_waitncnt = waitcnt - cnt;
7280Sstevel@tonic-gate 		}
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 		/* user-level done queue might not be empty */
7310Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
7320Sstevel@tonic-gate 			aiop->aio_notifycnt--;
7330Sstevel@tonic-gate 			error = 0;
7340Sstevel@tonic-gate 			break;
7350Sstevel@tonic-gate 		}
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 		/*
7380Sstevel@tonic-gate 		 * if we are here second time as a result of timer
7390Sstevel@tonic-gate 		 * expiration, we reset error if there are enough
7400Sstevel@tonic-gate 		 * aiocb's to satisfy request.
7410Sstevel@tonic-gate 		 * We return also if all requests are already done
7420Sstevel@tonic-gate 		 * and we picked up the whole done queue.
7430Sstevel@tonic-gate 		 */
7440Sstevel@tonic-gate 
7450Sstevel@tonic-gate 		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
7460Sstevel@tonic-gate 		    aiop->aio_doneq == NULL)) {
7470Sstevel@tonic-gate 			error = 0;
7480Sstevel@tonic-gate 			break;
7490Sstevel@tonic-gate 		}
7500Sstevel@tonic-gate 
7510Sstevel@tonic-gate 		if ((cnt < waitcnt) && blocking) {
7520Sstevel@tonic-gate 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
7534123Sdm120769 				&aiop->aio_mutex, rqtp, timecheck);
7540Sstevel@tonic-gate 			if (rval > 0)
7550Sstevel@tonic-gate 				continue;
7560Sstevel@tonic-gate 			if (rval < 0) {
7570Sstevel@tonic-gate 				error = ETIME;
7580Sstevel@tonic-gate 				blocking = 0;
7590Sstevel@tonic-gate 				continue;
7600Sstevel@tonic-gate 			}
7610Sstevel@tonic-gate 			error = EINTR;
7620Sstevel@tonic-gate 		}
7630Sstevel@tonic-gate 		break;
7640Sstevel@tonic-gate 	}
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	if (cnt > 0) {
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
7710Sstevel@tonic-gate 		    aiop, model);
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE)
7740Sstevel@tonic-gate 			riocbsz = (sizeof (aiocb_t *) * cnt);
7750Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
7760Sstevel@tonic-gate 		else
7770Sstevel@tonic-gate 			riocbsz = (sizeof (caddr32_t) * cnt);
7780Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate 		if (copyout(iocblist, uiocb, riocbsz) ||
7810Sstevel@tonic-gate 		    copyout(&cnt, nwait, sizeof (uint_t)))
7820Sstevel@tonic-gate 			error = EFAULT;
7830Sstevel@tonic-gate 	}
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
7860Sstevel@tonic-gate 		kmem_free(iocblist, aiop->aio_iocbsz);
7870Sstevel@tonic-gate 		aiop->aio_iocb = NULL;
7880Sstevel@tonic-gate 	}
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate 	/* check if there is another thread waiting for execution */
7910Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
7920Sstevel@tonic-gate 	aiop->aio_flags &= ~AIO_WAITN;
7930Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_WAITN_PENDING) {
7940Sstevel@tonic-gate 		aiop->aio_flags &= ~AIO_WAITN_PENDING;
7950Sstevel@tonic-gate 		cv_signal(&aiop->aio_waitncv);
7960Sstevel@tonic-gate 	}
7970Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7980Sstevel@tonic-gate 
7990Sstevel@tonic-gate 	return (error);
8000Sstevel@tonic-gate }
8010Sstevel@tonic-gate 
8020Sstevel@tonic-gate /*
8030Sstevel@tonic-gate  * aio_unlock_requests
8040Sstevel@tonic-gate  * copyouts the result of the request as well as the return value.
8050Sstevel@tonic-gate  * It builds the list of completed asynchronous requests,
8060Sstevel@tonic-gate  * unlocks the allocated memory ranges and
8070Sstevel@tonic-gate  * put the aio request structure back into the free list.
8080Sstevel@tonic-gate  */
8090Sstevel@tonic-gate 
8100Sstevel@tonic-gate static int
8110Sstevel@tonic-gate aio_unlock_requests(
8120Sstevel@tonic-gate 	caddr_t	iocblist,
8130Sstevel@tonic-gate 	int	iocb_index,
8140Sstevel@tonic-gate 	aio_req_t *reqlist,
8150Sstevel@tonic-gate 	aio_t	*aiop,
8160Sstevel@tonic-gate 	model_t	model)
8170Sstevel@tonic-gate {
8180Sstevel@tonic-gate 	aio_req_t	*reqp, *nreqp;
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
8210Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8220Sstevel@tonic-gate 			(((caddr_t *)iocblist)[iocb_index++]) =
8230Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb;
8240Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8250Sstevel@tonic-gate 			aphysio_unlock(reqp);
8260Sstevel@tonic-gate 			aio_copyout_result(reqp);
8270Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8280Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8290Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8300Sstevel@tonic-gate 		}
8310Sstevel@tonic-gate 	}
8320Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
8330Sstevel@tonic-gate 	else {
8340Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8350Sstevel@tonic-gate 			((caddr32_t *)iocblist)[iocb_index++] =
8360Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb32;
8370Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8380Sstevel@tonic-gate 			aphysio_unlock(reqp);
8390Sstevel@tonic-gate 			aio_copyout_result(reqp);
8400Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8410Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8420Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8430Sstevel@tonic-gate 		}
8440Sstevel@tonic-gate 	}
8450Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
8460Sstevel@tonic-gate 	return (iocb_index);
8470Sstevel@tonic-gate }
8480Sstevel@tonic-gate 
8490Sstevel@tonic-gate /*
8500Sstevel@tonic-gate  * aio_reqlist_concat
8510Sstevel@tonic-gate  * moves "max" elements from the done queue to the reqlist queue and removes
8520Sstevel@tonic-gate  * the AIO_DONEQ flag.
8530Sstevel@tonic-gate  * - reqlist queue is a simple linked list
8540Sstevel@tonic-gate  * - done queue is a double linked list
8550Sstevel@tonic-gate  */
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate static int
8580Sstevel@tonic-gate aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
8590Sstevel@tonic-gate {
8600Sstevel@tonic-gate 	aio_req_t *q2, *q2work, *list;
8610Sstevel@tonic-gate 	int count = 0;
8620Sstevel@tonic-gate 
8630Sstevel@tonic-gate 	list = *reqlist;
8640Sstevel@tonic-gate 	q2 = aiop->aio_doneq;
8650Sstevel@tonic-gate 	q2work = q2;
8660Sstevel@tonic-gate 	while (max-- > 0) {
8670Sstevel@tonic-gate 		q2work->aio_req_flags &= ~AIO_DONEQ;
8680Sstevel@tonic-gate 		q2work = q2work->aio_req_next;
8690Sstevel@tonic-gate 		count++;
8700Sstevel@tonic-gate 		if (q2work == q2)
8710Sstevel@tonic-gate 			break;
8720Sstevel@tonic-gate 	}
8730Sstevel@tonic-gate 
8740Sstevel@tonic-gate 	if (q2work == q2) {
8750Sstevel@tonic-gate 		/* all elements revised */
8760Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = list;
8770Sstevel@tonic-gate 		list = q2;
8780Sstevel@tonic-gate 		aiop->aio_doneq = NULL;
8790Sstevel@tonic-gate 	} else {
8800Sstevel@tonic-gate 		/*
8810Sstevel@tonic-gate 		 * max < elements in the doneq
8820Sstevel@tonic-gate 		 * detach only the required amount of elements
8830Sstevel@tonic-gate 		 * out of the doneq
8840Sstevel@tonic-gate 		 */
8850Sstevel@tonic-gate 		q2work->aio_req_prev->aio_req_next = list;
8860Sstevel@tonic-gate 		list = q2;
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate 		aiop->aio_doneq = q2work;
8890Sstevel@tonic-gate 		q2work->aio_req_prev = q2->aio_req_prev;
8900Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = q2work;
8910Sstevel@tonic-gate 	}
8920Sstevel@tonic-gate 	*reqlist = list;
8930Sstevel@tonic-gate 	return (count);
8940Sstevel@tonic-gate }
8950Sstevel@tonic-gate 
/*
 * aiosuspend
 * Common backend for the aio_suspend() family: wait until at least one
 * of the caller-supplied asynchronous requests has completed, until the
 * optional timeout expires, or until a signal arrives.
 *
 *	aiocb	 - user address of an array of nent aiocb pointers; the
 *		   element layout depends on the caller's data model and
 *		   on run_mode (aiocb_t pointers natively, caddr32_t for
 *		   ILP32 callers, aiocb64_32_t layout for AIO_LARGEFILE)
 *	nent	 - number of entries in the aiocb array; must be > 0
 *	timout	 - timeout; timespec2reltime() converts it and decides
 *		   via "blocking" whether this call may sleep at all
 *	flag	 - unused here (hence the ARGSUSED lint directive)
 *	rval	 - set to 1 when the wakeup came from a user-level
 *		   notification (aio_notifycnt); otherwise untouched
 *	run_mode - selects which aiocb layout holds the result field
 *		   (AIO_32 and AIO_LARGEFILE are the cases checked here)
 *
 * Returns 0 on success, otherwise EINVAL, EAGAIN, ENOMEM, EFAULT,
 * EINTR or ETIME.
 */
/*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t		cbplist = NULL;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int		rv;
	int		i;
	size_t		ssize;
	model_t		model = get_udatamodel();
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	/* size of the user's pointer array, per data model */
	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;	/* singly-linked list of completed requests */
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			/*
			 * Walk the caller's array; aio_req_done()
			 * matches a done-queue entry by the address
			 * of its result field.
			 */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						    reqp = aio_req_done(
							&cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				/*
				 * NOTE(review): if run_mode matched no
				 * case above, reqp keeps its value from
				 * the previous iteration (uninitialized
				 * on the first pass).  Presumably the
				 * callers only pass the run_modes
				 * handled here -- confirm.
				 */
				if (reqp) {
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
				&aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	/*
	 * Each request on the found list has been detached from the
	 * done queue; unlock its buffers, copy the result out to
	 * userland, and return it to the free list.
	 */
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}
10900Sstevel@tonic-gate 
10910Sstevel@tonic-gate /*
10920Sstevel@tonic-gate  * initialize aio by allocating an aio_t struct for this
10930Sstevel@tonic-gate  * process.
10940Sstevel@tonic-gate  */
10950Sstevel@tonic-gate static int
10960Sstevel@tonic-gate aioinit(void)
10970Sstevel@tonic-gate {
10980Sstevel@tonic-gate 	proc_t *p = curproc;
10990Sstevel@tonic-gate 	aio_t *aiop;
11000Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11010Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL) {
11020Sstevel@tonic-gate 		aiop = aio_aiop_alloc();
11030Sstevel@tonic-gate 		p->p_aio = aiop;
11040Sstevel@tonic-gate 	}
11050Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11060Sstevel@tonic-gate 	if (aiop == NULL)
11070Sstevel@tonic-gate 		return (ENOMEM);
11080Sstevel@tonic-gate 	return (0);
11090Sstevel@tonic-gate }
11100Sstevel@tonic-gate 
11110Sstevel@tonic-gate /*
11120Sstevel@tonic-gate  * start a special thread that will cleanup after aio requests
11130Sstevel@tonic-gate  * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
11150Sstevel@tonic-gate  * doesn't happen until all the pages in this segment are not
11160Sstevel@tonic-gate  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
11170Sstevel@tonic-gate  * requests still outstanding. this special thread will make sure
11180Sstevel@tonic-gate  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
11190Sstevel@tonic-gate  *
11200Sstevel@tonic-gate  * this function will return an error if the process has only
11210Sstevel@tonic-gate  * one LWP. the assumption is that the caller is a separate LWP
11220Sstevel@tonic-gate  * that remains blocked in the kernel for the life of this process.
11230Sstevel@tonic-gate  */
11240Sstevel@tonic-gate static int
11250Sstevel@tonic-gate aiostart(void)
11260Sstevel@tonic-gate {
11270Sstevel@tonic-gate 	proc_t *p = curproc;
11280Sstevel@tonic-gate 	aio_t *aiop;
11290Sstevel@tonic-gate 	int first, error = 0;
11300Sstevel@tonic-gate 
11310Sstevel@tonic-gate 	if (p->p_lwpcnt == 1)
11320Sstevel@tonic-gate 		return (EDEADLK);
11330Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11340Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL)
11350Sstevel@tonic-gate 		error = EINVAL;
11360Sstevel@tonic-gate 	else {
11370Sstevel@tonic-gate 		first = aiop->aio_ok;
11380Sstevel@tonic-gate 		if (aiop->aio_ok == 0)
11390Sstevel@tonic-gate 			aiop->aio_ok = 1;
11400Sstevel@tonic-gate 	}
11410Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11420Sstevel@tonic-gate 	if (error == 0 && first == 0) {
11430Sstevel@tonic-gate 		return (aio_cleanup_thread(aiop));
11440Sstevel@tonic-gate 		/* should return only to exit */
11450Sstevel@tonic-gate 	}
11460Sstevel@tonic-gate 	return (error);
11470Sstevel@tonic-gate }
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate /*
11500Sstevel@tonic-gate  * Associate an aiocb with a port.
11510Sstevel@tonic-gate  * This function is used by aiorw() to associate a transaction with a port.
11520Sstevel@tonic-gate  * Allocate an event port structure (port_alloc_event()) and store the
11530Sstevel@tonic-gate  * delivered user pointer (portnfy_user) in the portkev_user field of the
11540Sstevel@tonic-gate  * port_kevent_t structure..
11550Sstevel@tonic-gate  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
11560Sstevel@tonic-gate  * the port association.
11570Sstevel@tonic-gate  */
11580Sstevel@tonic-gate 
11590Sstevel@tonic-gate static int
11601885Sraf aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
11611885Sraf 	aio_req_t *reqp, int event)
11620Sstevel@tonic-gate {
11630Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
11640Sstevel@tonic-gate 	int		error;
11650Sstevel@tonic-gate 
11660Sstevel@tonic-gate 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
11670Sstevel@tonic-gate 	    PORT_SOURCE_AIO, &pkevp);
11680Sstevel@tonic-gate 	if (error) {
11690Sstevel@tonic-gate 		if ((error == ENOMEM) || (error == EAGAIN))
11700Sstevel@tonic-gate 			error = EAGAIN;
11710Sstevel@tonic-gate 		else
11720Sstevel@tonic-gate 			error = EINVAL;
11730Sstevel@tonic-gate 	} else {
11740Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
11750Sstevel@tonic-gate 		    aio_port_callback, reqp);
11761885Sraf 		pkevp->portkev_events = event;
11770Sstevel@tonic-gate 		reqp->aio_req_portkev = pkevp;
11780Sstevel@tonic-gate 		reqp->aio_req_port = pntfy->portnfy_port;
11790Sstevel@tonic-gate 	}
11800Sstevel@tonic-gate 	return (error);
11810Sstevel@tonic-gate }
11820Sstevel@tonic-gate 
11830Sstevel@tonic-gate #ifdef _LP64
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate /*
11860Sstevel@tonic-gate  * Asynchronous list IO. A chain of aiocb's are copied in
11870Sstevel@tonic-gate  * one at a time. If the aiocb is invalid, it is skipped.
11880Sstevel@tonic-gate  * For each aiocb, the appropriate driver entry point is
11890Sstevel@tonic-gate  * called. Optimize for the common case where the list
11900Sstevel@tonic-gate  * of requests is to the same file descriptor.
11910Sstevel@tonic-gate  *
11920Sstevel@tonic-gate  * One possible optimization is to define a new driver entry
11930Sstevel@tonic-gate  * point that supports a list of IO requests. Whether this
11940Sstevel@tonic-gate  * improves performance depends somewhat on the driver's
11950Sstevel@tonic-gate  * locking strategy. Processing a list could adversely impact
11960Sstevel@tonic-gate  * the driver's interrupt latency.
11970Sstevel@tonic-gate  */
11980Sstevel@tonic-gate static int
11990Sstevel@tonic-gate alio(
12001885Sraf 	int		mode_arg,
12011885Sraf 	aiocb_t		**aiocb_arg,
12021885Sraf 	int		nent,
12031885Sraf 	struct sigevent	*sigev)
12040Sstevel@tonic-gate {
12050Sstevel@tonic-gate 	file_t		*fp;
12060Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
12070Sstevel@tonic-gate 	int		prev_mode = -1;
12080Sstevel@tonic-gate 	struct vnode	*vp;
12090Sstevel@tonic-gate 	aio_lio_t	*head;
12100Sstevel@tonic-gate 	aio_req_t	*reqp;
12110Sstevel@tonic-gate 	aio_t		*aiop;
12120Sstevel@tonic-gate 	caddr_t		cbplist;
12130Sstevel@tonic-gate 	aiocb_t		cb;
12140Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
12151885Sraf 	aiocb_t		*cbp;
12161885Sraf 	aiocb_t		**ucbp;
12170Sstevel@tonic-gate 	struct sigevent sigevk;
12180Sstevel@tonic-gate 	sigqueue_t	*sqp;
12190Sstevel@tonic-gate 	int		(*aio_func)();
12200Sstevel@tonic-gate 	int		mode;
12210Sstevel@tonic-gate 	int		error = 0;
12220Sstevel@tonic-gate 	int		aio_errors = 0;
12230Sstevel@tonic-gate 	int		i;
12240Sstevel@tonic-gate 	size_t		ssize;
12250Sstevel@tonic-gate 	int		deadhead = 0;
12260Sstevel@tonic-gate 	int		aio_notsupported = 0;
12271885Sraf 	int		lio_head_port;
12281885Sraf 	int		aio_port;
12291885Sraf 	int		aio_thread;
12300Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
12310Sstevel@tonic-gate 	port_notify_t	pnotify;
12321885Sraf 	int		event;
12330Sstevel@tonic-gate 
12340Sstevel@tonic-gate 	aiop = curproc->p_aio;
12350Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
12360Sstevel@tonic-gate 		return (EINVAL);
12370Sstevel@tonic-gate 
12380Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
12390Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
12400Sstevel@tonic-gate 	ucbp = (aiocb_t **)cbplist;
12410Sstevel@tonic-gate 
12421885Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
12431885Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
12440Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
12450Sstevel@tonic-gate 		return (EFAULT);
12460Sstevel@tonic-gate 	}
12470Sstevel@tonic-gate 
12481885Sraf 	/* Event Ports  */
12491885Sraf 	if (sigev &&
12501885Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
12511885Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
12521885Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
12531885Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
12541885Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
12551885Sraf 		} else if (copyin(sigevk.sigev_value.sival_ptr,
12561885Sraf 		    &pnotify, sizeof (pnotify))) {
12570Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
12580Sstevel@tonic-gate 			return (EFAULT);
12590Sstevel@tonic-gate 		}
12601885Sraf 		error = port_alloc_event(pnotify.portnfy_port,
12611885Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
12621885Sraf 		if (error) {
12631885Sraf 			if (error == ENOMEM || error == EAGAIN)
12641885Sraf 				error = EAGAIN;
12651885Sraf 			else
12661885Sraf 				error = EINVAL;
12671885Sraf 			kmem_free(cbplist, ssize);
12681885Sraf 			return (error);
12691885Sraf 		}
12701885Sraf 		lio_head_port = pnotify.portnfy_port;
12710Sstevel@tonic-gate 	}
12720Sstevel@tonic-gate 
12730Sstevel@tonic-gate 	/*
12740Sstevel@tonic-gate 	 * a list head should be allocated if notification is
12750Sstevel@tonic-gate 	 * enabled for this list.
12760Sstevel@tonic-gate 	 */
12770Sstevel@tonic-gate 	head = NULL;
12780Sstevel@tonic-gate 
12791885Sraf 	if (mode_arg == LIO_WAIT || sigev) {
12800Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
12810Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
12820Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
12830Sstevel@tonic-gate 		if (error)
12840Sstevel@tonic-gate 			goto done;
12850Sstevel@tonic-gate 		deadhead = 1;
12860Sstevel@tonic-gate 		head->lio_nent = nent;
12870Sstevel@tonic-gate 		head->lio_refcnt = nent;
12881885Sraf 		head->lio_port = -1;
12891885Sraf 		head->lio_portkev = NULL;
12901885Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
12911885Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
12920Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
12930Sstevel@tonic-gate 			if (sqp == NULL) {
12940Sstevel@tonic-gate 				error = EAGAIN;
12950Sstevel@tonic-gate 				goto done;
12960Sstevel@tonic-gate 			}
12970Sstevel@tonic-gate 			sqp->sq_func = NULL;
12980Sstevel@tonic-gate 			sqp->sq_next = NULL;
12990Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
13000Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
13010Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
13020Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
13030Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
13040Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
13050Sstevel@tonic-gate 			sqp->sq_info.si_value = sigevk.sigev_value;
13060Sstevel@tonic-gate 			head->lio_sigqp = sqp;
13070Sstevel@tonic-gate 		} else {
13080Sstevel@tonic-gate 			head->lio_sigqp = NULL;
13090Sstevel@tonic-gate 		}
13101885Sraf 		if (pkevtp) {
13111885Sraf 			/*
13121885Sraf 			 * Prepare data to send when list of aiocb's
13131885Sraf 			 * has completed.
13141885Sraf 			 */
13151885Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
13161885Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
13171885Sraf 			    NULL, head);
13181885Sraf 			pkevtp->portkev_events = AIOLIO;
13191885Sraf 			head->lio_portkev = pkevtp;
13201885Sraf 			head->lio_port = pnotify.portnfy_port;
13211885Sraf 		}
13220Sstevel@tonic-gate 	}
13230Sstevel@tonic-gate 
13240Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate 		cbp = *ucbp;
13270Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
13281885Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
13290Sstevel@tonic-gate 			if (head) {
13300Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13310Sstevel@tonic-gate 				head->lio_nent--;
13320Sstevel@tonic-gate 				head->lio_refcnt--;
13330Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13340Sstevel@tonic-gate 			}
13350Sstevel@tonic-gate 			continue;
13360Sstevel@tonic-gate 		}
13370Sstevel@tonic-gate 
13380Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
13390Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
13400Sstevel@tonic-gate 		if (mode == LIO_NOP) {
13410Sstevel@tonic-gate 			cbp = NULL;
13420Sstevel@tonic-gate 			if (head) {
13430Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13440Sstevel@tonic-gate 				head->lio_nent--;
13450Sstevel@tonic-gate 				head->lio_refcnt--;
13460Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13470Sstevel@tonic-gate 			}
13480Sstevel@tonic-gate 			continue;
13490Sstevel@tonic-gate 		}
13500Sstevel@tonic-gate 
13510Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
13520Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
13530Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13540Sstevel@tonic-gate 			if (head) {
13550Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13560Sstevel@tonic-gate 				head->lio_nent--;
13570Sstevel@tonic-gate 				head->lio_refcnt--;
13580Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13590Sstevel@tonic-gate 			}
13600Sstevel@tonic-gate 			aio_errors++;
13610Sstevel@tonic-gate 			continue;
13620Sstevel@tonic-gate 		}
13630Sstevel@tonic-gate 
13640Sstevel@tonic-gate 		/*
13650Sstevel@tonic-gate 		 * check the permission of the partition
13660Sstevel@tonic-gate 		 */
13670Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
13680Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
13690Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13700Sstevel@tonic-gate 			if (head) {
13710Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13720Sstevel@tonic-gate 				head->lio_nent--;
13730Sstevel@tonic-gate 				head->lio_refcnt--;
13740Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13750Sstevel@tonic-gate 			}
13760Sstevel@tonic-gate 			aio_errors++;
13770Sstevel@tonic-gate 			continue;
13780Sstevel@tonic-gate 		}
13790Sstevel@tonic-gate 
13800Sstevel@tonic-gate 		/*
13811885Sraf 		 * common case where requests are to the same fd
13821885Sraf 		 * for the same r/w operation.
13830Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
13840Sstevel@tonic-gate 		 */
13851885Sraf 		vp = fp->f_vnode;
13861885Sraf 		if (fp != prev_fp || mode != prev_mode) {
13870Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
13880Sstevel@tonic-gate 			if (aio_func == NULL) {
13890Sstevel@tonic-gate 				prev_fp = NULL;
13900Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
13910Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
13920Sstevel@tonic-gate 				aio_notsupported++;
13930Sstevel@tonic-gate 				if (head) {
13940Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
13950Sstevel@tonic-gate 					head->lio_nent--;
13960Sstevel@tonic-gate 					head->lio_refcnt--;
13970Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
13980Sstevel@tonic-gate 				}
13990Sstevel@tonic-gate 				continue;
14000Sstevel@tonic-gate 			} else {
14010Sstevel@tonic-gate 				prev_fp = fp;
14020Sstevel@tonic-gate 				prev_mode = mode;
14030Sstevel@tonic-gate 			}
14040Sstevel@tonic-gate 		}
14050Sstevel@tonic-gate 
14061885Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
14071885Sraf 		    &cbp->aio_resultp, vp);
14081885Sraf 		if (error) {
14090Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14100Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14110Sstevel@tonic-gate 			if (head) {
14120Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14130Sstevel@tonic-gate 				head->lio_nent--;
14140Sstevel@tonic-gate 				head->lio_refcnt--;
14150Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14160Sstevel@tonic-gate 			}
14170Sstevel@tonic-gate 			aio_errors++;
14180Sstevel@tonic-gate 			continue;
14190Sstevel@tonic-gate 		}
14200Sstevel@tonic-gate 
14210Sstevel@tonic-gate 		reqp->aio_req_lio = head;
14220Sstevel@tonic-gate 		deadhead = 0;
14230Sstevel@tonic-gate 
14240Sstevel@tonic-gate 		/*
14250Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
14260Sstevel@tonic-gate 		 * the driver to avoid a race condition
14270Sstevel@tonic-gate 		 */
14280Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
14290Sstevel@tonic-gate 		    EINPROGRESS);
14300Sstevel@tonic-gate 
14310Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
14320Sstevel@tonic-gate 
14331885Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
14341885Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
14351885Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
14361885Sraf 		if (aio_port | aio_thread) {
14371885Sraf 			port_kevent_t *lpkevp;
14381885Sraf 			/*
14391885Sraf 			 * Prepare data to send with each aiocb completed.
14401885Sraf 			 */
14411885Sraf 			if (aio_port) {
14421885Sraf 				void *paddr =
14431885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14441885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
14451885Sraf 					error = EFAULT;
14461885Sraf 			} else {	/* aio_thread */
14471885Sraf 				pnotify.portnfy_port =
14481885Sraf 				    aiocb->aio_sigevent.sigev_signo;
14491885Sraf 				pnotify.portnfy_user =
14501885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14511885Sraf 			}
14521885Sraf 			if (error)
14531885Sraf 				/* EMPTY */;
14541885Sraf 			else if (pkevtp != NULL &&
14551885Sraf 			    pnotify.portnfy_port == lio_head_port)
14561885Sraf 				error = port_dup_event(pkevtp, &lpkevp,
14571885Sraf 				    PORT_ALLOC_DEFAULT);
14581885Sraf 			else
14591885Sraf 				error = port_alloc_event(pnotify.portnfy_port,
14601885Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
14611885Sraf 				    &lpkevp);
14621885Sraf 			if (error == 0) {
14631885Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
14641885Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
14651885Sraf 				    aio_port_callback, reqp);
14661885Sraf 				lpkevp->portkev_events = event;
14671885Sraf 				reqp->aio_req_portkev = lpkevp;
14681885Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
14691885Sraf 			}
14700Sstevel@tonic-gate 		}
14710Sstevel@tonic-gate 
14720Sstevel@tonic-gate 		/*
14730Sstevel@tonic-gate 		 * send the request to driver.
14740Sstevel@tonic-gate 		 */
14750Sstevel@tonic-gate 		if (error == 0) {
14760Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
14770Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
14780Sstevel@tonic-gate 				aio_zerolen(reqp);
14790Sstevel@tonic-gate 				continue;
14800Sstevel@tonic-gate 			}
14810Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
14820Sstevel@tonic-gate 			    CRED());
14830Sstevel@tonic-gate 		}
14841885Sraf 
14850Sstevel@tonic-gate 		/*
14860Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
14870Sstevel@tonic-gate 		 * completed unless there was an error.
14880Sstevel@tonic-gate 		 */
14890Sstevel@tonic-gate 		if (error) {
14900Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14910Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14920Sstevel@tonic-gate 			if (head) {
14930Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14940Sstevel@tonic-gate 				head->lio_nent--;
14950Sstevel@tonic-gate 				head->lio_refcnt--;
14960Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14970Sstevel@tonic-gate 			}
14980Sstevel@tonic-gate 			if (error == ENOTSUP)
14990Sstevel@tonic-gate 				aio_notsupported++;
15000Sstevel@tonic-gate 			else
15010Sstevel@tonic-gate 				aio_errors++;
15020Sstevel@tonic-gate 			lio_set_error(reqp);
15030Sstevel@tonic-gate 		} else {
15040Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
15050Sstevel@tonic-gate 		}
15060Sstevel@tonic-gate 	}
15070Sstevel@tonic-gate 
15080Sstevel@tonic-gate 	if (aio_notsupported) {
15090Sstevel@tonic-gate 		error = ENOTSUP;
15100Sstevel@tonic-gate 	} else if (aio_errors) {
15110Sstevel@tonic-gate 		/*
15120Sstevel@tonic-gate 		 * return EIO if any request failed
15130Sstevel@tonic-gate 		 */
15140Sstevel@tonic-gate 		error = EIO;
15150Sstevel@tonic-gate 	}
15160Sstevel@tonic-gate 
15170Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
15180Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
15190Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
15200Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
15210Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15220Sstevel@tonic-gate 				error = EINTR;
15230Sstevel@tonic-gate 				goto done;
15240Sstevel@tonic-gate 			}
15250Sstevel@tonic-gate 		}
15260Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
15270Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
15280Sstevel@tonic-gate 	}
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate done:
15310Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
15320Sstevel@tonic-gate 	if (deadhead) {
15330Sstevel@tonic-gate 		if (head->lio_sigqp)
15340Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
15351885Sraf 		if (head->lio_portkev)
15361885Sraf 			port_free_event(head->lio_portkev);
15370Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
15380Sstevel@tonic-gate 	}
15390Sstevel@tonic-gate 	return (error);
15400Sstevel@tonic-gate }
15410Sstevel@tonic-gate 
15420Sstevel@tonic-gate #endif /* _LP64 */
15430Sstevel@tonic-gate 
15440Sstevel@tonic-gate /*
15450Sstevel@tonic-gate  * Asynchronous list IO.
15460Sstevel@tonic-gate  * If list I/O is called with LIO_WAIT it can still return
15470Sstevel@tonic-gate  * before all the I/O's are completed if a signal is caught
15480Sstevel@tonic-gate  * or if the list include UFS I/O requests. If this happens,
15490Sstevel@tonic-gate  * libaio will call aliowait() to wait for the I/O's to
15500Sstevel@tonic-gate  * complete
15510Sstevel@tonic-gate  */
15520Sstevel@tonic-gate /*ARGSUSED*/
15530Sstevel@tonic-gate static int
15540Sstevel@tonic-gate aliowait(
15550Sstevel@tonic-gate 	int	mode,
15560Sstevel@tonic-gate 	void	*aiocb,
15570Sstevel@tonic-gate 	int	nent,
15580Sstevel@tonic-gate 	void	*sigev,
15590Sstevel@tonic-gate 	int	run_mode)
15600Sstevel@tonic-gate {
15610Sstevel@tonic-gate 	aio_lio_t	*head;
15620Sstevel@tonic-gate 	aio_t		*aiop;
15630Sstevel@tonic-gate 	caddr_t		cbplist;
15640Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
15650Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15660Sstevel@tonic-gate 	aiocb32_t	*cbp32;
15670Sstevel@tonic-gate 	caddr32_t	*ucbp32;
15680Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
15690Sstevel@tonic-gate #endif
15700Sstevel@tonic-gate 	int		error = 0;
15710Sstevel@tonic-gate 	int		i;
15720Sstevel@tonic-gate 	size_t		ssize = 0;
15730Sstevel@tonic-gate 	model_t		model = get_udatamodel();
15740Sstevel@tonic-gate 
15750Sstevel@tonic-gate 	aiop = curproc->p_aio;
15760Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
15770Sstevel@tonic-gate 		return (EINVAL);
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15800Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
15810Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15820Sstevel@tonic-gate 	else
15830Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
15840Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15850Sstevel@tonic-gate 
15860Sstevel@tonic-gate 	if (ssize == 0)
15870Sstevel@tonic-gate 		return (EINVAL);
15880Sstevel@tonic-gate 
15890Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
15900Sstevel@tonic-gate 
15910Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15920Sstevel@tonic-gate 		ucbp = (aiocb_t **)cbplist;
15930Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15940Sstevel@tonic-gate 	else
15950Sstevel@tonic-gate 		ucbp32 = (caddr32_t *)cbplist;
15960Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15970Sstevel@tonic-gate 
15980Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
15990Sstevel@tonic-gate 		error = EFAULT;
16000Sstevel@tonic-gate 		goto done;
16010Sstevel@tonic-gate 	}
16020Sstevel@tonic-gate 
16030Sstevel@tonic-gate 	/*
16040Sstevel@tonic-gate 	 * To find the list head, we go through the
16050Sstevel@tonic-gate 	 * list of aiocb structs, find the request
16060Sstevel@tonic-gate 	 * its for, then get the list head that reqp
16070Sstevel@tonic-gate 	 * points to
16080Sstevel@tonic-gate 	 */
16090Sstevel@tonic-gate 	head = NULL;
16100Sstevel@tonic-gate 
16110Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
16120Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE) {
16130Sstevel@tonic-gate 			/*
16140Sstevel@tonic-gate 			 * Since we are only checking for a NULL pointer
16150Sstevel@tonic-gate 			 * Following should work on both native data sizes
16160Sstevel@tonic-gate 			 * as well as for largefile aiocb.
16170Sstevel@tonic-gate 			 */
16180Sstevel@tonic-gate 			if ((cbp = *ucbp++) == NULL)
16190Sstevel@tonic-gate 				continue;
16200Sstevel@tonic-gate 			if (run_mode != AIO_LARGEFILE)
16210Sstevel@tonic-gate 				if (head = aio_list_get(&cbp->aio_resultp))
16220Sstevel@tonic-gate 					break;
16230Sstevel@tonic-gate 			else {
16240Sstevel@tonic-gate 				/*
16250Sstevel@tonic-gate 				 * This is a case when largefile call is
16260Sstevel@tonic-gate 				 * made on 32 bit kernel.
16270Sstevel@tonic-gate 				 * Treat each pointer as pointer to
16280Sstevel@tonic-gate 				 * aiocb64_32
16290Sstevel@tonic-gate 				 */
16300Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16310Sstevel@tonic-gate 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
16320Sstevel@tonic-gate 					break;
16330Sstevel@tonic-gate 			}
16340Sstevel@tonic-gate 		}
16350Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16360Sstevel@tonic-gate 		else {
16370Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
16380Sstevel@tonic-gate 				if ((cbp64 = (aiocb64_32_t *)
16390Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16400Sstevel@tonic-gate 					continue;
16410Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16420Sstevel@tonic-gate 				    &cbp64->aio_resultp))
16430Sstevel@tonic-gate 					break;
16440Sstevel@tonic-gate 			} else if (run_mode == AIO_32) {
16450Sstevel@tonic-gate 				if ((cbp32 = (aiocb32_t *)
16460Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16470Sstevel@tonic-gate 					continue;
16480Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16490Sstevel@tonic-gate 				    &cbp32->aio_resultp))
16500Sstevel@tonic-gate 					break;
16510Sstevel@tonic-gate 			}
16520Sstevel@tonic-gate 		}
16530Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
16540Sstevel@tonic-gate 	}
16550Sstevel@tonic-gate 
16560Sstevel@tonic-gate 	if (head == NULL) {
16570Sstevel@tonic-gate 		error = EINVAL;
16580Sstevel@tonic-gate 		goto done;
16590Sstevel@tonic-gate 	}
16600Sstevel@tonic-gate 
16610Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
16620Sstevel@tonic-gate 	while (head->lio_refcnt > 0) {
16630Sstevel@tonic-gate 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
16640Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
16650Sstevel@tonic-gate 			error = EINTR;
16660Sstevel@tonic-gate 			goto done;
16670Sstevel@tonic-gate 		}
16680Sstevel@tonic-gate 	}
16690Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
16700Sstevel@tonic-gate 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
16710Sstevel@tonic-gate done:
16720Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
16730Sstevel@tonic-gate 	return (error);
16740Sstevel@tonic-gate }
16750Sstevel@tonic-gate 
16760Sstevel@tonic-gate aio_lio_t *
16770Sstevel@tonic-gate aio_list_get(aio_result_t *resultp)
16780Sstevel@tonic-gate {
16790Sstevel@tonic-gate 	aio_lio_t	*head = NULL;
16800Sstevel@tonic-gate 	aio_t		*aiop;
16810Sstevel@tonic-gate 	aio_req_t 	**bucket;
16820Sstevel@tonic-gate 	aio_req_t 	*reqp;
16830Sstevel@tonic-gate 	long		index;
16840Sstevel@tonic-gate 
16850Sstevel@tonic-gate 	aiop = curproc->p_aio;
16860Sstevel@tonic-gate 	if (aiop == NULL)
16870Sstevel@tonic-gate 		return (NULL);
16880Sstevel@tonic-gate 
16890Sstevel@tonic-gate 	if (resultp) {
16900Sstevel@tonic-gate 		index = AIO_HASH(resultp);
16910Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
16920Sstevel@tonic-gate 		for (reqp = *bucket; reqp != NULL;
16930Sstevel@tonic-gate 		    reqp = reqp->aio_hash_next) {
16940Sstevel@tonic-gate 			if (reqp->aio_req_resultp == resultp) {
16950Sstevel@tonic-gate 				head = reqp->aio_req_lio;
16960Sstevel@tonic-gate 				return (head);
16970Sstevel@tonic-gate 			}
16980Sstevel@tonic-gate 		}
16990Sstevel@tonic-gate 	}
17000Sstevel@tonic-gate 	return (NULL);
17010Sstevel@tonic-gate }
17020Sstevel@tonic-gate 
17030Sstevel@tonic-gate 
17040Sstevel@tonic-gate static void
17050Sstevel@tonic-gate lio_set_uerror(void *resultp, int error)
17060Sstevel@tonic-gate {
17070Sstevel@tonic-gate 	/*
17080Sstevel@tonic-gate 	 * the resultp field is a pointer to where the
17090Sstevel@tonic-gate 	 * error should be written out to the user's
17100Sstevel@tonic-gate 	 * aiocb.
17110Sstevel@tonic-gate 	 *
17120Sstevel@tonic-gate 	 */
17130Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
17140Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
17150Sstevel@tonic-gate 		    (ssize_t)-1);
17160Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
17170Sstevel@tonic-gate 	}
17180Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17190Sstevel@tonic-gate 	else {
17200Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
17210Sstevel@tonic-gate 		    (uint_t)-1);
17220Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
17230Sstevel@tonic-gate 	}
17240Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17250Sstevel@tonic-gate }
17260Sstevel@tonic-gate 
17270Sstevel@tonic-gate /*
17280Sstevel@tonic-gate  * do cleanup completion for all requests in list. memory for
17290Sstevel@tonic-gate  * each request is also freed.
17300Sstevel@tonic-gate  */
17310Sstevel@tonic-gate static void
17320Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
17330Sstevel@tonic-gate {
17340Sstevel@tonic-gate 	int i;
17350Sstevel@tonic-gate 	aio_req_t *reqp;
17360Sstevel@tonic-gate 	aio_result_t *resultp;
17371885Sraf 	aiocb64_32_t *aiocb_64;
17380Sstevel@tonic-gate 
17390Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
17400Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
17410Sstevel@tonic-gate 			if (cbp[i] == NULL)
17420Sstevel@tonic-gate 				continue;
17430Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
17440Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)cbp[i];
17451885Sraf 				resultp = (aio_result_t *)
17461885Sraf 				    &aiocb_64->aio_resultp;
17470Sstevel@tonic-gate 			} else
17480Sstevel@tonic-gate 				resultp = &cbp[i]->aio_resultp;
17490Sstevel@tonic-gate 		}
17500Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17510Sstevel@tonic-gate 		else {
17521885Sraf 			aiocb32_t *aiocb_32;
17531885Sraf 			caddr32_t *cbp32;
17540Sstevel@tonic-gate 
17550Sstevel@tonic-gate 			cbp32 = (caddr32_t *)cbp;
17560Sstevel@tonic-gate 			if (cbp32[i] == NULL)
17570Sstevel@tonic-gate 				continue;
17580Sstevel@tonic-gate 			if (run_mode == AIO_32) {
17590Sstevel@tonic-gate 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
17600Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_32->
17610Sstevel@tonic-gate 				    aio_resultp;
17620Sstevel@tonic-gate 			} else if (run_mode == AIO_LARGEFILE) {
17630Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
17640Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_64->
17650Sstevel@tonic-gate 				    aio_resultp;
17660Sstevel@tonic-gate 			}
17670Sstevel@tonic-gate 		}
17680Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17690Sstevel@tonic-gate 		/*
17700Sstevel@tonic-gate 		 * we need to get the aio_cleanupq_mutex since we call
17710Sstevel@tonic-gate 		 * aio_req_done().
17720Sstevel@tonic-gate 		 */
17730Sstevel@tonic-gate 		mutex_enter(&aiop->aio_cleanupq_mutex);
17740Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
17750Sstevel@tonic-gate 		reqp = aio_req_done(resultp);
17760Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
17770Sstevel@tonic-gate 		mutex_exit(&aiop->aio_cleanupq_mutex);
17780Sstevel@tonic-gate 		if (reqp != NULL) {
17790Sstevel@tonic-gate 			aphysio_unlock(reqp);
17800Sstevel@tonic-gate 			aio_copyout_result(reqp);
17810Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
17820Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
17830Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
17840Sstevel@tonic-gate 		}
17850Sstevel@tonic-gate 	}
17860Sstevel@tonic-gate }
17870Sstevel@tonic-gate 
17880Sstevel@tonic-gate /*
17891885Sraf  * Write out the results for an aio request that is done.
17900Sstevel@tonic-gate  */
17910Sstevel@tonic-gate static int
17920Sstevel@tonic-gate aioerror(void *cb, int run_mode)
17930Sstevel@tonic-gate {
17940Sstevel@tonic-gate 	aio_result_t *resultp;
17950Sstevel@tonic-gate 	aio_t *aiop;
17960Sstevel@tonic-gate 	aio_req_t *reqp;
17970Sstevel@tonic-gate 	int retval;
17980Sstevel@tonic-gate 
17990Sstevel@tonic-gate 	aiop = curproc->p_aio;
18000Sstevel@tonic-gate 	if (aiop == NULL || cb == NULL)
18010Sstevel@tonic-gate 		return (EINVAL);
18020Sstevel@tonic-gate 
18030Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
18040Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18050Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18060Sstevel@tonic-gate 			    aio_resultp;
18070Sstevel@tonic-gate 		else
18080Sstevel@tonic-gate 			resultp = &((aiocb_t *)cb)->aio_resultp;
18090Sstevel@tonic-gate 	}
18100Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18110Sstevel@tonic-gate 	else {
18120Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18130Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18140Sstevel@tonic-gate 			    aio_resultp;
18150Sstevel@tonic-gate 		else if (run_mode == AIO_32)
18160Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
18170Sstevel@tonic-gate 			    aio_resultp;
18180Sstevel@tonic-gate 	}
18190Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
18200Sstevel@tonic-gate 	/*
18210Sstevel@tonic-gate 	 * we need to get the aio_cleanupq_mutex since we call
18220Sstevel@tonic-gate 	 * aio_req_find().
18230Sstevel@tonic-gate 	 */
18240Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
18250Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18260Sstevel@tonic-gate 	retval = aio_req_find(resultp, &reqp);
18270Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
18280Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
18290Sstevel@tonic-gate 	if (retval == 0) {
18300Sstevel@tonic-gate 		aphysio_unlock(reqp);
18310Sstevel@tonic-gate 		aio_copyout_result(reqp);
18320Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
18330Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
18340Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
18350Sstevel@tonic-gate 		return (0);
18360Sstevel@tonic-gate 	} else if (retval == 1)
18370Sstevel@tonic-gate 		return (EINPROGRESS);
18380Sstevel@tonic-gate 	else if (retval == 2)
18390Sstevel@tonic-gate 		return (EINVAL);
18400Sstevel@tonic-gate 	return (0);
18410Sstevel@tonic-gate }
18420Sstevel@tonic-gate 
18430Sstevel@tonic-gate /*
18440Sstevel@tonic-gate  * 	aio_cancel - if no requests outstanding,
18450Sstevel@tonic-gate  *			return AIO_ALLDONE
18460Sstevel@tonic-gate  *			else
18470Sstevel@tonic-gate  *			return AIO_NOTCANCELED
18480Sstevel@tonic-gate  */
18490Sstevel@tonic-gate static int
18500Sstevel@tonic-gate aio_cancel(
18510Sstevel@tonic-gate 	int	fildes,
18520Sstevel@tonic-gate 	void 	*cb,
18530Sstevel@tonic-gate 	long	*rval,
18540Sstevel@tonic-gate 	int	run_mode)
18550Sstevel@tonic-gate {
18560Sstevel@tonic-gate 	aio_t *aiop;
18570Sstevel@tonic-gate 	void *resultp;
18580Sstevel@tonic-gate 	int index;
18590Sstevel@tonic-gate 	aio_req_t **bucket;
18600Sstevel@tonic-gate 	aio_req_t *ent;
18610Sstevel@tonic-gate 
18620Sstevel@tonic-gate 
18630Sstevel@tonic-gate 	/*
18640Sstevel@tonic-gate 	 * Verify valid file descriptor
18650Sstevel@tonic-gate 	 */
18660Sstevel@tonic-gate 	if ((getf(fildes)) == NULL) {
18670Sstevel@tonic-gate 		return (EBADF);
18680Sstevel@tonic-gate 	}
18690Sstevel@tonic-gate 	releasef(fildes);
18700Sstevel@tonic-gate 
18710Sstevel@tonic-gate 	aiop = curproc->p_aio;
18720Sstevel@tonic-gate 	if (aiop == NULL)
18730Sstevel@tonic-gate 		return (EINVAL);
18740Sstevel@tonic-gate 
18750Sstevel@tonic-gate 	if (aiop->aio_outstanding == 0) {
18760Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
18770Sstevel@tonic-gate 		return (0);
18780Sstevel@tonic-gate 	}
18790Sstevel@tonic-gate 
18800Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18810Sstevel@tonic-gate 	if (cb != NULL) {
18820Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
18830Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
18840Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18850Sstevel@tonic-gate 				    ->aio_resultp;
18860Sstevel@tonic-gate 			else
18870Sstevel@tonic-gate 				resultp = &((aiocb_t *)cb)->aio_resultp;
18880Sstevel@tonic-gate 		}
18890Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18900Sstevel@tonic-gate 		else {
18910Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
18920Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18930Sstevel@tonic-gate 				    ->aio_resultp;
18940Sstevel@tonic-gate 			else if (run_mode == AIO_32)
18950Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb32_t *)cb)
18960Sstevel@tonic-gate 				    ->aio_resultp;
18970Sstevel@tonic-gate 		}
18980Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
18990Sstevel@tonic-gate 		index = AIO_HASH(resultp);
19000Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19010Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19020Sstevel@tonic-gate 			if (ent->aio_req_resultp == resultp) {
19030Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
19040Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19050Sstevel@tonic-gate 					*rval = AIO_ALLDONE;
19060Sstevel@tonic-gate 					return (0);
19070Sstevel@tonic-gate 				}
19080Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
19090Sstevel@tonic-gate 				*rval = AIO_NOTCANCELED;
19100Sstevel@tonic-gate 				return (0);
19110Sstevel@tonic-gate 			}
19120Sstevel@tonic-gate 		}
19130Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
19140Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
19150Sstevel@tonic-gate 		return (0);
19160Sstevel@tonic-gate 	}
19170Sstevel@tonic-gate 
19180Sstevel@tonic-gate 	for (index = 0; index < AIO_HASHSZ; index++) {
19190Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19200Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19210Sstevel@tonic-gate 			if (ent->aio_req_fd == fildes) {
19220Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
19230Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19240Sstevel@tonic-gate 					*rval = AIO_NOTCANCELED;
19250Sstevel@tonic-gate 					return (0);
19260Sstevel@tonic-gate 				}
19270Sstevel@tonic-gate 			}
19280Sstevel@tonic-gate 		}
19290Sstevel@tonic-gate 	}
19300Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
19310Sstevel@tonic-gate 	*rval = AIO_ALLDONE;
19320Sstevel@tonic-gate 	return (0);
19330Sstevel@tonic-gate }
19340Sstevel@tonic-gate 
19350Sstevel@tonic-gate /*
19360Sstevel@tonic-gate  * solaris version of asynchronous read and write
19370Sstevel@tonic-gate  */
19380Sstevel@tonic-gate static int
19390Sstevel@tonic-gate arw(
19400Sstevel@tonic-gate 	int	opcode,
19410Sstevel@tonic-gate 	int	fdes,
19420Sstevel@tonic-gate 	char	*bufp,
19430Sstevel@tonic-gate 	int	bufsize,
19440Sstevel@tonic-gate 	offset_t	offset,
19450Sstevel@tonic-gate 	aio_result_t	*resultp,
19460Sstevel@tonic-gate 	int		mode)
19470Sstevel@tonic-gate {
19480Sstevel@tonic-gate 	file_t		*fp;
19490Sstevel@tonic-gate 	int		error;
19500Sstevel@tonic-gate 	struct vnode	*vp;
19510Sstevel@tonic-gate 	aio_req_t	*reqp;
19520Sstevel@tonic-gate 	aio_t		*aiop;
19530Sstevel@tonic-gate 	int		(*aio_func)();
19540Sstevel@tonic-gate #ifdef _LP64
19550Sstevel@tonic-gate 	aiocb_t		aiocb;
19560Sstevel@tonic-gate #else
19570Sstevel@tonic-gate 	aiocb64_32_t	aiocb64;
19580Sstevel@tonic-gate #endif
19590Sstevel@tonic-gate 
19600Sstevel@tonic-gate 	aiop = curproc->p_aio;
19610Sstevel@tonic-gate 	if (aiop == NULL)
19620Sstevel@tonic-gate 		return (EINVAL);
19630Sstevel@tonic-gate 
19640Sstevel@tonic-gate 	if ((fp = getf(fdes)) == NULL) {
19650Sstevel@tonic-gate 		return (EBADF);
19660Sstevel@tonic-gate 	}
19670Sstevel@tonic-gate 
19680Sstevel@tonic-gate 	/*
19690Sstevel@tonic-gate 	 * check the permission of the partition
19700Sstevel@tonic-gate 	 */
19710Sstevel@tonic-gate 	if ((fp->f_flag & mode) == 0) {
19720Sstevel@tonic-gate 		releasef(fdes);
19730Sstevel@tonic-gate 		return (EBADF);
19740Sstevel@tonic-gate 	}
19750Sstevel@tonic-gate 
19760Sstevel@tonic-gate 	vp = fp->f_vnode;
19770Sstevel@tonic-gate 	aio_func = check_vp(vp, mode);
19780Sstevel@tonic-gate 	if (aio_func == NULL) {
19790Sstevel@tonic-gate 		releasef(fdes);
19800Sstevel@tonic-gate 		return (EBADFD);
19810Sstevel@tonic-gate 	}
19820Sstevel@tonic-gate #ifdef _LP64
19830Sstevel@tonic-gate 	aiocb.aio_fildes = fdes;
19840Sstevel@tonic-gate 	aiocb.aio_buf = bufp;
19850Sstevel@tonic-gate 	aiocb.aio_nbytes = bufsize;
19860Sstevel@tonic-gate 	aiocb.aio_offset = offset;
19870Sstevel@tonic-gate 	aiocb.aio_sigevent.sigev_notify = 0;
19881885Sraf 	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
19890Sstevel@tonic-gate #else
19900Sstevel@tonic-gate 	aiocb64.aio_fildes = fdes;
19910Sstevel@tonic-gate 	aiocb64.aio_buf = (caddr32_t)bufp;
19920Sstevel@tonic-gate 	aiocb64.aio_nbytes = bufsize;
19930Sstevel@tonic-gate 	aiocb64.aio_offset = offset;
19940Sstevel@tonic-gate 	aiocb64.aio_sigevent.sigev_notify = 0;
19951885Sraf 	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
19960Sstevel@tonic-gate #endif
19970Sstevel@tonic-gate 	if (error) {
19980Sstevel@tonic-gate 		releasef(fdes);
19990Sstevel@tonic-gate 		return (error);
20000Sstevel@tonic-gate 	}
20010Sstevel@tonic-gate 
20020Sstevel@tonic-gate 	/*
20030Sstevel@tonic-gate 	 * enable polling on this request if the opcode has
20040Sstevel@tonic-gate 	 * the AIO poll bit set
20050Sstevel@tonic-gate 	 */
20060Sstevel@tonic-gate 	if (opcode & AIO_POLL_BIT)
20070Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_POLL;
20080Sstevel@tonic-gate 
20090Sstevel@tonic-gate 	if (bufsize == 0) {
20100Sstevel@tonic-gate 		clear_active_fd(fdes);
20110Sstevel@tonic-gate 		aio_zerolen(reqp);
20120Sstevel@tonic-gate 		return (0);
20130Sstevel@tonic-gate 	}
20140Sstevel@tonic-gate 	/*
20150Sstevel@tonic-gate 	 * send the request to driver.
20160Sstevel@tonic-gate 	 */
20170Sstevel@tonic-gate 	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
20180Sstevel@tonic-gate 	/*
20190Sstevel@tonic-gate 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
20200Sstevel@tonic-gate 	 * is released by the aio_cleanup_thread() when the IO has
20210Sstevel@tonic-gate 	 * completed.
20220Sstevel@tonic-gate 	 */
20230Sstevel@tonic-gate 	if (error) {
20240Sstevel@tonic-gate 		releasef(fdes);
20250Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
20260Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
20270Sstevel@tonic-gate 		aiop->aio_pending--;
20280Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_REQ_BLOCK)
20290Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
20300Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
20310Sstevel@tonic-gate 		return (error);
20320Sstevel@tonic-gate 	}
20330Sstevel@tonic-gate 	clear_active_fd(fdes);
20340Sstevel@tonic-gate 	return (0);
20350Sstevel@tonic-gate }
20360Sstevel@tonic-gate 
20370Sstevel@tonic-gate /*
20380Sstevel@tonic-gate  * posix version of asynchronous read and write
20390Sstevel@tonic-gate  */
20401885Sraf static int
20410Sstevel@tonic-gate aiorw(
20420Sstevel@tonic-gate 	int		opcode,
20430Sstevel@tonic-gate 	void		*aiocb_arg,
20440Sstevel@tonic-gate 	int		mode,
20450Sstevel@tonic-gate 	int		run_mode)
20460Sstevel@tonic-gate {
20470Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
20480Sstevel@tonic-gate 	aiocb32_t	aiocb32;
20490Sstevel@tonic-gate 	struct	sigevent32 *sigev32;
20500Sstevel@tonic-gate 	port_notify32_t	pntfy32;
20510Sstevel@tonic-gate #endif
20520Sstevel@tonic-gate 	aiocb64_32_t	aiocb64;
20530Sstevel@tonic-gate 	aiocb_t		aiocb;
20540Sstevel@tonic-gate 	file_t		*fp;
20550Sstevel@tonic-gate 	int		error, fd;
20560Sstevel@tonic-gate 	size_t		bufsize;
20570Sstevel@tonic-gate 	struct vnode	*vp;
20580Sstevel@tonic-gate 	aio_req_t	*reqp;
20590Sstevel@tonic-gate 	aio_t		*aiop;
20600Sstevel@tonic-gate 	int		(*aio_func)();
20610Sstevel@tonic-gate 	aio_result_t	*resultp;
20620Sstevel@tonic-gate 	struct	sigevent *sigev;
20630Sstevel@tonic-gate 	model_t		model;
20640Sstevel@tonic-gate 	int		aio_use_port = 0;
20650Sstevel@tonic-gate 	port_notify_t	pntfy;
20660Sstevel@tonic-gate 
20670Sstevel@tonic-gate 	model = get_udatamodel();
20680Sstevel@tonic-gate 	aiop = curproc->p_aio;
20690Sstevel@tonic-gate 	if (aiop == NULL)
20700Sstevel@tonic-gate 		return (EINVAL);
20710Sstevel@tonic-gate 
20720Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
20730Sstevel@tonic-gate 		if (run_mode != AIO_LARGEFILE) {
20740Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
20750Sstevel@tonic-gate 				return (EFAULT);
20760Sstevel@tonic-gate 			bufsize = aiocb.aio_nbytes;
20770Sstevel@tonic-gate 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
20780Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
20790Sstevel@tonic-gate 				return (EBADF);
20800Sstevel@tonic-gate 			}
20810Sstevel@tonic-gate 			sigev = &aiocb.aio_sigevent;
20820Sstevel@tonic-gate 		} else {
20830Sstevel@tonic-gate 			/*
20840Sstevel@tonic-gate 			 * We come here only when we make largefile
20850Sstevel@tonic-gate 			 * call on 32 bit kernel using 32 bit library.
20860Sstevel@tonic-gate 			 */
20870Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
20880Sstevel@tonic-gate 				return (EFAULT);
20890Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
20900Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
20910Sstevel@tonic-gate 			    ->aio_resultp);
20921885Sraf 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
20930Sstevel@tonic-gate 				return (EBADF);
20940Sstevel@tonic-gate 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
20950Sstevel@tonic-gate 		}
20960Sstevel@tonic-gate 
20970Sstevel@tonic-gate 		if (sigev->sigev_notify == SIGEV_PORT) {
20980Sstevel@tonic-gate 			if (copyin((void *)sigev->sigev_value.sival_ptr,
20990Sstevel@tonic-gate 			    &pntfy, sizeof (port_notify_t))) {
21000Sstevel@tonic-gate 				releasef(fd);
21010Sstevel@tonic-gate 				return (EFAULT);
21020Sstevel@tonic-gate 			}
21030Sstevel@tonic-gate 			aio_use_port = 1;
21041885Sraf 		} else if (sigev->sigev_notify == SIGEV_THREAD) {
21051885Sraf 			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
21061885Sraf 			pntfy.portnfy_user =
21071885Sraf 			    aiocb.aio_sigevent.sigev_value.sival_ptr;
21081885Sraf 			aio_use_port = 1;
21090Sstevel@tonic-gate 		}
21100Sstevel@tonic-gate 	}
21110Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
21120Sstevel@tonic-gate 	else {
21130Sstevel@tonic-gate 		if (run_mode == AIO_32) {
21140Sstevel@tonic-gate 			/* 32 bit system call is being made on 64 bit kernel */
21150Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
21160Sstevel@tonic-gate 				return (EFAULT);
21170Sstevel@tonic-gate 
21180Sstevel@tonic-gate 			bufsize = aiocb32.aio_nbytes;
21190Sstevel@tonic-gate 			aiocb_32ton(&aiocb32, &aiocb);
21200Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
21210Sstevel@tonic-gate 			    aio_resultp);
21220Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
21230Sstevel@tonic-gate 				return (EBADF);
21240Sstevel@tonic-gate 			}
21250Sstevel@tonic-gate 			sigev32 = &aiocb32.aio_sigevent;
21260Sstevel@tonic-gate 		} else if (run_mode == AIO_LARGEFILE) {
21270Sstevel@tonic-gate 			/*
21280Sstevel@tonic-gate 			 * We come here only when we make largefile
21290Sstevel@tonic-gate 			 * call on 64 bit kernel using 32 bit library.
21300Sstevel@tonic-gate 			 */
21310Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
21320Sstevel@tonic-gate 				return (EFAULT);
21330Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
21340Sstevel@tonic-gate 			aiocb_LFton(&aiocb64, &aiocb);
21350Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
21360Sstevel@tonic-gate 			    ->aio_resultp);
21370Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
21380Sstevel@tonic-gate 				return (EBADF);
21390Sstevel@tonic-gate 			sigev32 = &aiocb64.aio_sigevent;
21400Sstevel@tonic-gate 		}
21410Sstevel@tonic-gate 
21420Sstevel@tonic-gate 		if (sigev32->sigev_notify == SIGEV_PORT) {
21430Sstevel@tonic-gate 			if (copyin(
21440Sstevel@tonic-gate 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
21450Sstevel@tonic-gate 			    &pntfy32, sizeof (port_notify32_t))) {
21460Sstevel@tonic-gate 				releasef(fd);
21470Sstevel@tonic-gate 				return (EFAULT);
21480Sstevel@tonic-gate 			}
21490Sstevel@tonic-gate 			pntfy.portnfy_port = pntfy32.portnfy_port;
21501885Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
21511885Sraf 			    pntfy32.portnfy_user;
21521885Sraf 			aio_use_port = 1;
21531885Sraf 		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
21541885Sraf 			pntfy.portnfy_port = sigev32->sigev_signo;
21551885Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
21561885Sraf 			    sigev32->sigev_value.sival_ptr;
21570Sstevel@tonic-gate 			aio_use_port = 1;
21580Sstevel@tonic-gate 		}
21590Sstevel@tonic-gate 	}
21600Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
21610Sstevel@tonic-gate 
21620Sstevel@tonic-gate 	/*
21630Sstevel@tonic-gate 	 * check the permission of the partition
21640Sstevel@tonic-gate 	 */
21650Sstevel@tonic-gate 
21660Sstevel@tonic-gate 	if ((fp->f_flag & mode) == 0) {
21670Sstevel@tonic-gate 		releasef(fd);
21680Sstevel@tonic-gate 		return (EBADF);
21690Sstevel@tonic-gate 	}
21700Sstevel@tonic-gate 
21710Sstevel@tonic-gate 	vp = fp->f_vnode;
21720Sstevel@tonic-gate 	aio_func = check_vp(vp, mode);
21730Sstevel@tonic-gate 	if (aio_func == NULL) {
21740Sstevel@tonic-gate 		releasef(fd);
21750Sstevel@tonic-gate 		return (EBADFD);
21760Sstevel@tonic-gate 	}
21771885Sraf 	if (run_mode == AIO_LARGEFILE)
21781885Sraf 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
21790Sstevel@tonic-gate 	else
21801885Sraf 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
21810Sstevel@tonic-gate 
21820Sstevel@tonic-gate 	if (error) {
21830Sstevel@tonic-gate 		releasef(fd);
21840Sstevel@tonic-gate 		return (error);
21850Sstevel@tonic-gate 	}
21860Sstevel@tonic-gate 	/*
21870Sstevel@tonic-gate 	 * enable polling on this request if the opcode has
21880Sstevel@tonic-gate 	 * the AIO poll bit set
21890Sstevel@tonic-gate 	 */
21900Sstevel@tonic-gate 	if (opcode & AIO_POLL_BIT)
21910Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_POLL;
21920Sstevel@tonic-gate 
21930Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
21940Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = aiocb_arg;
21950Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
21960Sstevel@tonic-gate 	else
21970Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
21980Sstevel@tonic-gate #endif
21990Sstevel@tonic-gate 
22001885Sraf 	if (aio_use_port) {
22011885Sraf 		int event = (run_mode == AIO_LARGEFILE)?
22021885Sraf 		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
22031885Sraf 		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
22041885Sraf 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
22051885Sraf 	}
22060Sstevel@tonic-gate 
22070Sstevel@tonic-gate 	/*
22080Sstevel@tonic-gate 	 * send the request to driver.
22090Sstevel@tonic-gate 	 */
22100Sstevel@tonic-gate 	if (error == 0) {
22110Sstevel@tonic-gate 		if (bufsize == 0) {
22120Sstevel@tonic-gate 			clear_active_fd(fd);
22130Sstevel@tonic-gate 			aio_zerolen(reqp);
22140Sstevel@tonic-gate 			return (0);
22150Sstevel@tonic-gate 		}
22160Sstevel@tonic-gate 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
22170Sstevel@tonic-gate 	}
22180Sstevel@tonic-gate 
22190Sstevel@tonic-gate 	/*
22200Sstevel@tonic-gate 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
22210Sstevel@tonic-gate 	 * is released by the aio_cleanup_thread() when the IO has
22220Sstevel@tonic-gate 	 * completed.
22230Sstevel@tonic-gate 	 */
22240Sstevel@tonic-gate 	if (error) {
22250Sstevel@tonic-gate 		releasef(fd);
22260Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
22271885Sraf 		aio_deq(&aiop->aio_portpending, reqp);
22280Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
22290Sstevel@tonic-gate 		aiop->aio_pending--;
22300Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_REQ_BLOCK)
22310Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
22320Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
22330Sstevel@tonic-gate 		return (error);
22340Sstevel@tonic-gate 	}
22350Sstevel@tonic-gate 	clear_active_fd(fd);
22360Sstevel@tonic-gate 	return (0);
22370Sstevel@tonic-gate }
22380Sstevel@tonic-gate 
22390Sstevel@tonic-gate 
22400Sstevel@tonic-gate /*
22410Sstevel@tonic-gate  * set error for a list IO entry that failed.
22420Sstevel@tonic-gate  */
static void
lio_set_error(aio_req_t *reqp)
{
	/*
	 * Fail a single list-I/O request: pull it off the port-pending
	 * queue and free it immediately; it will never be placed on the
	 * done queue.  The caller must not touch reqp after this returns.
	 */
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as it's never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 *	 aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	/* wake a waiter blocked on the pending count draining to zero */
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}
22680Sstevel@tonic-gate 
22690Sstevel@tonic-gate /*
22700Sstevel@tonic-gate  * check if a specified request is done, and remove it from
22710Sstevel@tonic-gate  * the done queue. otherwise remove anybody from the done queue
22720Sstevel@tonic-gate  * if NULL is specified.
22730Sstevel@tonic-gate  */
22740Sstevel@tonic-gate static aio_req_t *
22750Sstevel@tonic-gate aio_req_done(void *resultp)
22760Sstevel@tonic-gate {
22770Sstevel@tonic-gate 	aio_req_t **bucket;
22780Sstevel@tonic-gate 	aio_req_t *ent;
22790Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
22800Sstevel@tonic-gate 	long index;
22810Sstevel@tonic-gate 
22820Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
22830Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
22840Sstevel@tonic-gate 
22850Sstevel@tonic-gate 	if (resultp) {
22860Sstevel@tonic-gate 		index = AIO_HASH(resultp);
22870Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
22880Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
22890Sstevel@tonic-gate 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
22900Sstevel@tonic-gate 				if (ent->aio_req_flags & AIO_DONEQ) {
22910Sstevel@tonic-gate 					return (aio_req_remove(ent));
22920Sstevel@tonic-gate 				}
22930Sstevel@tonic-gate 				return (NULL);
22940Sstevel@tonic-gate 			}
22950Sstevel@tonic-gate 		}
22960Sstevel@tonic-gate 		/* no match, resultp is invalid */
22970Sstevel@tonic-gate 		return (NULL);
22980Sstevel@tonic-gate 	}
22990Sstevel@tonic-gate 	return (aio_req_remove(NULL));
23000Sstevel@tonic-gate }
23010Sstevel@tonic-gate 
23020Sstevel@tonic-gate /*
23030Sstevel@tonic-gate  * determine if a user-level resultp pointer is associated with an
23040Sstevel@tonic-gate  * active IO request. Zero is returned when the request is done,
23050Sstevel@tonic-gate  * and the request is removed from the done queue. Only when the
23060Sstevel@tonic-gate  * return value is zero, is the "reqp" pointer valid. One is returned
23070Sstevel@tonic-gate  * when the request is inprogress. Two is returned when the request
23080Sstevel@tonic-gate  * is invalid.
23090Sstevel@tonic-gate  */
23100Sstevel@tonic-gate static int
23110Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
23120Sstevel@tonic-gate {
23130Sstevel@tonic-gate 	aio_req_t **bucket;
23140Sstevel@tonic-gate 	aio_req_t *ent;
23150Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23160Sstevel@tonic-gate 	long index;
23170Sstevel@tonic-gate 
23180Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23190Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23200Sstevel@tonic-gate 
23210Sstevel@tonic-gate 	index = AIO_HASH(resultp);
23220Sstevel@tonic-gate 	bucket = &aiop->aio_hash[index];
23230Sstevel@tonic-gate 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23240Sstevel@tonic-gate 		if (ent->aio_req_resultp == resultp) {
23250Sstevel@tonic-gate 			if (ent->aio_req_flags & AIO_DONEQ) {
23260Sstevel@tonic-gate 				*reqp = aio_req_remove(ent);
23270Sstevel@tonic-gate 				return (0);
23280Sstevel@tonic-gate 			}
23290Sstevel@tonic-gate 			return (1);
23300Sstevel@tonic-gate 		}
23310Sstevel@tonic-gate 	}
23320Sstevel@tonic-gate 	/* no match, resultp is invalid */
23330Sstevel@tonic-gate 	return (2);
23340Sstevel@tonic-gate }
23350Sstevel@tonic-gate 
23360Sstevel@tonic-gate /*
23370Sstevel@tonic-gate  * remove a request from the done queue.
23380Sstevel@tonic-gate  */
static aio_req_t *
aio_req_remove(aio_req_t *reqp)
{
	/*
	 * Unlink a request from the circular doubly-linked done queue
	 * (or the cleanup queue, which shares the same links).  A NULL
	 * reqp means "pop the head of the done queue".  Returns the
	 * removed request, or NULL if reqp was NULL and the done queue
	 * was empty.  Caller must hold aio_mutex.
	 */
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp != NULL) {
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp->aio_req_next == reqp) {
			/* only one request on queue */
			if (reqp ==  aiop->aio_doneq) {
				aiop->aio_doneq = NULL;
			} else {
				/* not on doneq, so it must be the cleanupq */
				ASSERT(reqp == aiop->aio_cleanupq);
				aiop->aio_cleanupq = NULL;
			}
		} else {
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			/*
			 * The request can be either on the aio_doneq or the
			 * aio_cleanupq
			 */
			if (reqp == aiop->aio_doneq)
				aiop->aio_doneq = reqp->aio_req_next;

			if (reqp == aiop->aio_cleanupq)
				aiop->aio_cleanupq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	} else if ((reqp = aiop->aio_doneq) != NULL) {
		/* NULL argument: remove the head of the done queue */
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp == reqp->aio_req_next) {
			/* only one request on queue */
			aiop->aio_doneq = NULL;
		} else {
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			aiop->aio_doneq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	}
	/* wake aio_waitn() waiters once the done queue drains */
	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
		cv_broadcast(&aiop->aio_waitcv);
	return (reqp);
}
23900Sstevel@tonic-gate 
static int
aio_req_setup(
	aio_req_t	**reqpp,
	aio_t 		*aiop,
	aiocb_t 	*arg,
	aio_result_t 	*resultp,
	vnode_t		*vp)
{
	/*
	 * Allocate and initialize an aio_req_t for a submitted aiocb.
	 * On success the new request is stored through reqpp and 0 is
	 * returned; otherwise EAGAIN (allocation failure), EIO (process
	 * is blocking new requests), or the aio_req_alloc() error.
	 */
	sigqueue_t	*sqp = NULL;
	aio_req_t 	*reqp;
	struct uio 	*uio;
	struct sigevent *sigev;
	int		error;

	sigev = &arg->aio_sigevent;
	/*
	 * Pre-allocate the completion-signal queue entry (outside the
	 * mutex) only for a valid SIGEV_SIGNAL notification.
	 */
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value = sigev->sigev_value;
	}

	mutex_enter(&aiop->aio_mutex);

	/* process is draining pending I/O (e.g. exiting); refuse new work */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	/* port/thread notifications are tracked on the portpending queue */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
24630Sstevel@tonic-gate 
24640Sstevel@tonic-gate /*
24650Sstevel@tonic-gate  * Allocate p_aio struct.
24660Sstevel@tonic-gate  */
24670Sstevel@tonic-gate static aio_t *
24680Sstevel@tonic-gate aio_aiop_alloc(void)
24690Sstevel@tonic-gate {
24700Sstevel@tonic-gate 	aio_t	*aiop;
24710Sstevel@tonic-gate 
24720Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&curproc->p_lock));
24730Sstevel@tonic-gate 
24740Sstevel@tonic-gate 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
24750Sstevel@tonic-gate 	if (aiop) {
24760Sstevel@tonic-gate 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
24770Sstevel@tonic-gate 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
24780Sstevel@tonic-gate 									NULL);
24790Sstevel@tonic-gate 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
24800Sstevel@tonic-gate 	}
24810Sstevel@tonic-gate 	return (aiop);
24820Sstevel@tonic-gate }
24830Sstevel@tonic-gate 
24840Sstevel@tonic-gate /*
24850Sstevel@tonic-gate  * Allocate an aio_req struct.
24860Sstevel@tonic-gate  */
static int
aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
{
	/*
	 * Obtain an aio_req_t, preferring the per-process free list over
	 * dynamic memory, wire up its embedded uio/iov, and register it
	 * in the resultp hash.  Returns 0 on success, EAGAIN when memory
	 * is tight or exhausted, EINVAL when resultp is already in use.
	 * Caller must hold aio_mutex.
	 */
	aio_req_t *reqp;
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if ((reqp = aiop->aio_free) != NULL) {
		/* recycle from the free list; scrub any stale contents */
		aiop->aio_free = reqp->aio_req_next;
		bzero(reqp, sizeof (*reqp));
	} else {
		/*
		 * Check whether memory is getting tight.
		 * This is a temporary mechanism to avoid memory
		 * exhaustion by a single process until we come up
		 * with a per process solution such as setrlimit().
		 */
		if (freemem < desfree)
			return (EAGAIN);
		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
		if (reqp == NULL)
			return (EAGAIN);
	}
	/* point the request's uio at its own embedded uio and iovec */
	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
	reqp->aio_req.aio_private = reqp;
	reqp->aio_req_buf.b_offset = -1;
	reqp->aio_req_resultp = resultp;
	if (aio_hash_insert(reqp, aiop)) {
		/* duplicate resultp: return the request to the free list */
		reqp->aio_req_next = aiop->aio_free;
		aiop->aio_free = reqp;
		return (EINVAL);
	}
	*nreqp = reqp;
	return (0);
}
25240Sstevel@tonic-gate 
25250Sstevel@tonic-gate /*
25260Sstevel@tonic-gate  * Allocate an aio_lio_t struct.
25270Sstevel@tonic-gate  */
25280Sstevel@tonic-gate static int
25290Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head)
25300Sstevel@tonic-gate {
25310Sstevel@tonic-gate 	aio_lio_t *liop;
25320Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
25330Sstevel@tonic-gate 
25340Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
25350Sstevel@tonic-gate 
25360Sstevel@tonic-gate 	if ((liop = aiop->aio_lio_free) != NULL) {
25370Sstevel@tonic-gate 		aiop->aio_lio_free = liop->lio_next;
25380Sstevel@tonic-gate 	} else {
25390Sstevel@tonic-gate 		/*
25400Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25410Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25420Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25430Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25440Sstevel@tonic-gate 		 */
25450Sstevel@tonic-gate 		if (freemem < desfree)
25460Sstevel@tonic-gate 			return (EAGAIN);
25470Sstevel@tonic-gate 
25480Sstevel@tonic-gate 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
25490Sstevel@tonic-gate 		if (liop == NULL)
25500Sstevel@tonic-gate 			return (EAGAIN);
25510Sstevel@tonic-gate 	}
25520Sstevel@tonic-gate 	*head = liop;
25530Sstevel@tonic-gate 	return (0);
25540Sstevel@tonic-gate }
25550Sstevel@tonic-gate 
/*
 * this is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio. normally,
 * the process will have completed the aio before unmapping the
 * segment. If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped. In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 */
25660Sstevel@tonic-gate 
static int
aio_cleanup_thread(aio_t *aiop)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	int poked = 0;		/* set when cv_wait_sig() was interrupted */
	kcondvar_t *cvp;
	int exit_flag = 0;	/* safe to exit: pending I/O fully drained */
	int rqclnup = 0;	/* a DR-style cleanup request is in hand */

	/* block all maskable signals in this service thread */
	sigfillset(&curthread->t_hold);
	sigdiffset(&curthread->t_hold, &cantmask);
	for (;;) {
		/*
		 * if a segment is being unmapped, and the current
		 * process's done queue is not empty, then every request
		 * on the doneq with locked resources should be forced
		 * to release their locks. By moving the doneq request
		 * to the cleanupq, aio_cleanup() will process the cleanupq,
		 * and place requests back onto the doneq. All requests
		 * processed by aio_cleanup() will have their physical
		 * resources unlocked.
		 */
		mutex_enter(&aiop->aio_mutex);
		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
			aiop->aio_flags |= AIO_CLEANUP;
			mutex_enter(&as->a_contents);
			/* latch and clear any pending DR cleanup request */
			if (aiop->aio_rqclnup) {
				aiop->aio_rqclnup = 0;
				rqclnup = 1;
			}

			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
			    aiop->aio_doneq) {
				aio_req_t *doneqhead = aiop->aio_doneq;
				mutex_exit(&as->a_contents);
				aiop->aio_doneq = NULL;
				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
			} else {
				mutex_exit(&as->a_contents);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		aio_cleanup(AIO_CLEANUP_THREAD);
		/*
		 * thread should block on the cleanupcv while
		 * AIO_CLEANUP is set.
		 */
		cvp = &aiop->aio_cleanupcv;
		mutex_enter(&aiop->aio_mutex);

		/* more work queued while we were cleaning up: go again */
		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
		    aiop->aio_notifyq != NULL ||
		    aiop->aio_portcleanupq != NULL) {
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&as->a_contents);

		/*
		 * AIO_CLEANUP determines when the cleanup thread
		 * should be active. This flag is set when
		 * the cleanup thread is awakened by as_unmap() or
		 * due to DR operations.
		 * The flag is cleared when the blocking as_unmap()
		 * that originally awakened us is allowed to
		 * complete. as_unmap() blocks when trying to
		 * unmap a segment that has SOFTLOCKed pages. when
		 * the segment's pages are all SOFTUNLOCKed,
		 * as->a_flags & AS_UNMAPWAIT should be zero.
		 *
		 * In case of cleanup request by DR, the flag is cleared
		 * once all the pending aio requests have been processed.
		 *
		 * The flag shouldn't be cleared right away if the
		 * cleanup thread was interrupted because the process
		 * is doing forkall(). This happens when cv_wait_sig()
		 * returns zero, because it was awakened by a pokelwps().
		 * If the process is not exiting, it must be doing forkall().
		 */
		if ((poked == 0) &&
			((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
					(aiop->aio_pending == 0))) {
			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
			cvp = &as->a_cv;
			rqclnup = 0;
		}
		mutex_exit(&aiop->aio_mutex);
		if (poked) {
			/*
			 * If the process is exiting/killed, don't return
			 * immediately without waiting for pending I/O's
			 * and releasing the page locks.
			 */
			if (p->p_flag & (SEXITLWPS|SKILLED)) {
				/*
				 * If exit_flag is set, then it is
				 * safe to exit because we have released
				 * page locks of completed I/O's.
				 */
				if (exit_flag)
					break;

				mutex_exit(&as->a_contents);

				/*
				 * Wait for all the pending aio to complete.
				 */
				mutex_enter(&aiop->aio_mutex);
				aiop->aio_flags |= AIO_REQ_BLOCK;
				while (aiop->aio_pending != 0)
					cv_wait(&aiop->aio_cleanupcv,
						&aiop->aio_mutex);
				mutex_exit(&aiop->aio_mutex);
				exit_flag = 1;
				continue;
			} else if (p->p_flag &
			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
				/*
				 * hold LWP until it
				 * is continued.
				 */
				mutex_exit(&as->a_contents);
				mutex_enter(&p->p_lock);
				stop(PR_SUSPENDED, SUSPEND_NORMAL);
				mutex_exit(&p->p_lock);
				poked = 0;
				continue;
			}
		} else {
			/*
			 * When started this thread will sleep on as->a_cv.
			 * as_unmap will awake this thread if the
			 * segment has SOFTLOCKed pages (poked = 0).
			 * 1. pokelwps() awakes this thread =>
			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
			 * 2. as_unmap awakes this thread =>
			 *    to break the loop it is necessary that
			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
			 *	memory to be unlocked)
			 *    - AIO_CLEANUP is not set
			 *	(if AIO_CLEANUP is set we have to wait for
			 *	pending requests. aio_done will send a signal
			 *	for every request which completes to continue
			 *	unmapping the corresponding address range)
			 * 3. A cleanup request will wake this thread up, ex.
			 *    by the DR operations. The aio_rqclnup flag will
			 *    be set.
			 */
			while (poked == 0) {
				/*
				 * we need to handle cleanup requests
				 * that come in after we had just cleaned up,
				 * so that we do cleanup of any new aio
				 * requests that got completed and have
				 * locked resources.
				 */
				if ((aiop->aio_rqclnup ||
					(AS_ISUNMAPWAIT(as) != 0)) &&
					(aiop->aio_flags & AIO_CLEANUP) == 0)
					break;
				poked = !cv_wait_sig(cvp, &as->a_contents);
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_signal(cvp);
				if (aiop->aio_outstanding != 0)
					break;
			}
		}
		mutex_exit(&as->a_contents);
	}
exit:
	/*
	 * Reached via the break in the SEXITLWPS|SKILLED path above,
	 * with as->a_contents still held from that iteration.
	 */
	mutex_exit(&as->a_contents);
	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
	aston(curthread);	/* make thread do post_syscall */
	return (0);
}
27430Sstevel@tonic-gate 
27440Sstevel@tonic-gate /*
27450Sstevel@tonic-gate  * save a reference to a user's outstanding aio in a hash list.
27460Sstevel@tonic-gate  */
27470Sstevel@tonic-gate static int
27480Sstevel@tonic-gate aio_hash_insert(
27490Sstevel@tonic-gate 	aio_req_t *aio_reqp,
27500Sstevel@tonic-gate 	aio_t *aiop)
27510Sstevel@tonic-gate {
27520Sstevel@tonic-gate 	long index;
27530Sstevel@tonic-gate 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
27540Sstevel@tonic-gate 	aio_req_t *current;
27550Sstevel@tonic-gate 	aio_req_t **nextp;
27560Sstevel@tonic-gate 
27570Sstevel@tonic-gate 	index = AIO_HASH(resultp);
27580Sstevel@tonic-gate 	nextp = &aiop->aio_hash[index];
27590Sstevel@tonic-gate 	while ((current = *nextp) != NULL) {
27600Sstevel@tonic-gate 		if (current->aio_req_resultp == resultp)
27610Sstevel@tonic-gate 			return (DUPLICATE);
27620Sstevel@tonic-gate 		nextp = &current->aio_hash_next;
27630Sstevel@tonic-gate 	}
27640Sstevel@tonic-gate 	*nextp = aio_reqp;
27650Sstevel@tonic-gate 	aio_reqp->aio_hash_next = NULL;
27660Sstevel@tonic-gate 	return (0);
27670Sstevel@tonic-gate }
27680Sstevel@tonic-gate 
/*
 * Decide whether kernel async I/O (kaio) can service requests on this
 * vnode for the given access mode (FREAD/FWRITE).  Returns the async
 * entry point to call (a PXFS shim or a driver wrapper), or NULL, in
 * which case the request falls back to user-level aio (libaio).
 */
static int
(*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
    cred_t *)
{
	struct snode *sp;
	dev_t		dev;
	struct cb_ops  	*cb;
	major_t		major;
	int		(*aio_func)();

	dev = vp->v_rdev;
	major = getmajor(dev);

	/*
	 * return NULL for requests to files and STREAMs so
	 * that libaio takes care of them.
	 */
	if (vp->v_type == VCHR) {
		/* no stream device for kaio */
		if (STREAMSTAB(major)) {
			return (NULL);
		}
	} else {
		return (NULL);
	}

	/*
	 * Check old drivers which do not have async I/O entry points.
	 * (devo_rev < 3 predates the aread/awrite cb_ops fields.)
	 */
	if (devopsp[major]->devo_rev < 3)
		return (NULL);

	cb = devopsp[major]->devo_cb_ops;

	if (cb->cb_rev < 1)
		return (NULL);

	/*
	 * Check whether this device is a block device.
	 * Kaio is not supported for devices like tty.
	 */
	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
		return (NULL);

	/*
	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
	 * We cannot call the driver directly. Instead return the
	 * PXFS functions.
	 */

	if (IS_PXFSVP(vp)) {
		if (mode & FREAD)
			return (clpxfs_aio_read);
		else
			return (clpxfs_aio_write);
	}
	/* NULL here means the driver lacks the async entry point */
	if (mode & FREAD)
		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
	else
		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;

	/*
	 * Do we need this ?
	 * nodev returns ENXIO anyway.
	 * NOTE(review): aio_func is either NULL or a driver_aio_* wrapper
	 * at this point, so this comparison appears vestigial.
	 */
	if (aio_func == nodev)
		return (NULL);

	/* mark the snode accessed before handing back the entry point */
	sp = VTOS(vp);
	smark(sp, SACC);
	return (aio_func);
}
28410Sstevel@tonic-gate 
28420Sstevel@tonic-gate /*
28430Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28440Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28450Sstevel@tonic-gate  * We define this intermediate function that will do the right
28460Sstevel@tonic-gate  * thing for driver cases.
28470Sstevel@tonic-gate  */
28480Sstevel@tonic-gate 
28490Sstevel@tonic-gate static int
28500Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28510Sstevel@tonic-gate {
28520Sstevel@tonic-gate 	dev_t dev;
28530Sstevel@tonic-gate 	struct cb_ops  	*cb;
28540Sstevel@tonic-gate 
28550Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28560Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28570Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28580Sstevel@tonic-gate 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
28590Sstevel@tonic-gate 
28600Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28610Sstevel@tonic-gate 
28620Sstevel@tonic-gate 	ASSERT(cb->cb_awrite != nodev);
28630Sstevel@tonic-gate 	return ((*cb->cb_awrite)(dev, aio, cred_p));
28640Sstevel@tonic-gate }
28650Sstevel@tonic-gate 
28660Sstevel@tonic-gate /*
28670Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28680Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28690Sstevel@tonic-gate  * We define this intermediate function that will do the right
28700Sstevel@tonic-gate  * thing for driver cases.
28710Sstevel@tonic-gate  */
28720Sstevel@tonic-gate 
28730Sstevel@tonic-gate static int
28740Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28750Sstevel@tonic-gate {
28760Sstevel@tonic-gate 	dev_t dev;
28770Sstevel@tonic-gate 	struct cb_ops  	*cb;
28780Sstevel@tonic-gate 
28790Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28800Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28810Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28820Sstevel@tonic-gate 	ASSERT(!STREAMSTAB(getmajor(dev)));
28830Sstevel@tonic-gate 
28840Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28850Sstevel@tonic-gate 
28860Sstevel@tonic-gate 	ASSERT(cb->cb_aread != nodev);
28870Sstevel@tonic-gate 	return ((*cb->cb_aread)(dev, aio, cred_p));
28880Sstevel@tonic-gate }
28890Sstevel@tonic-gate 
/*
 * This routine is called when a largefile call is made by a 32bit
 * process on a ILP32 or LP64 kernel. All 64bit processes are large
 * file by definition and will call alio() instead.
 *
 * mode_arg is LIO_WAIT or LIO_NOWAIT; aiocb_arg is a user array of
 * nent 32-bit aiocb64 pointers; sigev, if non-NULL, is a user
 * sigevent32 describing list-completion notification.
 */
static int
alioLF(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;	/* cache of last fd checked */
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
#ifdef _LP64
	aiocb_t		aiocb_n;
#endif
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;	/* 1 while no request points at head */
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	port_notify32_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	/* copy in the user's array of aiocb pointers */
	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			/* SIGEV_THREAD encodes the port in sigev_signo */
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			/* pre-build the list-completion signal */
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	/*
	 * Submit each entry in the list.  Any entry that is skipped or
	 * fails must drop its share of lio_nent/lio_refcnt so the list
	 * notification still fires for the entries that were queued.
	 */
	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 * (LIO_READ == FREAD, LIO_WRITE == FWRITE, so the opcode
		 * doubles as the required f_flag bit)
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		/*
		 * On LP64 kernels widen the largefile aiocb to the
		 * native form first; on ILP32 it is used directly.
		 */
#ifdef	_LP64
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = *ucbp;

		/* per-request event-port / thread notification setup */
		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				/* same port as the list head: share it */
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length I/O completes immediately */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		/* block until every queued request has completed */
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* no request ever referenced the list head; free it here */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
32510Sstevel@tonic-gate 
32520Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
32530Sstevel@tonic-gate static void
32540Sstevel@tonic-gate aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
32550Sstevel@tonic-gate {
32560Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
32570Sstevel@tonic-gate 	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
32580Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
32590Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
32600Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
32610Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
32620Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
32630Sstevel@tonic-gate 
32640Sstevel@tonic-gate 	/*
32650Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
32660Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
32670Sstevel@tonic-gate 	 */
32680Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
32690Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
32700Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
32710Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
32720Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
32730Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
32740Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
32750Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
32760Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
32770Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
32780Sstevel@tonic-gate }
32790Sstevel@tonic-gate #endif
32800Sstevel@tonic-gate 
/*
 * This function is used only for largefile calls made by
 * 32 bit applications.
 *
 * Allocate and initialize an aio_req_t for one aiocb64_32 entry,
 * returning it through reqpp.  Returns 0 on success, EAGAIN if the
 * signal-queue entry cannot be allocated, EIO if the aio subsystem is
 * blocking new requests, or the error from aio_req_alloc().
 */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int 		error;

	/*
	 * If per-request signal notification was asked for, pre-build
	 * the siginfo to be queued on completion.
	 */
	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	/* new requests are being refused (e.g. process is exiting) */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	/* port/thread notifications are tracked on the portpending list */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
33570Sstevel@tonic-gate 
33580Sstevel@tonic-gate /*
33590Sstevel@tonic-gate  * This routine is called when a non largefile call is made by a 32bit
33600Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel.
33610Sstevel@tonic-gate  */
33620Sstevel@tonic-gate static int
33630Sstevel@tonic-gate alio32(
33640Sstevel@tonic-gate 	int		mode_arg,
33650Sstevel@tonic-gate 	void		*aiocb_arg,
33660Sstevel@tonic-gate 	int		nent,
33671885Sraf 	void		*sigev)
33680Sstevel@tonic-gate {
33690Sstevel@tonic-gate 	file_t		*fp;
33700Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
33710Sstevel@tonic-gate 	int		prev_mode = -1;
33720Sstevel@tonic-gate 	struct vnode	*vp;
33730Sstevel@tonic-gate 	aio_lio_t	*head;
33740Sstevel@tonic-gate 	aio_req_t	*reqp;
33750Sstevel@tonic-gate 	aio_t		*aiop;
33761885Sraf 	caddr_t		cbplist;
33770Sstevel@tonic-gate 	aiocb_t		cb;
33780Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
33790Sstevel@tonic-gate #ifdef	_LP64
33800Sstevel@tonic-gate 	aiocb32_t	*cbp;
33810Sstevel@tonic-gate 	caddr32_t	*ucbp;
33820Sstevel@tonic-gate 	aiocb32_t	cb32;
33830Sstevel@tonic-gate 	aiocb32_t	*aiocb32 = &cb32;
33841885Sraf 	struct sigevent32	sigevk;
33850Sstevel@tonic-gate #else
33860Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
33871885Sraf 	struct sigevent	sigevk;
33880Sstevel@tonic-gate #endif
33890Sstevel@tonic-gate 	sigqueue_t	*sqp;
33900Sstevel@tonic-gate 	int		(*aio_func)();
33910Sstevel@tonic-gate 	int		mode;
33921885Sraf 	int		error = 0;
33931885Sraf 	int		aio_errors = 0;
33940Sstevel@tonic-gate 	int		i;
33950Sstevel@tonic-gate 	size_t		ssize;
33960Sstevel@tonic-gate 	int		deadhead = 0;
33970Sstevel@tonic-gate 	int		aio_notsupported = 0;
33981885Sraf 	int		lio_head_port;
33991885Sraf 	int		aio_port;
34001885Sraf 	int		aio_thread;
34010Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
34020Sstevel@tonic-gate #ifdef	_LP64
34030Sstevel@tonic-gate 	port_notify32_t	pnotify;
34040Sstevel@tonic-gate #else
34050Sstevel@tonic-gate 	port_notify_t	pnotify;
34060Sstevel@tonic-gate #endif
34071885Sraf 	int		event;
34081885Sraf 
34090Sstevel@tonic-gate 	aiop = curproc->p_aio;
34100Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
34110Sstevel@tonic-gate 		return (EINVAL);
34120Sstevel@tonic-gate 
34130Sstevel@tonic-gate #ifdef	_LP64
34140Sstevel@tonic-gate 	ssize = (sizeof (caddr32_t) * nent);
34150Sstevel@tonic-gate #else
34160Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
34170Sstevel@tonic-gate #endif
34180Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
34190Sstevel@tonic-gate 	ucbp = (void *)cbplist;
34200Sstevel@tonic-gate 
34211885Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
34221885Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
34230Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
34240Sstevel@tonic-gate 		return (EFAULT);
34250Sstevel@tonic-gate 	}
34260Sstevel@tonic-gate 
34271885Sraf 	/* Event Ports  */
34281885Sraf 	if (sigev &&
34291885Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
34301885Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
34311885Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
34321885Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
34331885Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
34341885Sraf 		} else if (copyin(
34351885Sraf 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
34361885Sraf 		    &pnotify, sizeof (pnotify))) {
34370Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
34380Sstevel@tonic-gate 			return (EFAULT);
34390Sstevel@tonic-gate 		}
34401885Sraf 		error = port_alloc_event(pnotify.portnfy_port,
34411885Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
34421885Sraf 		if (error) {
34431885Sraf 			if (error == ENOMEM || error == EAGAIN)
34441885Sraf 				error = EAGAIN;
34451885Sraf 			else
34461885Sraf 				error = EINVAL;
34471885Sraf 			kmem_free(cbplist, ssize);
34481885Sraf 			return (error);
34491885Sraf 		}
34501885Sraf 		lio_head_port = pnotify.portnfy_port;
34510Sstevel@tonic-gate 	}
34520Sstevel@tonic-gate 
34530Sstevel@tonic-gate 	/*
34540Sstevel@tonic-gate 	 * a list head should be allocated if notification is
34550Sstevel@tonic-gate 	 * enabled for this list.
34560Sstevel@tonic-gate 	 */
34570Sstevel@tonic-gate 	head = NULL;
34580Sstevel@tonic-gate 
34591885Sraf 	if (mode_arg == LIO_WAIT || sigev) {
34600Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
34610Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
34620Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
34630Sstevel@tonic-gate 		if (error)
34640Sstevel@tonic-gate 			goto done;
34650Sstevel@tonic-gate 		deadhead = 1;
34660Sstevel@tonic-gate 		head->lio_nent = nent;
34670Sstevel@tonic-gate 		head->lio_refcnt = nent;
34681885Sraf 		head->lio_port = -1;
34691885Sraf 		head->lio_portkev = NULL;
34701885Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
34711885Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
34720Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
34730Sstevel@tonic-gate 			if (sqp == NULL) {
34740Sstevel@tonic-gate 				error = EAGAIN;
34750Sstevel@tonic-gate 				goto done;
34760Sstevel@tonic-gate 			}
34770Sstevel@tonic-gate 			sqp->sq_func = NULL;
34780Sstevel@tonic-gate 			sqp->sq_next = NULL;
34790Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
34800Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
34810Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
34820Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
34830Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
34841885Sraf 			sqp->sq_info.si_signo = sigevk.sigev_signo;
34850Sstevel@tonic-gate 			sqp->sq_info.si_value.sival_int =
34861885Sraf 			    sigevk.sigev_value.sival_int;
34870Sstevel@tonic-gate 			head->lio_sigqp = sqp;
34880Sstevel@tonic-gate 		} else {
34890Sstevel@tonic-gate 			head->lio_sigqp = NULL;
34900Sstevel@tonic-gate 		}
34911885Sraf 		if (pkevtp) {
34921885Sraf 			/*
34931885Sraf 			 * Prepare data to send when list of aiocb's has
34941885Sraf 			 * completed.
34951885Sraf 			 */
34961885Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
34971885Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
34981885Sraf 			    NULL, head);
34991885Sraf 			pkevtp->portkev_events = AIOLIO;
35001885Sraf 			head->lio_portkev = pkevtp;
35011885Sraf 			head->lio_port = pnotify.portnfy_port;
35021885Sraf 		}
35030Sstevel@tonic-gate 	}
35040Sstevel@tonic-gate 
35050Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
35060Sstevel@tonic-gate 
35070Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
35080Sstevel@tonic-gate #ifdef	_LP64
35090Sstevel@tonic-gate 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
35101885Sraf 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
35110Sstevel@tonic-gate #else
35120Sstevel@tonic-gate 		cbp = (aiocb_t *)*ucbp;
35131885Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
35140Sstevel@tonic-gate #endif
35151885Sraf 		{
35160Sstevel@tonic-gate 			if (head) {
35170Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35180Sstevel@tonic-gate 				head->lio_nent--;
35190Sstevel@tonic-gate 				head->lio_refcnt--;
35200Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35210Sstevel@tonic-gate 			}
35220Sstevel@tonic-gate 			continue;
35230Sstevel@tonic-gate 		}
35240Sstevel@tonic-gate #ifdef	_LP64
35250Sstevel@tonic-gate 		/*
35260Sstevel@tonic-gate 		 * copy 32 bit structure into 64 bit structure
35270Sstevel@tonic-gate 		 */
35280Sstevel@tonic-gate 		aiocb_32ton(aiocb32, aiocb);
35290Sstevel@tonic-gate #endif /* _LP64 */
35300Sstevel@tonic-gate 
35310Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
35320Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
35330Sstevel@tonic-gate 		if (mode == LIO_NOP) {
35340Sstevel@tonic-gate 			cbp = NULL;
35350Sstevel@tonic-gate 			if (head) {
35360Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35370Sstevel@tonic-gate 				head->lio_nent--;
35380Sstevel@tonic-gate 				head->lio_refcnt--;
35390Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35400Sstevel@tonic-gate 			}
35410Sstevel@tonic-gate 			continue;
35420Sstevel@tonic-gate 		}
35430Sstevel@tonic-gate 
35440Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
35450Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
35460Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35470Sstevel@tonic-gate 			if (head) {
35480Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35490Sstevel@tonic-gate 				head->lio_nent--;
35500Sstevel@tonic-gate 				head->lio_refcnt--;
35510Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35520Sstevel@tonic-gate 			}
35530Sstevel@tonic-gate 			aio_errors++;
35540Sstevel@tonic-gate 			continue;
35550Sstevel@tonic-gate 		}
35560Sstevel@tonic-gate 
35570Sstevel@tonic-gate 		/*
35580Sstevel@tonic-gate 		 * check the permission of the partition
35590Sstevel@tonic-gate 		 */
35600Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
35610Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
35620Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35630Sstevel@tonic-gate 			if (head) {
35640Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35650Sstevel@tonic-gate 				head->lio_nent--;
35660Sstevel@tonic-gate 				head->lio_refcnt--;
35670Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35680Sstevel@tonic-gate 			}
35690Sstevel@tonic-gate 			aio_errors++;
35700Sstevel@tonic-gate 			continue;
35710Sstevel@tonic-gate 		}
35720Sstevel@tonic-gate 
35730Sstevel@tonic-gate 		/*
35740Sstevel@tonic-gate 		 * common case where requests are to the same fd
35750Sstevel@tonic-gate 		 * for the same r/w operation
35760Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
35770Sstevel@tonic-gate 		 */
35781885Sraf 		vp = fp->f_vnode;
35791885Sraf 		if (fp != prev_fp || mode != prev_mode) {
35800Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
35810Sstevel@tonic-gate 			if (aio_func == NULL) {
35820Sstevel@tonic-gate 				prev_fp = NULL;
35830Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
35841885Sraf 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
35850Sstevel@tonic-gate 				aio_notsupported++;
35860Sstevel@tonic-gate 				if (head) {
35870Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
35880Sstevel@tonic-gate 					head->lio_nent--;
35890Sstevel@tonic-gate 					head->lio_refcnt--;
35900Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
35910Sstevel@tonic-gate 				}
35920Sstevel@tonic-gate 				continue;
35930Sstevel@tonic-gate 			} else {
35940Sstevel@tonic-gate 				prev_fp = fp;
35950Sstevel@tonic-gate 				prev_mode = mode;
35960Sstevel@tonic-gate 			}
35970Sstevel@tonic-gate 		}
35981885Sraf 
35991885Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
36001885Sraf 		    (aio_result_t *)&cbp->aio_resultp, vp);
36011885Sraf 		if (error) {
36020Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
36030Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
36040Sstevel@tonic-gate 			if (head) {
36050Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
36060Sstevel@tonic-gate 				head->lio_nent--;
36070Sstevel@tonic-gate 				head->lio_refcnt--;
36080Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
36090Sstevel@tonic-gate 			}
36100Sstevel@tonic-gate 			aio_errors++;
36110Sstevel@tonic-gate 			continue;
36120Sstevel@tonic-gate 		}
36130Sstevel@tonic-gate 
36140Sstevel@tonic-gate 		reqp->aio_req_lio = head;
36150Sstevel@tonic-gate 		deadhead = 0;
36160Sstevel@tonic-gate 
36170Sstevel@tonic-gate 		/*
36180Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
36190Sstevel@tonic-gate 		 * the driver to avoid a race condition
36200Sstevel@tonic-gate 		 */
36210Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
36220Sstevel@tonic-gate 		    EINPROGRESS);
36230Sstevel@tonic-gate 
36241885Sraf 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
36251885Sraf 
36261885Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
36271885Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
36281885Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
36291885Sraf 		if (aio_port | aio_thread) {
36301885Sraf 			port_kevent_t *lpkevp;
36311885Sraf 			/*
36321885Sraf 			 * Prepare data to send with each aiocb completed.
36331885Sraf 			 */
36340Sstevel@tonic-gate #ifdef _LP64
36351885Sraf 			if (aio_port) {
36361885Sraf 				void *paddr = (void  *)(uintptr_t)
36371885Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
36381885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
36391885Sraf 					error = EFAULT;
36401885Sraf 			} else {	/* aio_thread */
36411885Sraf 				pnotify.portnfy_port =
36421885Sraf 				    aiocb32->aio_sigevent.sigev_signo;
36431885Sraf 				pnotify.portnfy_user =
36441885Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
36451885Sraf 			}
36460Sstevel@tonic-gate #else
36471885Sraf 			if (aio_port) {
36481885Sraf 				void *paddr =
36491885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
36501885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
36511885Sraf 					error = EFAULT;
36521885Sraf 			} else {	/* aio_thread */
36531885Sraf 				pnotify.portnfy_port =
36541885Sraf 				    aiocb->aio_sigevent.sigev_signo;
36551885Sraf 				pnotify.portnfy_user =
36561885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
36571885Sraf 			}
36580Sstevel@tonic-gate #endif
36591885Sraf 			if (error)
36601885Sraf 				/* EMPTY */;
36611885Sraf 			else if (pkevtp != NULL &&
36621885Sraf 			    pnotify.portnfy_port == lio_head_port)
36631885Sraf 				error = port_dup_event(pkevtp, &lpkevp,
36641885Sraf 				    PORT_ALLOC_DEFAULT);
36651885Sraf 			else
36661885Sraf 				error = port_alloc_event(pnotify.portnfy_port,
36671885Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
36681885Sraf 				    &lpkevp);
36691885Sraf 			if (error == 0) {
36701885Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
36711885Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
36721885Sraf 				    aio_port_callback, reqp);
36731885Sraf 				lpkevp->portkev_events = event;
36741885Sraf 				reqp->aio_req_portkev = lpkevp;
36751885Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
36761885Sraf 			}
36770Sstevel@tonic-gate 		}
36780Sstevel@tonic-gate 
36790Sstevel@tonic-gate 		/*
36800Sstevel@tonic-gate 		 * send the request to driver.
36810Sstevel@tonic-gate 		 */
36820Sstevel@tonic-gate 		if (error == 0) {
36830Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
36840Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
36850Sstevel@tonic-gate 				aio_zerolen(reqp);
36860Sstevel@tonic-gate 				continue;
36870Sstevel@tonic-gate 			}
36880Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
36890Sstevel@tonic-gate 			    CRED());
36900Sstevel@tonic-gate 		}
36910Sstevel@tonic-gate 
36920Sstevel@tonic-gate 		/*
36930Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
36940Sstevel@tonic-gate 		 * completed unless there was an error.
36950Sstevel@tonic-gate 		 */
36960Sstevel@tonic-gate 		if (error) {
36970Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
36980Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
36990Sstevel@tonic-gate 			if (head) {
37000Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
37010Sstevel@tonic-gate 				head->lio_nent--;
37020Sstevel@tonic-gate 				head->lio_refcnt--;
37030Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37040Sstevel@tonic-gate 			}
37050Sstevel@tonic-gate 			if (error == ENOTSUP)
37060Sstevel@tonic-gate 				aio_notsupported++;
37070Sstevel@tonic-gate 			else
37080Sstevel@tonic-gate 				aio_errors++;
37090Sstevel@tonic-gate 			lio_set_error(reqp);
37100Sstevel@tonic-gate 		} else {
37110Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
37120Sstevel@tonic-gate 		}
37130Sstevel@tonic-gate 	}
37140Sstevel@tonic-gate 
37150Sstevel@tonic-gate 	if (aio_notsupported) {
37160Sstevel@tonic-gate 		error = ENOTSUP;
37170Sstevel@tonic-gate 	} else if (aio_errors) {
37180Sstevel@tonic-gate 		/*
37190Sstevel@tonic-gate 		 * return EIO if any request failed
37200Sstevel@tonic-gate 		 */
37210Sstevel@tonic-gate 		error = EIO;
37220Sstevel@tonic-gate 	}
37230Sstevel@tonic-gate 
37240Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
37250Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
37260Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
37270Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
37280Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37290Sstevel@tonic-gate 				error = EINTR;
37300Sstevel@tonic-gate 				goto done;
37310Sstevel@tonic-gate 			}
37320Sstevel@tonic-gate 		}
37330Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
37340Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
37350Sstevel@tonic-gate 	}
37360Sstevel@tonic-gate 
37370Sstevel@tonic-gate done:
37380Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
37390Sstevel@tonic-gate 	if (deadhead) {
37400Sstevel@tonic-gate 		if (head->lio_sigqp)
37410Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
37421885Sraf 		if (head->lio_portkev)
37431885Sraf 			port_free_event(head->lio_portkev);
37440Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
37450Sstevel@tonic-gate 	}
37460Sstevel@tonic-gate 	return (error);
37470Sstevel@tonic-gate }
37480Sstevel@tonic-gate 
37490Sstevel@tonic-gate 
37500Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
37510Sstevel@tonic-gate void
37520Sstevel@tonic-gate aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
37530Sstevel@tonic-gate {
37540Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
37550Sstevel@tonic-gate 	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
37560Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
37570Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
37580Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
37590Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
37600Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
37610Sstevel@tonic-gate 
37620Sstevel@tonic-gate 	/*
37630Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
37640Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
37650Sstevel@tonic-gate 	 */
37660Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
37670Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
37680Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
37690Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
37700Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
37710Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
37720Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
37730Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
37740Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
37750Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
37760Sstevel@tonic-gate }
37770Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
37780Sstevel@tonic-gate 
37790Sstevel@tonic-gate /*
37800Sstevel@tonic-gate  * aio_port_callback() is called just before the event is retrieved from the
37810Sstevel@tonic-gate  * port. The task of this callback function is to finish the work of the
 * transaction for the application, that is:
 * - copy out transaction data to the application
 *	(this thread is running in the right process context)
 * - keep track of the transaction (update of counters).
37860Sstevel@tonic-gate  * - free allocated buffers
37870Sstevel@tonic-gate  * The aiocb pointer is the object element of the port_kevent_t structure.
37880Sstevel@tonic-gate  *
37890Sstevel@tonic-gate  * flag :
37900Sstevel@tonic-gate  *	PORT_CALLBACK_DEFAULT : do copyout and free resources
37910Sstevel@tonic-gate  *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
37920Sstevel@tonic-gate  */
37930Sstevel@tonic-gate 
/*ARGSUSED*/
int
aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	aio_t		*aiop = curproc->p_aio;
	aio_req_t	*reqp = arg;	/* object element of the port_kevent_t */
	struct	iovec	*iov;
	struct	buf	*bp;
	void		*resultp;

	/*
	 * The copyout below must target the address space of the process
	 * that issued the aio request; refuse delivery in any other process.
	 */
	if (pid != curproc->p_pid) {
		/* wrong proc !!, can not deliver data here ... */
		return (EACCES);
	}

	/*
	 * Detach the request from its port event and from the port queue
	 * under aio_portq_mutex, then release the locked-down user pages.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	reqp->aio_req_portkev = NULL;
	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
	mutex_exit(&aiop->aio_portq_mutex);
	aphysio_unlock(reqp);		/* unlock used pages */
	mutex_enter(&aiop->aio_mutex);
	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
		/* results were already copied out elsewhere; just recycle */
		aio_req_free_port(aiop, reqp);	/* back to free list */
		mutex_exit(&aiop->aio_mutex);
		return (0);
	}

	/*
	 * Snapshot everything aio_copyout_result_port() needs before the
	 * request structure is returned to the free list, so the copyout
	 * can run without holding aio_mutex.
	 */
	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	resultp = (void *)reqp->aio_req_resultp;
	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
	mutex_exit(&aiop->aio_mutex);
	/* PORT_CALLBACK_CLOSE means free resources only, no copyout */
	if (flag == PORT_CALLBACK_DEFAULT)
		aio_copyout_result_port(iov, bp, resultp);
	return (0);
}
3830