xref: /onnv-gate/usr/src/uts/common/os/aio.c (revision 4123:e5cb484f034e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51885Sraf  * Common Development and Distribution License (the "License").
61885Sraf  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211885Sraf 
220Sstevel@tonic-gate /*
23*4123Sdm120769  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * Kernel asynchronous I/O.
310Sstevel@tonic-gate  * This is only for raw devices now (as of Nov. 1993).
320Sstevel@tonic-gate  */
330Sstevel@tonic-gate 
340Sstevel@tonic-gate #include <sys/types.h>
350Sstevel@tonic-gate #include <sys/errno.h>
360Sstevel@tonic-gate #include <sys/conf.h>
370Sstevel@tonic-gate #include <sys/file.h>
380Sstevel@tonic-gate #include <sys/fs/snode.h>
390Sstevel@tonic-gate #include <sys/unistd.h>
400Sstevel@tonic-gate #include <sys/cmn_err.h>
410Sstevel@tonic-gate #include <vm/as.h>
420Sstevel@tonic-gate #include <vm/faultcode.h>
430Sstevel@tonic-gate #include <sys/sysmacros.h>
440Sstevel@tonic-gate #include <sys/procfs.h>
450Sstevel@tonic-gate #include <sys/kmem.h>
460Sstevel@tonic-gate #include <sys/autoconf.h>
470Sstevel@tonic-gate #include <sys/ddi_impldefs.h>
480Sstevel@tonic-gate #include <sys/sunddi.h>
490Sstevel@tonic-gate #include <sys/aio_impl.h>
500Sstevel@tonic-gate #include <sys/debug.h>
510Sstevel@tonic-gate #include <sys/param.h>
520Sstevel@tonic-gate #include <sys/systm.h>
530Sstevel@tonic-gate #include <sys/vmsystm.h>
540Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h>
550Sstevel@tonic-gate #include <sys/contract/process_impl.h>
560Sstevel@tonic-gate 
570Sstevel@tonic-gate /*
580Sstevel@tonic-gate  * external entry point.
590Sstevel@tonic-gate  */
600Sstevel@tonic-gate #ifdef _LP64
610Sstevel@tonic-gate static int64_t kaioc(long, long, long, long, long, long);
620Sstevel@tonic-gate #endif
630Sstevel@tonic-gate static int kaio(ulong_t *, rval_t *);
640Sstevel@tonic-gate 
650Sstevel@tonic-gate 
660Sstevel@tonic-gate #define	AIO_64	0
670Sstevel@tonic-gate #define	AIO_32	1
680Sstevel@tonic-gate #define	AIO_LARGEFILE	2
690Sstevel@tonic-gate 
700Sstevel@tonic-gate /*
710Sstevel@tonic-gate  * implementation specific functions (private)
720Sstevel@tonic-gate  */
730Sstevel@tonic-gate #ifdef _LP64
741885Sraf static int alio(int, aiocb_t **, int, struct sigevent *);
750Sstevel@tonic-gate #endif
760Sstevel@tonic-gate static int aionotify(void);
770Sstevel@tonic-gate static int aioinit(void);
780Sstevel@tonic-gate static int aiostart(void);
790Sstevel@tonic-gate static void alio_cleanup(aio_t *, aiocb_t **, int, int);
800Sstevel@tonic-gate static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
810Sstevel@tonic-gate     cred_t *);
820Sstevel@tonic-gate static void lio_set_error(aio_req_t *);
830Sstevel@tonic-gate static aio_t *aio_aiop_alloc();
840Sstevel@tonic-gate static int aio_req_alloc(aio_req_t **, aio_result_t *);
850Sstevel@tonic-gate static int aio_lio_alloc(aio_lio_t **);
860Sstevel@tonic-gate static aio_req_t *aio_req_done(void *);
870Sstevel@tonic-gate static aio_req_t *aio_req_remove(aio_req_t *);
880Sstevel@tonic-gate static int aio_req_find(aio_result_t *, aio_req_t **);
890Sstevel@tonic-gate static int aio_hash_insert(struct aio_req_t *, aio_t *);
900Sstevel@tonic-gate static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
911885Sraf     aio_result_t *, vnode_t *);
920Sstevel@tonic-gate static int aio_cleanup_thread(aio_t *);
930Sstevel@tonic-gate static aio_lio_t *aio_list_get(aio_result_t *);
940Sstevel@tonic-gate static void lio_set_uerror(void *, int);
950Sstevel@tonic-gate extern void aio_zerolen(aio_req_t *);
960Sstevel@tonic-gate static int aiowait(struct timeval *, int, long	*);
970Sstevel@tonic-gate static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
980Sstevel@tonic-gate static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
990Sstevel@tonic-gate     aio_req_t *reqlist, aio_t *aiop, model_t model);
1000Sstevel@tonic-gate static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
1010Sstevel@tonic-gate static int aiosuspend(void *, int, struct  timespec *, int,
1020Sstevel@tonic-gate     long	*, int);
1030Sstevel@tonic-gate static int aliowait(int, void *, int, void *, int);
1040Sstevel@tonic-gate static int aioerror(void *, int);
1050Sstevel@tonic-gate static int aio_cancel(int, void *, long	*, int);
1060Sstevel@tonic-gate static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
1070Sstevel@tonic-gate static int aiorw(int, void *, int, int);
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate static int alioLF(int, void *, int, void *);
1101885Sraf static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
1111885Sraf     aio_result_t *, vnode_t *);
1120Sstevel@tonic-gate static int alio32(int, void *, int, void *);
1130Sstevel@tonic-gate static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1140Sstevel@tonic-gate static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1170Sstevel@tonic-gate static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
1180Sstevel@tonic-gate void	aiocb_32ton(aiocb32_t *, aiocb_t *);
1190Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
1200Sstevel@tonic-gate 
1210Sstevel@tonic-gate /*
1220Sstevel@tonic-gate  * implementation specific functions (external)
1230Sstevel@tonic-gate  */
1240Sstevel@tonic-gate void aio_req_free(aio_t *, aio_req_t *);
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate /*
1270Sstevel@tonic-gate  * Event Port framework
1280Sstevel@tonic-gate  */
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate void aio_req_free_port(aio_t *, aio_req_t *);
1310Sstevel@tonic-gate static int aio_port_callback(void *, int *, pid_t, int, void *);
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate /*
1340Sstevel@tonic-gate  * This is the loadable module wrapper.
1350Sstevel@tonic-gate  */
1360Sstevel@tonic-gate #include <sys/modctl.h>
1370Sstevel@tonic-gate #include <sys/syscall.h>
1380Sstevel@tonic-gate 
#ifdef _LP64

/*
 * Native 64-bit syscall entry: six register-style arguments and a
 * 64-bit return value.  SE_ARGC selects direct-argument dispatch
 * (kaioc) instead of the uap[] array style.
 */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit compatibility entry on a 64-bit kernel: uap[]-style
 * dispatch with seven argument words (kaio).
 */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

/* 32-bit kernel: a single uap[]-style entry point, seven arguments. */
static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef  _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate int
1940Sstevel@tonic-gate _init(void)
1950Sstevel@tonic-gate {
1960Sstevel@tonic-gate 	int retval;
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 	if ((retval = mod_install(&modlinkage)) != 0)
1990Sstevel@tonic-gate 		return (retval);
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	return (0);
2020Sstevel@tonic-gate }
2030Sstevel@tonic-gate 
2040Sstevel@tonic-gate int
2050Sstevel@tonic-gate _fini(void)
2060Sstevel@tonic-gate {
2070Sstevel@tonic-gate 	int retval;
2080Sstevel@tonic-gate 
2090Sstevel@tonic-gate 	retval = mod_remove(&modlinkage);
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate 	return (retval);
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate 
/*
 * Report module information through the standard modinfo mechanism.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
2190Sstevel@tonic-gate 
#ifdef	_LP64
/*
 * Native 64-bit entry point for the kaio system call.
 *
 * a0 carries the operation code, possibly with AIO_POLL_BIT or'd in;
 * a1..a5 are the opcode-specific arguments, cast below to each
 * handler's expected types.  On success the handler's rval is
 * returned; on failure errno is set via set_errno() and its (-1)
 * result is returned.
 *
 * Large-file (AIO*64) opcodes are intentionally absent here: on a
 * 64-bit kernel libc converts them to the regular 64-bit calls, so
 * they fall into the EINVAL default.
 */
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	/* Strip the poll bit; the remaining bits select the operation. */
	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related stuff is valid only for
	 * 32 bit kernel and not for 64 bit kernel
	 * On 64 bit kernel we convert large file calls
	 * to regular 64bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate static int
3000Sstevel@tonic-gate kaio(
3010Sstevel@tonic-gate 	ulong_t *uap,
3020Sstevel@tonic-gate 	rval_t *rvp)
3030Sstevel@tonic-gate {
3040Sstevel@tonic-gate 	long rval = 0;
3050Sstevel@tonic-gate 	int	error = 0;
3060Sstevel@tonic-gate 	offset_t	off;
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate 		rvp->r_vals = 0;
3100Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
3110Sstevel@tonic-gate 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
3120Sstevel@tonic-gate #else
3130Sstevel@tonic-gate 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
3140Sstevel@tonic-gate #endif
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	switch (uap[0] & ~AIO_POLL_BIT) {
3170Sstevel@tonic-gate 	/*
3180Sstevel@tonic-gate 	 * It must be the 32 bit system call on 64 bit kernel
3190Sstevel@tonic-gate 	 */
3200Sstevel@tonic-gate 	case AIOREAD:
3210Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3220Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
3230Sstevel@tonic-gate 	case AIOWRITE:
3240Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3250Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
3260Sstevel@tonic-gate 	case AIOWAIT:
3270Sstevel@tonic-gate 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
3280Sstevel@tonic-gate 		    &rval);
3290Sstevel@tonic-gate 		break;
3300Sstevel@tonic-gate 	case AIOWAITN:
3310Sstevel@tonic-gate 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
3320Sstevel@tonic-gate 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
3330Sstevel@tonic-gate 		break;
3340Sstevel@tonic-gate 	case AIONOTIFY:
3350Sstevel@tonic-gate 		return (aionotify());
3360Sstevel@tonic-gate 	case AIOINIT:
3370Sstevel@tonic-gate 		return (aioinit());
3380Sstevel@tonic-gate 	case AIOSTART:
3390Sstevel@tonic-gate 		return (aiostart());
3400Sstevel@tonic-gate 	case AIOLIO:
3410Sstevel@tonic-gate 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
3420Sstevel@tonic-gate 		    (void *)uap[4]));
3430Sstevel@tonic-gate 	case AIOLIOWAIT:
3440Sstevel@tonic-gate 		return (aliowait((int)uap[1], (void *)uap[2],
3450Sstevel@tonic-gate 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
3460Sstevel@tonic-gate 	case AIOSUSPEND:
3470Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3480Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4],
3490Sstevel@tonic-gate 		    &rval, AIO_32);
3500Sstevel@tonic-gate 		break;
3510Sstevel@tonic-gate 	case AIOERROR:
3520Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_32));
3530Sstevel@tonic-gate 	case AIOAREAD:
3540Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3550Sstevel@tonic-gate 		    FREAD, AIO_32));
3560Sstevel@tonic-gate 	case AIOAWRITE:
3570Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3580Sstevel@tonic-gate 		    FWRITE, AIO_32));
3590Sstevel@tonic-gate 	case AIOCANCEL:
3600Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
3610Sstevel@tonic-gate 		    AIO_32));
3620Sstevel@tonic-gate 		break;
3630Sstevel@tonic-gate 	case AIOLIO64:
3640Sstevel@tonic-gate 		return (alioLF((int)uap[1], (void *)uap[2],
3650Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4]));
3660Sstevel@tonic-gate 	case AIOLIOWAIT64:
3670Sstevel@tonic-gate 		return (aliowait(uap[1], (void *)uap[2],
3680Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
3690Sstevel@tonic-gate 	case AIOSUSPEND64:
3700Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3710Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4], &rval,
3720Sstevel@tonic-gate 		    AIO_LARGEFILE);
3730Sstevel@tonic-gate 		break;
3740Sstevel@tonic-gate 	case AIOERROR64:
3750Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
3760Sstevel@tonic-gate 	case AIOAREAD64:
3770Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
3780Sstevel@tonic-gate 		    AIO_LARGEFILE));
3790Sstevel@tonic-gate 	case AIOAWRITE64:
3800Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
3810Sstevel@tonic-gate 		    AIO_LARGEFILE));
3820Sstevel@tonic-gate 	case AIOCANCEL64:
3830Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2],
3840Sstevel@tonic-gate 		    &rval, AIO_LARGEFILE));
3850Sstevel@tonic-gate 		break;
3860Sstevel@tonic-gate 	default:
3870Sstevel@tonic-gate 		return (EINVAL);
3880Sstevel@tonic-gate 	}
3890Sstevel@tonic-gate 
3900Sstevel@tonic-gate 	rvp->r_val1 = rval;
3910Sstevel@tonic-gate 	return (error);
3920Sstevel@tonic-gate }
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate /*
3950Sstevel@tonic-gate  * wake up LWPs in this process that are sleeping in
3960Sstevel@tonic-gate  * aiowait().
3970Sstevel@tonic-gate  */
3980Sstevel@tonic-gate static int
3990Sstevel@tonic-gate aionotify(void)
4000Sstevel@tonic-gate {
4010Sstevel@tonic-gate 	aio_t	*aiop;
4020Sstevel@tonic-gate 
4030Sstevel@tonic-gate 	aiop = curproc->p_aio;
4040Sstevel@tonic-gate 	if (aiop == NULL)
4050Sstevel@tonic-gate 		return (0);
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
4080Sstevel@tonic-gate 	aiop->aio_notifycnt++;
4090Sstevel@tonic-gate 	cv_broadcast(&aiop->aio_waitcv);
4100Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 	return (0);
4130Sstevel@tonic-gate }
4140Sstevel@tonic-gate 
/*
 * Convert a user-supplied struct timeval into a relative
 * timestruc_t, handling both native and 32-bit caller data models.
 *
 * Outputs:
 *   *rqtp     - points at *rqtime when a real timeout was supplied,
 *               otherwise NULL.
 *   *blocking - 1 if the caller should sleep, 0 for a pure poll.
 *
 * Conventions: timout == NULL means wait indefinitely; a user
 * address of (uintptr_t)-1 means don't wait at all; a zero timeval
 * also means don't wait.  Returns 0, EFAULT on a bad user address,
 * or EINVAL for out-of-range fields.
 */
static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	/* Reject negative or out-of-range components. */
	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	/* Convert microseconds to nanoseconds for the timestruc_t. */
	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate static int
4790Sstevel@tonic-gate timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
4800Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4810Sstevel@tonic-gate {
4820Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4830Sstevel@tonic-gate 	timespec32_t wait_time_32;
4840Sstevel@tonic-gate #endif
4850Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 	*rqtp = NULL;
4880Sstevel@tonic-gate 	if (timout == NULL) {
4890Sstevel@tonic-gate 		*blocking = 1;
4900Sstevel@tonic-gate 		return (0);
4910Sstevel@tonic-gate 	}
4920Sstevel@tonic-gate 
4930Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4940Sstevel@tonic-gate 		if (copyin(timout, rqtime, sizeof (*rqtime)))
4950Sstevel@tonic-gate 			return (EFAULT);
4960Sstevel@tonic-gate 	}
4970Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4980Sstevel@tonic-gate 	else {
4990Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
5000Sstevel@tonic-gate 			return (EFAULT);
5010Sstevel@tonic-gate 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
5020Sstevel@tonic-gate 	}
5030Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
5060Sstevel@tonic-gate 		*blocking = 0;
5070Sstevel@tonic-gate 		return (0);
5080Sstevel@tonic-gate 	}
5090Sstevel@tonic-gate 
5100Sstevel@tonic-gate 	if (rqtime->tv_sec < 0 ||
5110Sstevel@tonic-gate 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
5120Sstevel@tonic-gate 		return (EINVAL);
5130Sstevel@tonic-gate 
5140Sstevel@tonic-gate 	*rqtp = rqtime;
5150Sstevel@tonic-gate 	*blocking = 1;
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 	return (0);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
/*
 * Wait for one outstanding asynchronous I/O request in this process
 * to complete, or until the timeout expires.
 *
 * timout: NULL waits forever, a user address of -1 polls, otherwise
 * a relative timeout copied from userland (see timeval2reltime()).
 * dontblockflg: if set and no aio is outstanding, fail with EINVAL
 * instead of sleeping.
 *
 * On success *rval receives the completed request's aio_result_t
 * user pointer (or 1 when consuming a user-level done-queue
 * notification) and the request is unlocked, copied out, and freed.
 * Returns 0, EINVAL, EINTR, or ETIME.
 */
/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		/*
		 * Snapshot timechanged before reading the clock so a
		 * clock step while we sleep can be detected by
		 * cv_waituntil_sig().  NOTE(review): timecheck is
		 * left uninitialized when rqtp == NULL; presumably
		 * cv_waituntil_sig() ignores it for an untimed wait
		 * -- confirm against its implementation.
		 */
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);	/* relative -> absolute */
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			/* aio_cleanup() must run without the mutex held */
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		/* a completed request is available: hand it back */
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		/* release locked pages, copy results out, free the req */
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate /*
6030Sstevel@tonic-gate  * aiowaitn can be used to reap completed asynchronous requests submitted with
6040Sstevel@tonic-gate  * lio_listio, aio_read or aio_write.
6050Sstevel@tonic-gate  * This function only reaps asynchronous raw I/Os.
6060Sstevel@tonic-gate  */
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate /*ARGSUSED*/
6090Sstevel@tonic-gate static int
6100Sstevel@tonic-gate aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
6110Sstevel@tonic-gate {
6120Sstevel@tonic-gate 	int 		error = 0;
6130Sstevel@tonic-gate 	aio_t		*aiop;
6140Sstevel@tonic-gate 	aio_req_t	*reqlist = NULL;
6150Sstevel@tonic-gate 	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
6160Sstevel@tonic-gate 	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
6170Sstevel@tonic-gate 	size_t		iocbsz;			/* users iocb size */
6180Sstevel@tonic-gate 	size_t		riocbsz;		/* returned iocb size */
6190Sstevel@tonic-gate 	int		iocb_index = 0;
6200Sstevel@tonic-gate 	model_t		model = get_udatamodel();
6210Sstevel@tonic-gate 	int		blocking = 1;
622*4123Sdm120769 	int		timecheck;
6230Sstevel@tonic-gate 	timestruc_t	rqtime;
6240Sstevel@tonic-gate 	timestruc_t	*rqtp;
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	aiop = curproc->p_aio;
6270Sstevel@tonic-gate 	if (aiop == NULL)
6280Sstevel@tonic-gate 		return (EINVAL);
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate 	if (aiop->aio_outstanding == 0)
6310Sstevel@tonic-gate 		return (EAGAIN);
6320Sstevel@tonic-gate 
6330Sstevel@tonic-gate 	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
6340Sstevel@tonic-gate 		return (EFAULT);
6350Sstevel@tonic-gate 
6360Sstevel@tonic-gate 	/* set *nwait to zero, if we must return prematurely */
6370Sstevel@tonic-gate 	if (copyout(&cnt, nwait, sizeof (uint_t)))
6380Sstevel@tonic-gate 		return (EFAULT);
6390Sstevel@tonic-gate 
6400Sstevel@tonic-gate 	if (waitcnt == 0) {
6410Sstevel@tonic-gate 		blocking = 0;
6420Sstevel@tonic-gate 		rqtp = NULL;
6430Sstevel@tonic-gate 		waitcnt = nent;
6440Sstevel@tonic-gate 	} else {
6450Sstevel@tonic-gate 		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
6460Sstevel@tonic-gate 		if (error)
6470Sstevel@tonic-gate 			return (error);
6480Sstevel@tonic-gate 	}
6490Sstevel@tonic-gate 
6500Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
6510Sstevel@tonic-gate 		iocbsz = (sizeof (aiocb_t *) * nent);
6520Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
6530Sstevel@tonic-gate 	else
6540Sstevel@tonic-gate 		iocbsz = (sizeof (caddr32_t) * nent);
6550Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
6560Sstevel@tonic-gate 
6570Sstevel@tonic-gate 	/*
6580Sstevel@tonic-gate 	 * Only one aio_waitn call is allowed at a time.
6590Sstevel@tonic-gate 	 * The active aio_waitn will collect all requests
6600Sstevel@tonic-gate 	 * out of the "done" list and if necessary it will wait
6610Sstevel@tonic-gate 	 * for some/all pending requests to fulfill the nwait
6620Sstevel@tonic-gate 	 * parameter.
6630Sstevel@tonic-gate 	 * A second or further aio_waitn calls will sleep here
6640Sstevel@tonic-gate 	 * until the active aio_waitn finishes and leaves the kernel
6650Sstevel@tonic-gate 	 * If the second call does not block (poll), then return
6660Sstevel@tonic-gate 	 * immediately with the error code : EAGAIN.
6670Sstevel@tonic-gate 	 * If the second call should block, then sleep here, but
6680Sstevel@tonic-gate 	 * do not touch the timeout. The timeout starts when this
6690Sstevel@tonic-gate 	 * aio_waitn-call becomes active.
6700Sstevel@tonic-gate 	 */
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
6730Sstevel@tonic-gate 
6740Sstevel@tonic-gate 	while (aiop->aio_flags & AIO_WAITN) {
6750Sstevel@tonic-gate 		if (blocking == 0) {
6760Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6770Sstevel@tonic-gate 			return (EAGAIN);
6780Sstevel@tonic-gate 		}
6790Sstevel@tonic-gate 
6800Sstevel@tonic-gate 		/* block, no timeout */
6810Sstevel@tonic-gate 		aiop->aio_flags |= AIO_WAITN_PENDING;
6820Sstevel@tonic-gate 		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
6830Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6840Sstevel@tonic-gate 			return (EINTR);
6850Sstevel@tonic-gate 		}
6860Sstevel@tonic-gate 	}
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate 	/*
6890Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
6900Sstevel@tonic-gate 	 */
6910Sstevel@tonic-gate 	if (rqtp) {
6920Sstevel@tonic-gate 		timestruc_t now;
693*4123Sdm120769 		timecheck = timechanged;
6940Sstevel@tonic-gate 		gethrestime(&now);
6950Sstevel@tonic-gate 		timespecadd(rqtp, &now);
6960Sstevel@tonic-gate 	}
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
6990Sstevel@tonic-gate 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
7000Sstevel@tonic-gate 		aiop->aio_iocb = NULL;
7010Sstevel@tonic-gate 	}
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate 	if (aiop->aio_iocb == NULL) {
7040Sstevel@tonic-gate 		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
7050Sstevel@tonic-gate 		if (iocblist == NULL) {
7060Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7070Sstevel@tonic-gate 			return (ENOMEM);
7080Sstevel@tonic-gate 		}
7090Sstevel@tonic-gate 		aiop->aio_iocb = (aiocb_t **)iocblist;
7100Sstevel@tonic-gate 		aiop->aio_iocbsz = iocbsz;
7110Sstevel@tonic-gate 	} else {
7120Sstevel@tonic-gate 		iocblist = (char *)aiop->aio_iocb;
7130Sstevel@tonic-gate 	}
7140Sstevel@tonic-gate 
7150Sstevel@tonic-gate 	aiop->aio_waitncnt = waitcnt;
7160Sstevel@tonic-gate 	aiop->aio_flags |= AIO_WAITN;
7170Sstevel@tonic-gate 
7180Sstevel@tonic-gate 	for (;;) {
7190Sstevel@tonic-gate 		/* push requests on poll queue to done queue */
7200Sstevel@tonic-gate 		if (aiop->aio_pollq) {
7210Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7220Sstevel@tonic-gate 			aio_cleanup(0);
7230Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
7240Sstevel@tonic-gate 		}
7250Sstevel@tonic-gate 
7260Sstevel@tonic-gate 		/* check for requests on done queue */
7270Sstevel@tonic-gate 		if (aiop->aio_doneq) {
7280Sstevel@tonic-gate 			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
7290Sstevel@tonic-gate 			aiop->aio_waitncnt = waitcnt - cnt;
7300Sstevel@tonic-gate 		}
7310Sstevel@tonic-gate 
7320Sstevel@tonic-gate 		/* user-level done queue might not be empty */
7330Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
7340Sstevel@tonic-gate 			aiop->aio_notifycnt--;
7350Sstevel@tonic-gate 			error = 0;
7360Sstevel@tonic-gate 			break;
7370Sstevel@tonic-gate 		}
7380Sstevel@tonic-gate 
7390Sstevel@tonic-gate 		/*
7400Sstevel@tonic-gate 		 * if we are here second time as a result of timer
7410Sstevel@tonic-gate 		 * expiration, we reset error if there are enough
7420Sstevel@tonic-gate 		 * aiocb's to satisfy request.
7430Sstevel@tonic-gate 		 * We return also if all requests are already done
7440Sstevel@tonic-gate 		 * and we picked up the whole done queue.
7450Sstevel@tonic-gate 		 */
7460Sstevel@tonic-gate 
7470Sstevel@tonic-gate 		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
7480Sstevel@tonic-gate 		    aiop->aio_doneq == NULL)) {
7490Sstevel@tonic-gate 			error = 0;
7500Sstevel@tonic-gate 			break;
7510Sstevel@tonic-gate 		}
7520Sstevel@tonic-gate 
7530Sstevel@tonic-gate 		if ((cnt < waitcnt) && blocking) {
7540Sstevel@tonic-gate 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
755*4123Sdm120769 				&aiop->aio_mutex, rqtp, timecheck);
7560Sstevel@tonic-gate 			if (rval > 0)
7570Sstevel@tonic-gate 				continue;
7580Sstevel@tonic-gate 			if (rval < 0) {
7590Sstevel@tonic-gate 				error = ETIME;
7600Sstevel@tonic-gate 				blocking = 0;
7610Sstevel@tonic-gate 				continue;
7620Sstevel@tonic-gate 			}
7630Sstevel@tonic-gate 			error = EINTR;
7640Sstevel@tonic-gate 		}
7650Sstevel@tonic-gate 		break;
7660Sstevel@tonic-gate 	}
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 	if (cnt > 0) {
7710Sstevel@tonic-gate 
7720Sstevel@tonic-gate 		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
7730Sstevel@tonic-gate 		    aiop, model);
7740Sstevel@tonic-gate 
7750Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE)
7760Sstevel@tonic-gate 			riocbsz = (sizeof (aiocb_t *) * cnt);
7770Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
7780Sstevel@tonic-gate 		else
7790Sstevel@tonic-gate 			riocbsz = (sizeof (caddr32_t) * cnt);
7800Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
7810Sstevel@tonic-gate 
7820Sstevel@tonic-gate 		if (copyout(iocblist, uiocb, riocbsz) ||
7830Sstevel@tonic-gate 		    copyout(&cnt, nwait, sizeof (uint_t)))
7840Sstevel@tonic-gate 			error = EFAULT;
7850Sstevel@tonic-gate 	}
7860Sstevel@tonic-gate 
7870Sstevel@tonic-gate 	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
7880Sstevel@tonic-gate 		kmem_free(iocblist, aiop->aio_iocbsz);
7890Sstevel@tonic-gate 		aiop->aio_iocb = NULL;
7900Sstevel@tonic-gate 	}
7910Sstevel@tonic-gate 
7920Sstevel@tonic-gate 	/* check if there is another thread waiting for execution */
7930Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
7940Sstevel@tonic-gate 	aiop->aio_flags &= ~AIO_WAITN;
7950Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_WAITN_PENDING) {
7960Sstevel@tonic-gate 		aiop->aio_flags &= ~AIO_WAITN_PENDING;
7970Sstevel@tonic-gate 		cv_signal(&aiop->aio_waitncv);
7980Sstevel@tonic-gate 	}
7990Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate 	return (error);
8020Sstevel@tonic-gate }
8030Sstevel@tonic-gate 
8040Sstevel@tonic-gate /*
8050Sstevel@tonic-gate  * aio_unlock_requests
8060Sstevel@tonic-gate  * copyouts the result of the request as well as the return value.
8070Sstevel@tonic-gate  * It builds the list of completed asynchronous requests,
8080Sstevel@tonic-gate  * unlocks the allocated memory ranges and
8090Sstevel@tonic-gate  * put the aio request structure back into the free list.
8100Sstevel@tonic-gate  */
8110Sstevel@tonic-gate 
8120Sstevel@tonic-gate static int
8130Sstevel@tonic-gate aio_unlock_requests(
8140Sstevel@tonic-gate 	caddr_t	iocblist,
8150Sstevel@tonic-gate 	int	iocb_index,
8160Sstevel@tonic-gate 	aio_req_t *reqlist,
8170Sstevel@tonic-gate 	aio_t	*aiop,
8180Sstevel@tonic-gate 	model_t	model)
8190Sstevel@tonic-gate {
8200Sstevel@tonic-gate 	aio_req_t	*reqp, *nreqp;
8210Sstevel@tonic-gate 
8220Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
8230Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8240Sstevel@tonic-gate 			(((caddr_t *)iocblist)[iocb_index++]) =
8250Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb;
8260Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8270Sstevel@tonic-gate 			aphysio_unlock(reqp);
8280Sstevel@tonic-gate 			aio_copyout_result(reqp);
8290Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8300Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8310Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8320Sstevel@tonic-gate 		}
8330Sstevel@tonic-gate 	}
8340Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
8350Sstevel@tonic-gate 	else {
8360Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8370Sstevel@tonic-gate 			((caddr32_t *)iocblist)[iocb_index++] =
8380Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb32;
8390Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8400Sstevel@tonic-gate 			aphysio_unlock(reqp);
8410Sstevel@tonic-gate 			aio_copyout_result(reqp);
8420Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8430Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8440Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8450Sstevel@tonic-gate 		}
8460Sstevel@tonic-gate 	}
8470Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
8480Sstevel@tonic-gate 	return (iocb_index);
8490Sstevel@tonic-gate }
8500Sstevel@tonic-gate 
8510Sstevel@tonic-gate /*
8520Sstevel@tonic-gate  * aio_reqlist_concat
8530Sstevel@tonic-gate  * moves "max" elements from the done queue to the reqlist queue and removes
8540Sstevel@tonic-gate  * the AIO_DONEQ flag.
8550Sstevel@tonic-gate  * - reqlist queue is a simple linked list
8560Sstevel@tonic-gate  * - done queue is a double linked list
8570Sstevel@tonic-gate  */
8580Sstevel@tonic-gate 
8590Sstevel@tonic-gate static int
8600Sstevel@tonic-gate aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
8610Sstevel@tonic-gate {
8620Sstevel@tonic-gate 	aio_req_t *q2, *q2work, *list;
8630Sstevel@tonic-gate 	int count = 0;
8640Sstevel@tonic-gate 
8650Sstevel@tonic-gate 	list = *reqlist;
8660Sstevel@tonic-gate 	q2 = aiop->aio_doneq;
8670Sstevel@tonic-gate 	q2work = q2;
8680Sstevel@tonic-gate 	while (max-- > 0) {
8690Sstevel@tonic-gate 		q2work->aio_req_flags &= ~AIO_DONEQ;
8700Sstevel@tonic-gate 		q2work = q2work->aio_req_next;
8710Sstevel@tonic-gate 		count++;
8720Sstevel@tonic-gate 		if (q2work == q2)
8730Sstevel@tonic-gate 			break;
8740Sstevel@tonic-gate 	}
8750Sstevel@tonic-gate 
8760Sstevel@tonic-gate 	if (q2work == q2) {
8770Sstevel@tonic-gate 		/* all elements revised */
8780Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = list;
8790Sstevel@tonic-gate 		list = q2;
8800Sstevel@tonic-gate 		aiop->aio_doneq = NULL;
8810Sstevel@tonic-gate 	} else {
8820Sstevel@tonic-gate 		/*
8830Sstevel@tonic-gate 		 * max < elements in the doneq
8840Sstevel@tonic-gate 		 * detach only the required amount of elements
8850Sstevel@tonic-gate 		 * out of the doneq
8860Sstevel@tonic-gate 		 */
8870Sstevel@tonic-gate 		q2work->aio_req_prev->aio_req_next = list;
8880Sstevel@tonic-gate 		list = q2;
8890Sstevel@tonic-gate 
8900Sstevel@tonic-gate 		aiop->aio_doneq = q2work;
8910Sstevel@tonic-gate 		q2work->aio_req_prev = q2->aio_req_prev;
8920Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = q2work;
8930Sstevel@tonic-gate 	}
8940Sstevel@tonic-gate 	*reqlist = list;
8950Sstevel@tonic-gate 	return (count);
8960Sstevel@tonic-gate }
8970Sstevel@tonic-gate 
8980Sstevel@tonic-gate /*ARGSUSED*/
8990Sstevel@tonic-gate static int
9000Sstevel@tonic-gate aiosuspend(
9010Sstevel@tonic-gate 	void	*aiocb,
9020Sstevel@tonic-gate 	int	nent,
9030Sstevel@tonic-gate 	struct	timespec	*timout,
9040Sstevel@tonic-gate 	int	flag,
9050Sstevel@tonic-gate 	long	*rval,
9060Sstevel@tonic-gate 	int	run_mode)
9070Sstevel@tonic-gate {
9080Sstevel@tonic-gate 	int 		error;
9090Sstevel@tonic-gate 	aio_t		*aiop;
9100Sstevel@tonic-gate 	aio_req_t	*reqp, *found, *next;
9110Sstevel@tonic-gate 	caddr_t		cbplist = NULL;
9120Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
9130Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
9140Sstevel@tonic-gate 	aiocb32_t	*cbp32;
9150Sstevel@tonic-gate 	caddr32_t	*ucbp32;
9160Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
9170Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
9180Sstevel@tonic-gate 	int		rv;
9190Sstevel@tonic-gate 	int		i;
9200Sstevel@tonic-gate 	size_t		ssize;
9210Sstevel@tonic-gate 	model_t		model = get_udatamodel();
9220Sstevel@tonic-gate 	int		blocking;
923*4123Sdm120769 	int		timecheck;
9240Sstevel@tonic-gate 	timestruc_t	rqtime;
9250Sstevel@tonic-gate 	timestruc_t	*rqtp;
9260Sstevel@tonic-gate 
9270Sstevel@tonic-gate 	aiop = curproc->p_aio;
9280Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0)
9290Sstevel@tonic-gate 		return (EINVAL);
9300Sstevel@tonic-gate 
9310Sstevel@tonic-gate 	/*
9320Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
9330Sstevel@tonic-gate 	 */
9340Sstevel@tonic-gate 	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
9350Sstevel@tonic-gate 	if (error)
9360Sstevel@tonic-gate 		return (error);
9370Sstevel@tonic-gate 	if (rqtp) {
9380Sstevel@tonic-gate 		timestruc_t now;
939*4123Sdm120769 		timecheck = timechanged;
9400Sstevel@tonic-gate 		gethrestime(&now);
9410Sstevel@tonic-gate 		timespecadd(rqtp, &now);
9420Sstevel@tonic-gate 	}
9430Sstevel@tonic-gate 
9440Sstevel@tonic-gate 	/*
9450Sstevel@tonic-gate 	 * If we are not blocking and there's no IO complete
9460Sstevel@tonic-gate 	 * skip aiocb copyin.
9470Sstevel@tonic-gate 	 */
9480Sstevel@tonic-gate 	if (!blocking && (aiop->aio_pollq == NULL) &&
9490Sstevel@tonic-gate 	    (aiop->aio_doneq == NULL)) {
9500Sstevel@tonic-gate 		return (EAGAIN);
9510Sstevel@tonic-gate 	}
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
9540Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
9550Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
9560Sstevel@tonic-gate 	else
9570Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
9580Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
9590Sstevel@tonic-gate 
9600Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
9610Sstevel@tonic-gate 	if (cbplist == NULL)
9620Sstevel@tonic-gate 		return (ENOMEM);
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
9650Sstevel@tonic-gate 		error = EFAULT;
9660Sstevel@tonic-gate 		goto done;
9670Sstevel@tonic-gate 	}
9680Sstevel@tonic-gate 
9690Sstevel@tonic-gate 	found = NULL;
9700Sstevel@tonic-gate 	/*
9710Sstevel@tonic-gate 	 * we need to get the aio_cleanupq_mutex since we call
9720Sstevel@tonic-gate 	 * aio_req_done().
9730Sstevel@tonic-gate 	 */
9740Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
9750Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
9760Sstevel@tonic-gate 	for (;;) {
9770Sstevel@tonic-gate 		/* push requests on poll queue to done queue */
9780Sstevel@tonic-gate 		if (aiop->aio_pollq) {
9790Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
9800Sstevel@tonic-gate 			mutex_exit(&aiop->aio_cleanupq_mutex);
9810Sstevel@tonic-gate 			aio_cleanup(0);
9820Sstevel@tonic-gate 			mutex_enter(&aiop->aio_cleanupq_mutex);
9830Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
9840Sstevel@tonic-gate 		}
9850Sstevel@tonic-gate 		/* check for requests on done queue */
9860Sstevel@tonic-gate 		if (aiop->aio_doneq) {
9870Sstevel@tonic-gate 			if (model == DATAMODEL_NATIVE)
9880Sstevel@tonic-gate 				ucbp = (aiocb_t **)cbplist;
9890Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
9900Sstevel@tonic-gate 			else
9910Sstevel@tonic-gate 				ucbp32 = (caddr32_t *)cbplist;
9920Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
9930Sstevel@tonic-gate 			for (i = 0; i < nent; i++) {
9940Sstevel@tonic-gate 				if (model == DATAMODEL_NATIVE) {
9950Sstevel@tonic-gate 					if ((cbp = *ucbp++) == NULL)
9960Sstevel@tonic-gate 						continue;
9970Sstevel@tonic-gate 					if (run_mode != AIO_LARGEFILE)
9980Sstevel@tonic-gate 						reqp = aio_req_done(
9990Sstevel@tonic-gate 						    &cbp->aio_resultp);
10000Sstevel@tonic-gate 					else {
10010Sstevel@tonic-gate 						cbp64 = (aiocb64_32_t *)cbp;
10020Sstevel@tonic-gate 						reqp = aio_req_done(
10030Sstevel@tonic-gate 						    &cbp64->aio_resultp);
10040Sstevel@tonic-gate 					}
10050Sstevel@tonic-gate 				}
10060Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
10070Sstevel@tonic-gate 				else {
10080Sstevel@tonic-gate 					if (run_mode == AIO_32) {
10090Sstevel@tonic-gate 						if ((cbp32 =
10100Sstevel@tonic-gate 						    (aiocb32_t *)(uintptr_t)
10110Sstevel@tonic-gate 						    *ucbp32++) == NULL)
10120Sstevel@tonic-gate 							continue;
10130Sstevel@tonic-gate 						reqp = aio_req_done(
10140Sstevel@tonic-gate 						    &cbp32->aio_resultp);
10150Sstevel@tonic-gate 					} else if (run_mode == AIO_LARGEFILE) {
10160Sstevel@tonic-gate 						if ((cbp64 =
10170Sstevel@tonic-gate 						    (aiocb64_32_t *)(uintptr_t)
10180Sstevel@tonic-gate 						    *ucbp32++) == NULL)
10190Sstevel@tonic-gate 							continue;
10200Sstevel@tonic-gate 						    reqp = aio_req_done(
10210Sstevel@tonic-gate 							&cbp64->aio_resultp);
10220Sstevel@tonic-gate 					}
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate 				}
10250Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
10260Sstevel@tonic-gate 				if (reqp) {
10270Sstevel@tonic-gate 					reqp->aio_req_next = found;
10280Sstevel@tonic-gate 					found = reqp;
10290Sstevel@tonic-gate 				}
10300Sstevel@tonic-gate 				if (aiop->aio_doneq == NULL)
10310Sstevel@tonic-gate 					break;
10320Sstevel@tonic-gate 			}
10330Sstevel@tonic-gate 			if (found)
10340Sstevel@tonic-gate 				break;
10350Sstevel@tonic-gate 		}
10360Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
10370Sstevel@tonic-gate 			/*
10380Sstevel@tonic-gate 			 * nothing on the kernel's queue. the user
10390Sstevel@tonic-gate 			 * has notified the kernel that it has items
10400Sstevel@tonic-gate 			 * on a user-level queue.
10410Sstevel@tonic-gate 			 */
10420Sstevel@tonic-gate 			aiop->aio_notifycnt--;
10430Sstevel@tonic-gate 			*rval = 1;
10440Sstevel@tonic-gate 			error = 0;
10450Sstevel@tonic-gate 			break;
10460Sstevel@tonic-gate 		}
10470Sstevel@tonic-gate 		/* don't block if nothing is outstanding */
10480Sstevel@tonic-gate 		if (aiop->aio_outstanding == 0) {
10490Sstevel@tonic-gate 			error = EAGAIN;
10500Sstevel@tonic-gate 			break;
10510Sstevel@tonic-gate 		}
10520Sstevel@tonic-gate 		if (blocking) {
10530Sstevel@tonic-gate 			/*
10540Sstevel@tonic-gate 			 * drop the aio_cleanupq_mutex as we are
10550Sstevel@tonic-gate 			 * going to block.
10560Sstevel@tonic-gate 			 */
10570Sstevel@tonic-gate 			mutex_exit(&aiop->aio_cleanupq_mutex);
10580Sstevel@tonic-gate 			rv = cv_waituntil_sig(&aiop->aio_waitcv,
1059*4123Sdm120769 				&aiop->aio_mutex, rqtp, timecheck);
10600Sstevel@tonic-gate 			/*
10610Sstevel@tonic-gate 			 * we have to drop aio_mutex and
10620Sstevel@tonic-gate 			 * grab it in the right order.
10630Sstevel@tonic-gate 			 */
10640Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
10650Sstevel@tonic-gate 			mutex_enter(&aiop->aio_cleanupq_mutex);
10660Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
10670Sstevel@tonic-gate 			if (rv > 0)	/* check done queue again */
10680Sstevel@tonic-gate 				continue;
10690Sstevel@tonic-gate 			if (rv == 0)	/* interrupted by a signal */
10700Sstevel@tonic-gate 				error = EINTR;
10710Sstevel@tonic-gate 			else		/* timer expired */
10720Sstevel@tonic-gate 				error = ETIME;
10730Sstevel@tonic-gate 		} else {
10740Sstevel@tonic-gate 			error = EAGAIN;
10750Sstevel@tonic-gate 		}
10760Sstevel@tonic-gate 		break;
10770Sstevel@tonic-gate 	}
10780Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
10790Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
10800Sstevel@tonic-gate 	for (reqp = found; reqp != NULL; reqp = next) {
10810Sstevel@tonic-gate 		next = reqp->aio_req_next;
10820Sstevel@tonic-gate 		aphysio_unlock(reqp);
10830Sstevel@tonic-gate 		aio_copyout_result(reqp);
10840Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
10850Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
10860Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
10870Sstevel@tonic-gate 	}
10880Sstevel@tonic-gate done:
10890Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
10900Sstevel@tonic-gate 	return (error);
10910Sstevel@tonic-gate }
10920Sstevel@tonic-gate 
10930Sstevel@tonic-gate /*
10940Sstevel@tonic-gate  * initialize aio by allocating an aio_t struct for this
10950Sstevel@tonic-gate  * process.
10960Sstevel@tonic-gate  */
10970Sstevel@tonic-gate static int
10980Sstevel@tonic-gate aioinit(void)
10990Sstevel@tonic-gate {
11000Sstevel@tonic-gate 	proc_t *p = curproc;
11010Sstevel@tonic-gate 	aio_t *aiop;
11020Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11030Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL) {
11040Sstevel@tonic-gate 		aiop = aio_aiop_alloc();
11050Sstevel@tonic-gate 		p->p_aio = aiop;
11060Sstevel@tonic-gate 	}
11070Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11080Sstevel@tonic-gate 	if (aiop == NULL)
11090Sstevel@tonic-gate 		return (ENOMEM);
11100Sstevel@tonic-gate 	return (0);
11110Sstevel@tonic-gate }
11120Sstevel@tonic-gate 
11130Sstevel@tonic-gate /*
11140Sstevel@tonic-gate  * start a special thread that will cleanup after aio requests
11150Sstevel@tonic-gate  * that are preventing a segment from being unmapped. as_unmap()
11160Sstevel@tonic-gate  * blocks until all phsyio to this segment is completed. this
11170Sstevel@tonic-gate  * doesn't happen until all the pages in this segment are not
11180Sstevel@tonic-gate  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
11190Sstevel@tonic-gate  * requests still outstanding. this special thread will make sure
11200Sstevel@tonic-gate  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
11210Sstevel@tonic-gate  *
11220Sstevel@tonic-gate  * this function will return an error if the process has only
11230Sstevel@tonic-gate  * one LWP. the assumption is that the caller is a separate LWP
11240Sstevel@tonic-gate  * that remains blocked in the kernel for the life of this process.
11250Sstevel@tonic-gate  */
11260Sstevel@tonic-gate static int
11270Sstevel@tonic-gate aiostart(void)
11280Sstevel@tonic-gate {
11290Sstevel@tonic-gate 	proc_t *p = curproc;
11300Sstevel@tonic-gate 	aio_t *aiop;
11310Sstevel@tonic-gate 	int first, error = 0;
11320Sstevel@tonic-gate 
11330Sstevel@tonic-gate 	if (p->p_lwpcnt == 1)
11340Sstevel@tonic-gate 		return (EDEADLK);
11350Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11360Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL)
11370Sstevel@tonic-gate 		error = EINVAL;
11380Sstevel@tonic-gate 	else {
11390Sstevel@tonic-gate 		first = aiop->aio_ok;
11400Sstevel@tonic-gate 		if (aiop->aio_ok == 0)
11410Sstevel@tonic-gate 			aiop->aio_ok = 1;
11420Sstevel@tonic-gate 	}
11430Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11440Sstevel@tonic-gate 	if (error == 0 && first == 0) {
11450Sstevel@tonic-gate 		return (aio_cleanup_thread(aiop));
11460Sstevel@tonic-gate 		/* should return only to exit */
11470Sstevel@tonic-gate 	}
11480Sstevel@tonic-gate 	return (error);
11490Sstevel@tonic-gate }
11500Sstevel@tonic-gate 
11510Sstevel@tonic-gate /*
11520Sstevel@tonic-gate  * Associate an aiocb with a port.
11530Sstevel@tonic-gate  * This function is used by aiorw() to associate a transaction with a port.
11540Sstevel@tonic-gate  * Allocate an event port structure (port_alloc_event()) and store the
11550Sstevel@tonic-gate  * delivered user pointer (portnfy_user) in the portkev_user field of the
11560Sstevel@tonic-gate  * port_kevent_t structure..
11570Sstevel@tonic-gate  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
11580Sstevel@tonic-gate  * the port association.
11590Sstevel@tonic-gate  */
11600Sstevel@tonic-gate 
11610Sstevel@tonic-gate static int
11621885Sraf aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
11631885Sraf 	aio_req_t *reqp, int event)
11640Sstevel@tonic-gate {
11650Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
11660Sstevel@tonic-gate 	int		error;
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
11690Sstevel@tonic-gate 	    PORT_SOURCE_AIO, &pkevp);
11700Sstevel@tonic-gate 	if (error) {
11710Sstevel@tonic-gate 		if ((error == ENOMEM) || (error == EAGAIN))
11720Sstevel@tonic-gate 			error = EAGAIN;
11730Sstevel@tonic-gate 		else
11740Sstevel@tonic-gate 			error = EINVAL;
11750Sstevel@tonic-gate 	} else {
11760Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
11770Sstevel@tonic-gate 		    aio_port_callback, reqp);
11781885Sraf 		pkevp->portkev_events = event;
11790Sstevel@tonic-gate 		reqp->aio_req_portkev = pkevp;
11800Sstevel@tonic-gate 		reqp->aio_req_port = pntfy->portnfy_port;
11810Sstevel@tonic-gate 	}
11820Sstevel@tonic-gate 	return (error);
11830Sstevel@tonic-gate }
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate #ifdef _LP64
11860Sstevel@tonic-gate 
11870Sstevel@tonic-gate /*
11880Sstevel@tonic-gate  * Asynchronous list IO. A chain of aiocb's are copied in
11890Sstevel@tonic-gate  * one at a time. If the aiocb is invalid, it is skipped.
11900Sstevel@tonic-gate  * For each aiocb, the appropriate driver entry point is
11910Sstevel@tonic-gate  * called. Optimize for the common case where the list
11920Sstevel@tonic-gate  * of requests is to the same file descriptor.
11930Sstevel@tonic-gate  *
11940Sstevel@tonic-gate  * One possible optimization is to define a new driver entry
11950Sstevel@tonic-gate  * point that supports a list of IO requests. Whether this
11960Sstevel@tonic-gate  * improves performance depends somewhat on the driver's
11970Sstevel@tonic-gate  * locking strategy. Processing a list could adversely impact
11980Sstevel@tonic-gate  * the driver's interrupt latency.
11990Sstevel@tonic-gate  */
12000Sstevel@tonic-gate static int
12010Sstevel@tonic-gate alio(
12021885Sraf 	int		mode_arg,
12031885Sraf 	aiocb_t		**aiocb_arg,
12041885Sraf 	int		nent,
12051885Sraf 	struct sigevent	*sigev)
12060Sstevel@tonic-gate {
12070Sstevel@tonic-gate 	file_t		*fp;
12080Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
12090Sstevel@tonic-gate 	int		prev_mode = -1;
12100Sstevel@tonic-gate 	struct vnode	*vp;
12110Sstevel@tonic-gate 	aio_lio_t	*head;
12120Sstevel@tonic-gate 	aio_req_t	*reqp;
12130Sstevel@tonic-gate 	aio_t		*aiop;
12140Sstevel@tonic-gate 	caddr_t		cbplist;
12150Sstevel@tonic-gate 	aiocb_t		cb;
12160Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
12171885Sraf 	aiocb_t		*cbp;
12181885Sraf 	aiocb_t		**ucbp;
12190Sstevel@tonic-gate 	struct sigevent sigevk;
12200Sstevel@tonic-gate 	sigqueue_t	*sqp;
12210Sstevel@tonic-gate 	int		(*aio_func)();
12220Sstevel@tonic-gate 	int		mode;
12230Sstevel@tonic-gate 	int		error = 0;
12240Sstevel@tonic-gate 	int		aio_errors = 0;
12250Sstevel@tonic-gate 	int		i;
12260Sstevel@tonic-gate 	size_t		ssize;
12270Sstevel@tonic-gate 	int		deadhead = 0;
12280Sstevel@tonic-gate 	int		aio_notsupported = 0;
12291885Sraf 	int		lio_head_port;
12301885Sraf 	int		aio_port;
12311885Sraf 	int		aio_thread;
12320Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
12330Sstevel@tonic-gate 	port_notify_t	pnotify;
12341885Sraf 	int		event;
12350Sstevel@tonic-gate 
12360Sstevel@tonic-gate 	aiop = curproc->p_aio;
12370Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
12380Sstevel@tonic-gate 		return (EINVAL);
12390Sstevel@tonic-gate 
12400Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
12410Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
12420Sstevel@tonic-gate 	ucbp = (aiocb_t **)cbplist;
12430Sstevel@tonic-gate 
12441885Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
12451885Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
12460Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
12470Sstevel@tonic-gate 		return (EFAULT);
12480Sstevel@tonic-gate 	}
12490Sstevel@tonic-gate 
12501885Sraf 	/* Event Ports  */
12511885Sraf 	if (sigev &&
12521885Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
12531885Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
12541885Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
12551885Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
12561885Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
12571885Sraf 		} else if (copyin(sigevk.sigev_value.sival_ptr,
12581885Sraf 		    &pnotify, sizeof (pnotify))) {
12590Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
12600Sstevel@tonic-gate 			return (EFAULT);
12610Sstevel@tonic-gate 		}
12621885Sraf 		error = port_alloc_event(pnotify.portnfy_port,
12631885Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
12641885Sraf 		if (error) {
12651885Sraf 			if (error == ENOMEM || error == EAGAIN)
12661885Sraf 				error = EAGAIN;
12671885Sraf 			else
12681885Sraf 				error = EINVAL;
12691885Sraf 			kmem_free(cbplist, ssize);
12701885Sraf 			return (error);
12711885Sraf 		}
12721885Sraf 		lio_head_port = pnotify.portnfy_port;
12730Sstevel@tonic-gate 	}
12740Sstevel@tonic-gate 
12750Sstevel@tonic-gate 	/*
12760Sstevel@tonic-gate 	 * a list head should be allocated if notification is
12770Sstevel@tonic-gate 	 * enabled for this list.
12780Sstevel@tonic-gate 	 */
12790Sstevel@tonic-gate 	head = NULL;
12800Sstevel@tonic-gate 
12811885Sraf 	if (mode_arg == LIO_WAIT || sigev) {
12820Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
12830Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
12840Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
12850Sstevel@tonic-gate 		if (error)
12860Sstevel@tonic-gate 			goto done;
12870Sstevel@tonic-gate 		deadhead = 1;
12880Sstevel@tonic-gate 		head->lio_nent = nent;
12890Sstevel@tonic-gate 		head->lio_refcnt = nent;
12901885Sraf 		head->lio_port = -1;
12911885Sraf 		head->lio_portkev = NULL;
12921885Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
12931885Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
12940Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
12950Sstevel@tonic-gate 			if (sqp == NULL) {
12960Sstevel@tonic-gate 				error = EAGAIN;
12970Sstevel@tonic-gate 				goto done;
12980Sstevel@tonic-gate 			}
12990Sstevel@tonic-gate 			sqp->sq_func = NULL;
13000Sstevel@tonic-gate 			sqp->sq_next = NULL;
13010Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
13020Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
13030Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
13040Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
13050Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
13060Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
13070Sstevel@tonic-gate 			sqp->sq_info.si_value = sigevk.sigev_value;
13080Sstevel@tonic-gate 			head->lio_sigqp = sqp;
13090Sstevel@tonic-gate 		} else {
13100Sstevel@tonic-gate 			head->lio_sigqp = NULL;
13110Sstevel@tonic-gate 		}
13121885Sraf 		if (pkevtp) {
13131885Sraf 			/*
13141885Sraf 			 * Prepare data to send when list of aiocb's
13151885Sraf 			 * has completed.
13161885Sraf 			 */
13171885Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
13181885Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
13191885Sraf 			    NULL, head);
13201885Sraf 			pkevtp->portkev_events = AIOLIO;
13211885Sraf 			head->lio_portkev = pkevtp;
13221885Sraf 			head->lio_port = pnotify.portnfy_port;
13231885Sraf 		}
13240Sstevel@tonic-gate 	}
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
13270Sstevel@tonic-gate 
13280Sstevel@tonic-gate 		cbp = *ucbp;
13290Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
13301885Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
13310Sstevel@tonic-gate 			if (head) {
13320Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13330Sstevel@tonic-gate 				head->lio_nent--;
13340Sstevel@tonic-gate 				head->lio_refcnt--;
13350Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13360Sstevel@tonic-gate 			}
13370Sstevel@tonic-gate 			continue;
13380Sstevel@tonic-gate 		}
13390Sstevel@tonic-gate 
13400Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
13410Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
13420Sstevel@tonic-gate 		if (mode == LIO_NOP) {
13430Sstevel@tonic-gate 			cbp = NULL;
13440Sstevel@tonic-gate 			if (head) {
13450Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13460Sstevel@tonic-gate 				head->lio_nent--;
13470Sstevel@tonic-gate 				head->lio_refcnt--;
13480Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13490Sstevel@tonic-gate 			}
13500Sstevel@tonic-gate 			continue;
13510Sstevel@tonic-gate 		}
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
13540Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
13550Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13560Sstevel@tonic-gate 			if (head) {
13570Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13580Sstevel@tonic-gate 				head->lio_nent--;
13590Sstevel@tonic-gate 				head->lio_refcnt--;
13600Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13610Sstevel@tonic-gate 			}
13620Sstevel@tonic-gate 			aio_errors++;
13630Sstevel@tonic-gate 			continue;
13640Sstevel@tonic-gate 		}
13650Sstevel@tonic-gate 
13660Sstevel@tonic-gate 		/*
13670Sstevel@tonic-gate 		 * check the permission of the partition
13680Sstevel@tonic-gate 		 */
13690Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
13700Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
13710Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13720Sstevel@tonic-gate 			if (head) {
13730Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13740Sstevel@tonic-gate 				head->lio_nent--;
13750Sstevel@tonic-gate 				head->lio_refcnt--;
13760Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13770Sstevel@tonic-gate 			}
13780Sstevel@tonic-gate 			aio_errors++;
13790Sstevel@tonic-gate 			continue;
13800Sstevel@tonic-gate 		}
13810Sstevel@tonic-gate 
13820Sstevel@tonic-gate 		/*
13831885Sraf 		 * common case where requests are to the same fd
13841885Sraf 		 * for the same r/w operation.
13850Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
13860Sstevel@tonic-gate 		 */
13871885Sraf 		vp = fp->f_vnode;
13881885Sraf 		if (fp != prev_fp || mode != prev_mode) {
13890Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
13900Sstevel@tonic-gate 			if (aio_func == NULL) {
13910Sstevel@tonic-gate 				prev_fp = NULL;
13920Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
13930Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
13940Sstevel@tonic-gate 				aio_notsupported++;
13950Sstevel@tonic-gate 				if (head) {
13960Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
13970Sstevel@tonic-gate 					head->lio_nent--;
13980Sstevel@tonic-gate 					head->lio_refcnt--;
13990Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
14000Sstevel@tonic-gate 				}
14010Sstevel@tonic-gate 				continue;
14020Sstevel@tonic-gate 			} else {
14030Sstevel@tonic-gate 				prev_fp = fp;
14040Sstevel@tonic-gate 				prev_mode = mode;
14050Sstevel@tonic-gate 			}
14060Sstevel@tonic-gate 		}
14070Sstevel@tonic-gate 
14081885Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
14091885Sraf 		    &cbp->aio_resultp, vp);
14101885Sraf 		if (error) {
14110Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14120Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14130Sstevel@tonic-gate 			if (head) {
14140Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14150Sstevel@tonic-gate 				head->lio_nent--;
14160Sstevel@tonic-gate 				head->lio_refcnt--;
14170Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14180Sstevel@tonic-gate 			}
14190Sstevel@tonic-gate 			aio_errors++;
14200Sstevel@tonic-gate 			continue;
14210Sstevel@tonic-gate 		}
14220Sstevel@tonic-gate 
14230Sstevel@tonic-gate 		reqp->aio_req_lio = head;
14240Sstevel@tonic-gate 		deadhead = 0;
14250Sstevel@tonic-gate 
14260Sstevel@tonic-gate 		/*
14270Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
14280Sstevel@tonic-gate 		 * the driver to avoid a race condition
14290Sstevel@tonic-gate 		 */
14300Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
14310Sstevel@tonic-gate 		    EINPROGRESS);
14320Sstevel@tonic-gate 
14330Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
14340Sstevel@tonic-gate 
14351885Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
14361885Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
14371885Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
14381885Sraf 		if (aio_port | aio_thread) {
14391885Sraf 			port_kevent_t *lpkevp;
14401885Sraf 			/*
14411885Sraf 			 * Prepare data to send with each aiocb completed.
14421885Sraf 			 */
14431885Sraf 			if (aio_port) {
14441885Sraf 				void *paddr =
14451885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14461885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
14471885Sraf 					error = EFAULT;
14481885Sraf 			} else {	/* aio_thread */
14491885Sraf 				pnotify.portnfy_port =
14501885Sraf 				    aiocb->aio_sigevent.sigev_signo;
14511885Sraf 				pnotify.portnfy_user =
14521885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14531885Sraf 			}
14541885Sraf 			if (error)
14551885Sraf 				/* EMPTY */;
14561885Sraf 			else if (pkevtp != NULL &&
14571885Sraf 			    pnotify.portnfy_port == lio_head_port)
14581885Sraf 				error = port_dup_event(pkevtp, &lpkevp,
14591885Sraf 				    PORT_ALLOC_DEFAULT);
14601885Sraf 			else
14611885Sraf 				error = port_alloc_event(pnotify.portnfy_port,
14621885Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
14631885Sraf 				    &lpkevp);
14641885Sraf 			if (error == 0) {
14651885Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
14661885Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
14671885Sraf 				    aio_port_callback, reqp);
14681885Sraf 				lpkevp->portkev_events = event;
14691885Sraf 				reqp->aio_req_portkev = lpkevp;
14701885Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
14711885Sraf 			}
14720Sstevel@tonic-gate 		}
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate 		/*
14750Sstevel@tonic-gate 		 * send the request to driver.
14760Sstevel@tonic-gate 		 */
14770Sstevel@tonic-gate 		if (error == 0) {
14780Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
14790Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
14800Sstevel@tonic-gate 				aio_zerolen(reqp);
14810Sstevel@tonic-gate 				continue;
14820Sstevel@tonic-gate 			}
14830Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
14840Sstevel@tonic-gate 			    CRED());
14850Sstevel@tonic-gate 		}
14861885Sraf 
14870Sstevel@tonic-gate 		/*
14880Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
14890Sstevel@tonic-gate 		 * completed unless there was an error.
14900Sstevel@tonic-gate 		 */
14910Sstevel@tonic-gate 		if (error) {
14920Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14930Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14940Sstevel@tonic-gate 			if (head) {
14950Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14960Sstevel@tonic-gate 				head->lio_nent--;
14970Sstevel@tonic-gate 				head->lio_refcnt--;
14980Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14990Sstevel@tonic-gate 			}
15000Sstevel@tonic-gate 			if (error == ENOTSUP)
15010Sstevel@tonic-gate 				aio_notsupported++;
15020Sstevel@tonic-gate 			else
15030Sstevel@tonic-gate 				aio_errors++;
15040Sstevel@tonic-gate 			lio_set_error(reqp);
15050Sstevel@tonic-gate 		} else {
15060Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
15070Sstevel@tonic-gate 		}
15080Sstevel@tonic-gate 	}
15090Sstevel@tonic-gate 
15100Sstevel@tonic-gate 	if (aio_notsupported) {
15110Sstevel@tonic-gate 		error = ENOTSUP;
15120Sstevel@tonic-gate 	} else if (aio_errors) {
15130Sstevel@tonic-gate 		/*
15140Sstevel@tonic-gate 		 * return EIO if any request failed
15150Sstevel@tonic-gate 		 */
15160Sstevel@tonic-gate 		error = EIO;
15170Sstevel@tonic-gate 	}
15180Sstevel@tonic-gate 
15190Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
15200Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
15210Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
15220Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
15230Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15240Sstevel@tonic-gate 				error = EINTR;
15250Sstevel@tonic-gate 				goto done;
15260Sstevel@tonic-gate 			}
15270Sstevel@tonic-gate 		}
15280Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
15290Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
15300Sstevel@tonic-gate 	}
15310Sstevel@tonic-gate 
15320Sstevel@tonic-gate done:
15330Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
15340Sstevel@tonic-gate 	if (deadhead) {
15350Sstevel@tonic-gate 		if (head->lio_sigqp)
15360Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
15371885Sraf 		if (head->lio_portkev)
15381885Sraf 			port_free_event(head->lio_portkev);
15390Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
15400Sstevel@tonic-gate 	}
15410Sstevel@tonic-gate 	return (error);
15420Sstevel@tonic-gate }
15430Sstevel@tonic-gate 
15440Sstevel@tonic-gate #endif /* _LP64 */
15450Sstevel@tonic-gate 
/*
 * Asynchronous list I/O.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/Os are completed if a signal is caught
 * or if the list includes UFS I/O requests.  If this happens,
 * libaio will call aliowait() to wait for the I/Os to
 * complete.
 */
15540Sstevel@tonic-gate /*ARGSUSED*/
15550Sstevel@tonic-gate static int
15560Sstevel@tonic-gate aliowait(
15570Sstevel@tonic-gate 	int	mode,
15580Sstevel@tonic-gate 	void	*aiocb,
15590Sstevel@tonic-gate 	int	nent,
15600Sstevel@tonic-gate 	void	*sigev,
15610Sstevel@tonic-gate 	int	run_mode)
15620Sstevel@tonic-gate {
15630Sstevel@tonic-gate 	aio_lio_t	*head;
15640Sstevel@tonic-gate 	aio_t		*aiop;
15650Sstevel@tonic-gate 	caddr_t		cbplist;
15660Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
15670Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15680Sstevel@tonic-gate 	aiocb32_t	*cbp32;
15690Sstevel@tonic-gate 	caddr32_t	*ucbp32;
15700Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
15710Sstevel@tonic-gate #endif
15720Sstevel@tonic-gate 	int		error = 0;
15730Sstevel@tonic-gate 	int		i;
15740Sstevel@tonic-gate 	size_t		ssize = 0;
15750Sstevel@tonic-gate 	model_t		model = get_udatamodel();
15760Sstevel@tonic-gate 
15770Sstevel@tonic-gate 	aiop = curproc->p_aio;
15780Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
15790Sstevel@tonic-gate 		return (EINVAL);
15800Sstevel@tonic-gate 
15810Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15820Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
15830Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15840Sstevel@tonic-gate 	else
15850Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
15860Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15870Sstevel@tonic-gate 
15880Sstevel@tonic-gate 	if (ssize == 0)
15890Sstevel@tonic-gate 		return (EINVAL);
15900Sstevel@tonic-gate 
15910Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
15920Sstevel@tonic-gate 
15930Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15940Sstevel@tonic-gate 		ucbp = (aiocb_t **)cbplist;
15950Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15960Sstevel@tonic-gate 	else
15970Sstevel@tonic-gate 		ucbp32 = (caddr32_t *)cbplist;
15980Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15990Sstevel@tonic-gate 
16000Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
16010Sstevel@tonic-gate 		error = EFAULT;
16020Sstevel@tonic-gate 		goto done;
16030Sstevel@tonic-gate 	}
16040Sstevel@tonic-gate 
16050Sstevel@tonic-gate 	/*
16060Sstevel@tonic-gate 	 * To find the list head, we go through the
16070Sstevel@tonic-gate 	 * list of aiocb structs, find the request
16080Sstevel@tonic-gate 	 * its for, then get the list head that reqp
16090Sstevel@tonic-gate 	 * points to
16100Sstevel@tonic-gate 	 */
16110Sstevel@tonic-gate 	head = NULL;
16120Sstevel@tonic-gate 
16130Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
16140Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE) {
16150Sstevel@tonic-gate 			/*
16160Sstevel@tonic-gate 			 * Since we are only checking for a NULL pointer
16170Sstevel@tonic-gate 			 * Following should work on both native data sizes
16180Sstevel@tonic-gate 			 * as well as for largefile aiocb.
16190Sstevel@tonic-gate 			 */
16200Sstevel@tonic-gate 			if ((cbp = *ucbp++) == NULL)
16210Sstevel@tonic-gate 				continue;
16220Sstevel@tonic-gate 			if (run_mode != AIO_LARGEFILE)
16230Sstevel@tonic-gate 				if (head = aio_list_get(&cbp->aio_resultp))
16240Sstevel@tonic-gate 					break;
16250Sstevel@tonic-gate 			else {
16260Sstevel@tonic-gate 				/*
16270Sstevel@tonic-gate 				 * This is a case when largefile call is
16280Sstevel@tonic-gate 				 * made on 32 bit kernel.
16290Sstevel@tonic-gate 				 * Treat each pointer as pointer to
16300Sstevel@tonic-gate 				 * aiocb64_32
16310Sstevel@tonic-gate 				 */
16320Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16330Sstevel@tonic-gate 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
16340Sstevel@tonic-gate 					break;
16350Sstevel@tonic-gate 			}
16360Sstevel@tonic-gate 		}
16370Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16380Sstevel@tonic-gate 		else {
16390Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
16400Sstevel@tonic-gate 				if ((cbp64 = (aiocb64_32_t *)
16410Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16420Sstevel@tonic-gate 					continue;
16430Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16440Sstevel@tonic-gate 				    &cbp64->aio_resultp))
16450Sstevel@tonic-gate 					break;
16460Sstevel@tonic-gate 			} else if (run_mode == AIO_32) {
16470Sstevel@tonic-gate 				if ((cbp32 = (aiocb32_t *)
16480Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16490Sstevel@tonic-gate 					continue;
16500Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16510Sstevel@tonic-gate 				    &cbp32->aio_resultp))
16520Sstevel@tonic-gate 					break;
16530Sstevel@tonic-gate 			}
16540Sstevel@tonic-gate 		}
16550Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
16560Sstevel@tonic-gate 	}
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 	if (head == NULL) {
16590Sstevel@tonic-gate 		error = EINVAL;
16600Sstevel@tonic-gate 		goto done;
16610Sstevel@tonic-gate 	}
16620Sstevel@tonic-gate 
16630Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
16640Sstevel@tonic-gate 	while (head->lio_refcnt > 0) {
16650Sstevel@tonic-gate 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
16660Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
16670Sstevel@tonic-gate 			error = EINTR;
16680Sstevel@tonic-gate 			goto done;
16690Sstevel@tonic-gate 		}
16700Sstevel@tonic-gate 	}
16710Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
16720Sstevel@tonic-gate 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
16730Sstevel@tonic-gate done:
16740Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
16750Sstevel@tonic-gate 	return (error);
16760Sstevel@tonic-gate }
16770Sstevel@tonic-gate 
16780Sstevel@tonic-gate aio_lio_t *
16790Sstevel@tonic-gate aio_list_get(aio_result_t *resultp)
16800Sstevel@tonic-gate {
16810Sstevel@tonic-gate 	aio_lio_t	*head = NULL;
16820Sstevel@tonic-gate 	aio_t		*aiop;
16830Sstevel@tonic-gate 	aio_req_t 	**bucket;
16840Sstevel@tonic-gate 	aio_req_t 	*reqp;
16850Sstevel@tonic-gate 	long		index;
16860Sstevel@tonic-gate 
16870Sstevel@tonic-gate 	aiop = curproc->p_aio;
16880Sstevel@tonic-gate 	if (aiop == NULL)
16890Sstevel@tonic-gate 		return (NULL);
16900Sstevel@tonic-gate 
16910Sstevel@tonic-gate 	if (resultp) {
16920Sstevel@tonic-gate 		index = AIO_HASH(resultp);
16930Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
16940Sstevel@tonic-gate 		for (reqp = *bucket; reqp != NULL;
16950Sstevel@tonic-gate 		    reqp = reqp->aio_hash_next) {
16960Sstevel@tonic-gate 			if (reqp->aio_req_resultp == resultp) {
16970Sstevel@tonic-gate 				head = reqp->aio_req_lio;
16980Sstevel@tonic-gate 				return (head);
16990Sstevel@tonic-gate 			}
17000Sstevel@tonic-gate 		}
17010Sstevel@tonic-gate 	}
17020Sstevel@tonic-gate 	return (NULL);
17030Sstevel@tonic-gate }
17040Sstevel@tonic-gate 
17050Sstevel@tonic-gate 
17060Sstevel@tonic-gate static void
17070Sstevel@tonic-gate lio_set_uerror(void *resultp, int error)
17080Sstevel@tonic-gate {
17090Sstevel@tonic-gate 	/*
17100Sstevel@tonic-gate 	 * the resultp field is a pointer to where the
17110Sstevel@tonic-gate 	 * error should be written out to the user's
17120Sstevel@tonic-gate 	 * aiocb.
17130Sstevel@tonic-gate 	 *
17140Sstevel@tonic-gate 	 */
17150Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
17160Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
17170Sstevel@tonic-gate 		    (ssize_t)-1);
17180Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
17190Sstevel@tonic-gate 	}
17200Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17210Sstevel@tonic-gate 	else {
17220Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
17230Sstevel@tonic-gate 		    (uint_t)-1);
17240Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
17250Sstevel@tonic-gate 	}
17260Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17270Sstevel@tonic-gate }
17280Sstevel@tonic-gate 
17290Sstevel@tonic-gate /*
17300Sstevel@tonic-gate  * do cleanup completion for all requests in list. memory for
17310Sstevel@tonic-gate  * each request is also freed.
17320Sstevel@tonic-gate  */
17330Sstevel@tonic-gate static void
17340Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
17350Sstevel@tonic-gate {
17360Sstevel@tonic-gate 	int i;
17370Sstevel@tonic-gate 	aio_req_t *reqp;
17380Sstevel@tonic-gate 	aio_result_t *resultp;
17391885Sraf 	aiocb64_32_t *aiocb_64;
17400Sstevel@tonic-gate 
17410Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
17420Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
17430Sstevel@tonic-gate 			if (cbp[i] == NULL)
17440Sstevel@tonic-gate 				continue;
17450Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
17460Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)cbp[i];
17471885Sraf 				resultp = (aio_result_t *)
17481885Sraf 				    &aiocb_64->aio_resultp;
17490Sstevel@tonic-gate 			} else
17500Sstevel@tonic-gate 				resultp = &cbp[i]->aio_resultp;
17510Sstevel@tonic-gate 		}
17520Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17530Sstevel@tonic-gate 		else {
17541885Sraf 			aiocb32_t *aiocb_32;
17551885Sraf 			caddr32_t *cbp32;
17560Sstevel@tonic-gate 
17570Sstevel@tonic-gate 			cbp32 = (caddr32_t *)cbp;
17580Sstevel@tonic-gate 			if (cbp32[i] == NULL)
17590Sstevel@tonic-gate 				continue;
17600Sstevel@tonic-gate 			if (run_mode == AIO_32) {
17610Sstevel@tonic-gate 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
17620Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_32->
17630Sstevel@tonic-gate 				    aio_resultp;
17640Sstevel@tonic-gate 			} else if (run_mode == AIO_LARGEFILE) {
17650Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
17660Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_64->
17670Sstevel@tonic-gate 				    aio_resultp;
17680Sstevel@tonic-gate 			}
17690Sstevel@tonic-gate 		}
17700Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17710Sstevel@tonic-gate 		/*
17720Sstevel@tonic-gate 		 * we need to get the aio_cleanupq_mutex since we call
17730Sstevel@tonic-gate 		 * aio_req_done().
17740Sstevel@tonic-gate 		 */
17750Sstevel@tonic-gate 		mutex_enter(&aiop->aio_cleanupq_mutex);
17760Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
17770Sstevel@tonic-gate 		reqp = aio_req_done(resultp);
17780Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
17790Sstevel@tonic-gate 		mutex_exit(&aiop->aio_cleanupq_mutex);
17800Sstevel@tonic-gate 		if (reqp != NULL) {
17810Sstevel@tonic-gate 			aphysio_unlock(reqp);
17820Sstevel@tonic-gate 			aio_copyout_result(reqp);
17830Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
17840Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
17850Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
17860Sstevel@tonic-gate 		}
17870Sstevel@tonic-gate 	}
17880Sstevel@tonic-gate }
17890Sstevel@tonic-gate 
17900Sstevel@tonic-gate /*
17911885Sraf  * Write out the results for an aio request that is done.
17920Sstevel@tonic-gate  */
17930Sstevel@tonic-gate static int
17940Sstevel@tonic-gate aioerror(void *cb, int run_mode)
17950Sstevel@tonic-gate {
17960Sstevel@tonic-gate 	aio_result_t *resultp;
17970Sstevel@tonic-gate 	aio_t *aiop;
17980Sstevel@tonic-gate 	aio_req_t *reqp;
17990Sstevel@tonic-gate 	int retval;
18000Sstevel@tonic-gate 
18010Sstevel@tonic-gate 	aiop = curproc->p_aio;
18020Sstevel@tonic-gate 	if (aiop == NULL || cb == NULL)
18030Sstevel@tonic-gate 		return (EINVAL);
18040Sstevel@tonic-gate 
18050Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
18060Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18070Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18080Sstevel@tonic-gate 			    aio_resultp;
18090Sstevel@tonic-gate 		else
18100Sstevel@tonic-gate 			resultp = &((aiocb_t *)cb)->aio_resultp;
18110Sstevel@tonic-gate 	}
18120Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18130Sstevel@tonic-gate 	else {
18140Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18150Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18160Sstevel@tonic-gate 			    aio_resultp;
18170Sstevel@tonic-gate 		else if (run_mode == AIO_32)
18180Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
18190Sstevel@tonic-gate 			    aio_resultp;
18200Sstevel@tonic-gate 	}
18210Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
18220Sstevel@tonic-gate 	/*
18230Sstevel@tonic-gate 	 * we need to get the aio_cleanupq_mutex since we call
18240Sstevel@tonic-gate 	 * aio_req_find().
18250Sstevel@tonic-gate 	 */
18260Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
18270Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18280Sstevel@tonic-gate 	retval = aio_req_find(resultp, &reqp);
18290Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
18300Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
18310Sstevel@tonic-gate 	if (retval == 0) {
18320Sstevel@tonic-gate 		aphysio_unlock(reqp);
18330Sstevel@tonic-gate 		aio_copyout_result(reqp);
18340Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
18350Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
18360Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
18370Sstevel@tonic-gate 		return (0);
18380Sstevel@tonic-gate 	} else if (retval == 1)
18390Sstevel@tonic-gate 		return (EINPROGRESS);
18400Sstevel@tonic-gate 	else if (retval == 2)
18410Sstevel@tonic-gate 		return (EINVAL);
18420Sstevel@tonic-gate 	return (0);
18430Sstevel@tonic-gate }
18440Sstevel@tonic-gate 
/*
 * 	aio_cancel - if no requests outstanding,
 *			return AIO_ALLDONE
 *			else
 *			return AIO_NOTCANCELED
 *
 * Note that this routine never actually cancels anything: it only
 * inspects the hashed requests and reports via *rval whether
 * matching requests are still pending (AIO_NOTCANCELED) or all
 * complete (AIO_ALLDONE).  The syscall return value is an errno.
 */
static int
aio_cancel(
	int	fildes,
	void 	*cb,
	long	*rval,
	int	run_mode)
{
	aio_t *aiop;
	void *resultp;
	int index;
	aio_req_t **bucket;
	aio_req_t *ent;


	/*
	 * Verify valid file descriptor
	 */
	if ((getf(fildes)) == NULL) {
		return (EBADF);
	}
	/* the hold was only needed for validation; drop it now */
	releasef(fildes);

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (aiop->aio_outstanding == 0) {
		/* nothing at all in flight for this process */
		*rval = AIO_ALLDONE;
		return (0);
	}

	mutex_enter(&aiop->aio_mutex);
	if (cb != NULL) {
		/*
		 * A specific aiocb was supplied: compute the user
		 * address of its embedded result area, which serves as
		 * the hash key for the request.  The aiocb layout
		 * depends on the caller's data model and run_mode.
		 */
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else
				resultp = &((aiocb_t *)cb)->aio_resultp;
		}
#ifdef	_SYSCALL32_IMPL
		else {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else if (run_mode == AIO_32)
				resultp = (aio_result_t *)&((aiocb32_t *)cb)
				    ->aio_resultp;
		}
#endif  /* _SYSCALL32_IMPL */
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == resultp) {
				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
					/* found, but already complete */
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_ALLDONE;
					return (0);
				}
				/* found and still pending */
				mutex_exit(&aiop->aio_mutex);
				*rval = AIO_NOTCANCELED;
				return (0);
			}
		}
		/* no request hashed for this aiocb: report all done */
		mutex_exit(&aiop->aio_mutex);
		*rval = AIO_ALLDONE;
		return (0);
	}

	/*
	 * No aiocb supplied: scan every hash chain for any pending
	 * request on this file descriptor.
	 */
	for (index = 0; index < AIO_HASHSZ; index++) {
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_fd == fildes) {
				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_NOTCANCELED;
					return (0);
				}
			}
		}
	}
	mutex_exit(&aiop->aio_mutex);
	*rval = AIO_ALLDONE;
	return (0);
}
19360Sstevel@tonic-gate 
/*
 * solaris version of asynchronous read and write
 *
 * Queues a single async read or write (per opcode/mode) of bufsize
 * bytes at the given offset on file descriptor fdes.  resultp is
 * the user address where completion status will be posted.  Returns
 * an errno; 0 means the request was successfully queued (or was a
 * zero-length no-op).
 */
static int
arw(
	int	opcode,
	int	fdes,
	char	*bufp,
	int	bufsize,
	offset_t	offset,
	aio_result_t	*resultp,
	int		mode)
{
	file_t		*fp;
	int		error;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
#ifdef _LP64
	aiocb_t		aiocb;
#else
	/* 32-bit kernel: use the largefile form to carry a 64-bit offset */
	aiocb64_32_t	aiocb64;
#endif

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/* take a hold on the fd; kept until I/O completion on success */
	if ((fp = getf(fdes)) == NULL) {
		return (EBADF);
	}

	/*
	 * check the permission of the partition
	 */
	if ((fp->f_flag & mode) == 0) {
		releasef(fdes);
		return (EBADF);
	}

	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		/* vnode does not support async i/o for this mode */
		releasef(fdes);
		return (EBADFD);
	}
	/*
	 * Build a kernel-local aiocb describing the request; no
	 * signal/port notification is used for this interface.
	 */
#ifdef _LP64
	aiocb.aio_fildes = fdes;
	aiocb.aio_buf = bufp;
	aiocb.aio_nbytes = bufsize;
	aiocb.aio_offset = offset;
	aiocb.aio_sigevent.sigev_notify = 0;
	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
#else
	aiocb64.aio_fildes = fdes;
	aiocb64.aio_buf = (caddr32_t)bufp;
	aiocb64.aio_nbytes = bufsize;
	aiocb64.aio_offset = offset;
	aiocb64.aio_sigevent.sigev_notify = 0;
	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
#endif
	if (error) {
		releasef(fdes);
		return (error);
	}

	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	if (bufsize == 0) {
		/* zero-length request: complete it immediately */
		clear_active_fd(fdes);
		aio_zerolen(reqp);
		return (0);
	}
	/*
	 * send the request to driver.
	 */
	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		/* undo the queueing: drop the fd hold, free the request */
		releasef(fdes);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fdes);
	return (0);
}
20380Sstevel@tonic-gate 
20390Sstevel@tonic-gate /*
20400Sstevel@tonic-gate  * posix version of asynchronous read and write
20410Sstevel@tonic-gate  */
20421885Sraf static int
20430Sstevel@tonic-gate aiorw(
20440Sstevel@tonic-gate 	int		opcode,
20450Sstevel@tonic-gate 	void		*aiocb_arg,
20460Sstevel@tonic-gate 	int		mode,
20470Sstevel@tonic-gate 	int		run_mode)
20480Sstevel@tonic-gate {
20490Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
20500Sstevel@tonic-gate 	aiocb32_t	aiocb32;
20510Sstevel@tonic-gate 	struct	sigevent32 *sigev32;
20520Sstevel@tonic-gate 	port_notify32_t	pntfy32;
20530Sstevel@tonic-gate #endif
20540Sstevel@tonic-gate 	aiocb64_32_t	aiocb64;
20550Sstevel@tonic-gate 	aiocb_t		aiocb;
20560Sstevel@tonic-gate 	file_t		*fp;
20570Sstevel@tonic-gate 	int		error, fd;
20580Sstevel@tonic-gate 	size_t		bufsize;
20590Sstevel@tonic-gate 	struct vnode	*vp;
20600Sstevel@tonic-gate 	aio_req_t	*reqp;
20610Sstevel@tonic-gate 	aio_t		*aiop;
20620Sstevel@tonic-gate 	int		(*aio_func)();
20630Sstevel@tonic-gate 	aio_result_t	*resultp;
20640Sstevel@tonic-gate 	struct	sigevent *sigev;
20650Sstevel@tonic-gate 	model_t		model;
20660Sstevel@tonic-gate 	int		aio_use_port = 0;
20670Sstevel@tonic-gate 	port_notify_t	pntfy;
20680Sstevel@tonic-gate 
20690Sstevel@tonic-gate 	model = get_udatamodel();
20700Sstevel@tonic-gate 	aiop = curproc->p_aio;
20710Sstevel@tonic-gate 	if (aiop == NULL)
20720Sstevel@tonic-gate 		return (EINVAL);
20730Sstevel@tonic-gate 
20740Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
20750Sstevel@tonic-gate 		if (run_mode != AIO_LARGEFILE) {
20760Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
20770Sstevel@tonic-gate 				return (EFAULT);
20780Sstevel@tonic-gate 			bufsize = aiocb.aio_nbytes;
20790Sstevel@tonic-gate 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
20800Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
20810Sstevel@tonic-gate 				return (EBADF);
20820Sstevel@tonic-gate 			}
20830Sstevel@tonic-gate 			sigev = &aiocb.aio_sigevent;
20840Sstevel@tonic-gate 		} else {
20850Sstevel@tonic-gate 			/*
20860Sstevel@tonic-gate 			 * We come here only when we make largefile
20870Sstevel@tonic-gate 			 * call on 32 bit kernel using 32 bit library.
20880Sstevel@tonic-gate 			 */
20890Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
20900Sstevel@tonic-gate 				return (EFAULT);
20910Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
20920Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
20930Sstevel@tonic-gate 			    ->aio_resultp);
20941885Sraf 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
20950Sstevel@tonic-gate 				return (EBADF);
20960Sstevel@tonic-gate 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
20970Sstevel@tonic-gate 		}
20980Sstevel@tonic-gate 
20990Sstevel@tonic-gate 		if (sigev->sigev_notify == SIGEV_PORT) {
21000Sstevel@tonic-gate 			if (copyin((void *)sigev->sigev_value.sival_ptr,
21010Sstevel@tonic-gate 			    &pntfy, sizeof (port_notify_t))) {
21020Sstevel@tonic-gate 				releasef(fd);
21030Sstevel@tonic-gate 				return (EFAULT);
21040Sstevel@tonic-gate 			}
21050Sstevel@tonic-gate 			aio_use_port = 1;
21061885Sraf 		} else if (sigev->sigev_notify == SIGEV_THREAD) {
21071885Sraf 			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
21081885Sraf 			pntfy.portnfy_user =
21091885Sraf 			    aiocb.aio_sigevent.sigev_value.sival_ptr;
21101885Sraf 			aio_use_port = 1;
21110Sstevel@tonic-gate 		}
21120Sstevel@tonic-gate 	}
21130Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
21140Sstevel@tonic-gate 	else {
21150Sstevel@tonic-gate 		if (run_mode == AIO_32) {
21160Sstevel@tonic-gate 			/* 32 bit system call is being made on 64 bit kernel */
21170Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
21180Sstevel@tonic-gate 				return (EFAULT);
21190Sstevel@tonic-gate 
21200Sstevel@tonic-gate 			bufsize = aiocb32.aio_nbytes;
21210Sstevel@tonic-gate 			aiocb_32ton(&aiocb32, &aiocb);
21220Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
21230Sstevel@tonic-gate 			    aio_resultp);
21240Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
21250Sstevel@tonic-gate 				return (EBADF);
21260Sstevel@tonic-gate 			}
21270Sstevel@tonic-gate 			sigev32 = &aiocb32.aio_sigevent;
21280Sstevel@tonic-gate 		} else if (run_mode == AIO_LARGEFILE) {
21290Sstevel@tonic-gate 			/*
21300Sstevel@tonic-gate 			 * We come here only when we make largefile
21310Sstevel@tonic-gate 			 * call on 64 bit kernel using 32 bit library.
21320Sstevel@tonic-gate 			 */
21330Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
21340Sstevel@tonic-gate 				return (EFAULT);
21350Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
21360Sstevel@tonic-gate 			aiocb_LFton(&aiocb64, &aiocb);
21370Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
21380Sstevel@tonic-gate 			    ->aio_resultp);
21390Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
21400Sstevel@tonic-gate 				return (EBADF);
21410Sstevel@tonic-gate 			sigev32 = &aiocb64.aio_sigevent;
21420Sstevel@tonic-gate 		}
21430Sstevel@tonic-gate 
21440Sstevel@tonic-gate 		if (sigev32->sigev_notify == SIGEV_PORT) {
21450Sstevel@tonic-gate 			if (copyin(
21460Sstevel@tonic-gate 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
21470Sstevel@tonic-gate 			    &pntfy32, sizeof (port_notify32_t))) {
21480Sstevel@tonic-gate 				releasef(fd);
21490Sstevel@tonic-gate 				return (EFAULT);
21500Sstevel@tonic-gate 			}
21510Sstevel@tonic-gate 			pntfy.portnfy_port = pntfy32.portnfy_port;
21521885Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
21531885Sraf 			    pntfy32.portnfy_user;
21541885Sraf 			aio_use_port = 1;
21551885Sraf 		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
21561885Sraf 			pntfy.portnfy_port = sigev32->sigev_signo;
21571885Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
21581885Sraf 			    sigev32->sigev_value.sival_ptr;
21590Sstevel@tonic-gate 			aio_use_port = 1;
21600Sstevel@tonic-gate 		}
21610Sstevel@tonic-gate 	}
21620Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
21630Sstevel@tonic-gate 
21640Sstevel@tonic-gate 	/*
21650Sstevel@tonic-gate 	 * check the permission of the partition
21660Sstevel@tonic-gate 	 */
21670Sstevel@tonic-gate 
21680Sstevel@tonic-gate 	if ((fp->f_flag & mode) == 0) {
21690Sstevel@tonic-gate 		releasef(fd);
21700Sstevel@tonic-gate 		return (EBADF);
21710Sstevel@tonic-gate 	}
21720Sstevel@tonic-gate 
21730Sstevel@tonic-gate 	vp = fp->f_vnode;
21740Sstevel@tonic-gate 	aio_func = check_vp(vp, mode);
21750Sstevel@tonic-gate 	if (aio_func == NULL) {
21760Sstevel@tonic-gate 		releasef(fd);
21770Sstevel@tonic-gate 		return (EBADFD);
21780Sstevel@tonic-gate 	}
21791885Sraf 	if (run_mode == AIO_LARGEFILE)
21801885Sraf 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
21810Sstevel@tonic-gate 	else
21821885Sraf 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
21830Sstevel@tonic-gate 
21840Sstevel@tonic-gate 	if (error) {
21850Sstevel@tonic-gate 		releasef(fd);
21860Sstevel@tonic-gate 		return (error);
21870Sstevel@tonic-gate 	}
21880Sstevel@tonic-gate 	/*
21890Sstevel@tonic-gate 	 * enable polling on this request if the opcode has
21900Sstevel@tonic-gate 	 * the AIO poll bit set
21910Sstevel@tonic-gate 	 */
21920Sstevel@tonic-gate 	if (opcode & AIO_POLL_BIT)
21930Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_POLL;
21940Sstevel@tonic-gate 
21950Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
21960Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = aiocb_arg;
21970Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
21980Sstevel@tonic-gate 	else
21990Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
22000Sstevel@tonic-gate #endif
22010Sstevel@tonic-gate 
22021885Sraf 	if (aio_use_port) {
22031885Sraf 		int event = (run_mode == AIO_LARGEFILE)?
22041885Sraf 		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
22051885Sraf 		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
22061885Sraf 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
22071885Sraf 	}
22080Sstevel@tonic-gate 
22090Sstevel@tonic-gate 	/*
22100Sstevel@tonic-gate 	 * send the request to driver.
22110Sstevel@tonic-gate 	 */
22120Sstevel@tonic-gate 	if (error == 0) {
22130Sstevel@tonic-gate 		if (bufsize == 0) {
22140Sstevel@tonic-gate 			clear_active_fd(fd);
22150Sstevel@tonic-gate 			aio_zerolen(reqp);
22160Sstevel@tonic-gate 			return (0);
22170Sstevel@tonic-gate 		}
22180Sstevel@tonic-gate 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
22190Sstevel@tonic-gate 	}
22200Sstevel@tonic-gate 
22210Sstevel@tonic-gate 	/*
22220Sstevel@tonic-gate 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
22230Sstevel@tonic-gate 	 * is released by the aio_cleanup_thread() when the IO has
22240Sstevel@tonic-gate 	 * completed.
22250Sstevel@tonic-gate 	 */
22260Sstevel@tonic-gate 	if (error) {
22270Sstevel@tonic-gate 		releasef(fd);
22280Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
22291885Sraf 		aio_deq(&aiop->aio_portpending, reqp);
22300Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
22310Sstevel@tonic-gate 		aiop->aio_pending--;
22320Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_REQ_BLOCK)
22330Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
22340Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
22350Sstevel@tonic-gate 		return (error);
22360Sstevel@tonic-gate 	}
22370Sstevel@tonic-gate 	clear_active_fd(fd);
22380Sstevel@tonic-gate 	return (0);
22390Sstevel@tonic-gate }
22400Sstevel@tonic-gate 
22410Sstevel@tonic-gate 
/*
 * set error for a list IO entry that failed.
 *
 * The request never reached the driver (or failed before being issued),
 * so it will never appear on the done queue; undo the pending accounting
 * and free it immediately.  Safe no-op if the process has no aio state.
 */
static void
lio_set_error(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	/* take it off the port-pending list before freeing */
	aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as its never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 *	 aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	/* wake anyone draining pending requests (see AIO_REQ_BLOCK waiters) */
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}
22700Sstevel@tonic-gate 
22710Sstevel@tonic-gate /*
22720Sstevel@tonic-gate  * check if a specified request is done, and remove it from
22730Sstevel@tonic-gate  * the done queue. otherwise remove anybody from the done queue
22740Sstevel@tonic-gate  * if NULL is specified.
22750Sstevel@tonic-gate  */
22760Sstevel@tonic-gate static aio_req_t *
22770Sstevel@tonic-gate aio_req_done(void *resultp)
22780Sstevel@tonic-gate {
22790Sstevel@tonic-gate 	aio_req_t **bucket;
22800Sstevel@tonic-gate 	aio_req_t *ent;
22810Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
22820Sstevel@tonic-gate 	long index;
22830Sstevel@tonic-gate 
22840Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
22850Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
22860Sstevel@tonic-gate 
22870Sstevel@tonic-gate 	if (resultp) {
22880Sstevel@tonic-gate 		index = AIO_HASH(resultp);
22890Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
22900Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
22910Sstevel@tonic-gate 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
22920Sstevel@tonic-gate 				if (ent->aio_req_flags & AIO_DONEQ) {
22930Sstevel@tonic-gate 					return (aio_req_remove(ent));
22940Sstevel@tonic-gate 				}
22950Sstevel@tonic-gate 				return (NULL);
22960Sstevel@tonic-gate 			}
22970Sstevel@tonic-gate 		}
22980Sstevel@tonic-gate 		/* no match, resultp is invalid */
22990Sstevel@tonic-gate 		return (NULL);
23000Sstevel@tonic-gate 	}
23010Sstevel@tonic-gate 	return (aio_req_remove(NULL));
23020Sstevel@tonic-gate }
23030Sstevel@tonic-gate 
23040Sstevel@tonic-gate /*
23050Sstevel@tonic-gate  * determine if a user-level resultp pointer is associated with an
23060Sstevel@tonic-gate  * active IO request. Zero is returned when the request is done,
23070Sstevel@tonic-gate  * and the request is removed from the done queue. Only when the
23080Sstevel@tonic-gate  * return value is zero, is the "reqp" pointer valid. One is returned
23090Sstevel@tonic-gate  * when the request is inprogress. Two is returned when the request
23100Sstevel@tonic-gate  * is invalid.
23110Sstevel@tonic-gate  */
23120Sstevel@tonic-gate static int
23130Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
23140Sstevel@tonic-gate {
23150Sstevel@tonic-gate 	aio_req_t **bucket;
23160Sstevel@tonic-gate 	aio_req_t *ent;
23170Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23180Sstevel@tonic-gate 	long index;
23190Sstevel@tonic-gate 
23200Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23210Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23220Sstevel@tonic-gate 
23230Sstevel@tonic-gate 	index = AIO_HASH(resultp);
23240Sstevel@tonic-gate 	bucket = &aiop->aio_hash[index];
23250Sstevel@tonic-gate 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23260Sstevel@tonic-gate 		if (ent->aio_req_resultp == resultp) {
23270Sstevel@tonic-gate 			if (ent->aio_req_flags & AIO_DONEQ) {
23280Sstevel@tonic-gate 				*reqp = aio_req_remove(ent);
23290Sstevel@tonic-gate 				return (0);
23300Sstevel@tonic-gate 			}
23310Sstevel@tonic-gate 			return (1);
23320Sstevel@tonic-gate 		}
23330Sstevel@tonic-gate 	}
23340Sstevel@tonic-gate 	/* no match, resultp is invalid */
23350Sstevel@tonic-gate 	return (2);
23360Sstevel@tonic-gate }
23370Sstevel@tonic-gate 
/*
 * remove a request from the done queue.
 *
 * The doneq and cleanupq are circular doubly-linked lists threaded
 * through aio_req_next/aio_req_prev.  If reqp is non-NULL it is
 * unlinked from whichever of the two queues it is on; if reqp is
 * NULL the head of the doneq (if any) is removed instead.  Returns
 * the removed request, or NULL when reqp was NULL and the doneq was
 * empty.  Caller must hold aio_mutex.
 */
static aio_req_t *
aio_req_remove(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp != NULL) {
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp->aio_req_next == reqp) {
			/* only one request on queue */
			if (reqp ==  aiop->aio_doneq) {
				aiop->aio_doneq = NULL;
			} else {
				ASSERT(reqp == aiop->aio_cleanupq);
				aiop->aio_cleanupq = NULL;
			}
		} else {
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			/*
			 * The request can be either on the aio_doneq or the
			 * aio_cleanupq
			 */
			if (reqp == aiop->aio_doneq)
				aiop->aio_doneq = reqp->aio_req_next;

			if (reqp == aiop->aio_cleanupq)
				aiop->aio_cleanupq = reqp->aio_req_next;
		}
		/* fully detach the request from the queue */
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	} else if ((reqp = aiop->aio_doneq) != NULL) {
		/* caller passed NULL: pop the head of the doneq */
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp == reqp->aio_req_next) {
			/* only one request on queue */
			aiop->aio_doneq = NULL;
		} else {
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			aiop->aio_doneq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	}
	/* doneq drained: wake threads blocked in aio_waitn() */
	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
		cv_broadcast(&aiop->aio_waitcv);
	return (reqp);
}
23920Sstevel@tonic-gate 
/*
 * Common setup for an asynchronous I/O request: optionally preallocate
 * the completion sigqueue, allocate an aio_req_t (free list or heap),
 * account for it in the per-process aio state, and initialize the
 * embedded uio from the user's aiocb.  On success *reqpp is set and 0
 * is returned; otherwise EAGAIN (allocation failure), EIO (process is
 * draining requests, AIO_REQ_BLOCK), or EINVAL (duplicate resultp,
 * from aio_req_alloc()).
 */
static int
aio_req_setup(
	aio_req_t	**reqpp,
	aio_t 		*aiop,
	aiocb_t 	*arg,
	aio_result_t 	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t 	*reqp;
	struct uio 	*uio;
	struct sigevent *sigev;
	int		error;

	sigev = &arg->aio_sigevent;
	/*
	 * Preallocate the signal queue entry outside the mutex, but only
	 * for SIGEV_SIGNAL with a valid signal number.
	 */
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value = sigev->sigev_value;
	}

	mutex_enter(&aiop->aio_mutex);

	/* process is waiting for pending aio to drain; refuse new work */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	/* port/thread notification completions are tracked on portpending */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
24650Sstevel@tonic-gate 
24660Sstevel@tonic-gate /*
24670Sstevel@tonic-gate  * Allocate p_aio struct.
24680Sstevel@tonic-gate  */
24690Sstevel@tonic-gate static aio_t *
24700Sstevel@tonic-gate aio_aiop_alloc(void)
24710Sstevel@tonic-gate {
24720Sstevel@tonic-gate 	aio_t	*aiop;
24730Sstevel@tonic-gate 
24740Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&curproc->p_lock));
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
24770Sstevel@tonic-gate 	if (aiop) {
24780Sstevel@tonic-gate 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
24790Sstevel@tonic-gate 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
24800Sstevel@tonic-gate 									NULL);
24810Sstevel@tonic-gate 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
24820Sstevel@tonic-gate 	}
24830Sstevel@tonic-gate 	return (aiop);
24840Sstevel@tonic-gate }
24850Sstevel@tonic-gate 
24860Sstevel@tonic-gate /*
24870Sstevel@tonic-gate  * Allocate an aio_req struct.
24880Sstevel@tonic-gate  */
24890Sstevel@tonic-gate static int
24900Sstevel@tonic-gate aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
24910Sstevel@tonic-gate {
24920Sstevel@tonic-gate 	aio_req_t *reqp;
24930Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
24940Sstevel@tonic-gate 
24950Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
24960Sstevel@tonic-gate 
24970Sstevel@tonic-gate 	if ((reqp = aiop->aio_free) != NULL) {
24980Sstevel@tonic-gate 		aiop->aio_free = reqp->aio_req_next;
24991885Sraf 		bzero(reqp, sizeof (*reqp));
25000Sstevel@tonic-gate 	} else {
25010Sstevel@tonic-gate 		/*
25020Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25030Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25040Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25050Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25060Sstevel@tonic-gate 		 */
25070Sstevel@tonic-gate 		if (freemem < desfree)
25080Sstevel@tonic-gate 			return (EAGAIN);
25090Sstevel@tonic-gate 		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
25100Sstevel@tonic-gate 		if (reqp == NULL)
25110Sstevel@tonic-gate 			return (EAGAIN);
25120Sstevel@tonic-gate 	}
25131885Sraf 	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
25141885Sraf 	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
25151885Sraf 	reqp->aio_req.aio_private = reqp;
25160Sstevel@tonic-gate 	reqp->aio_req_buf.b_offset = -1;
25170Sstevel@tonic-gate 	reqp->aio_req_resultp = resultp;
25180Sstevel@tonic-gate 	if (aio_hash_insert(reqp, aiop)) {
25190Sstevel@tonic-gate 		reqp->aio_req_next = aiop->aio_free;
25200Sstevel@tonic-gate 		aiop->aio_free = reqp;
25210Sstevel@tonic-gate 		return (EINVAL);
25220Sstevel@tonic-gate 	}
25230Sstevel@tonic-gate 	*nreqp = reqp;
25240Sstevel@tonic-gate 	return (0);
25250Sstevel@tonic-gate }
25260Sstevel@tonic-gate 
25270Sstevel@tonic-gate /*
25280Sstevel@tonic-gate  * Allocate an aio_lio_t struct.
25290Sstevel@tonic-gate  */
25300Sstevel@tonic-gate static int
25310Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head)
25320Sstevel@tonic-gate {
25330Sstevel@tonic-gate 	aio_lio_t *liop;
25340Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
25350Sstevel@tonic-gate 
25360Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
25370Sstevel@tonic-gate 
25380Sstevel@tonic-gate 	if ((liop = aiop->aio_lio_free) != NULL) {
25390Sstevel@tonic-gate 		aiop->aio_lio_free = liop->lio_next;
25400Sstevel@tonic-gate 	} else {
25410Sstevel@tonic-gate 		/*
25420Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25430Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25440Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25450Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25460Sstevel@tonic-gate 		 */
25470Sstevel@tonic-gate 		if (freemem < desfree)
25480Sstevel@tonic-gate 			return (EAGAIN);
25490Sstevel@tonic-gate 
25500Sstevel@tonic-gate 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
25510Sstevel@tonic-gate 		if (liop == NULL)
25520Sstevel@tonic-gate 			return (EAGAIN);
25530Sstevel@tonic-gate 	}
25540Sstevel@tonic-gate 	*head = liop;
25550Sstevel@tonic-gate 	return (0);
25560Sstevel@tonic-gate }
25570Sstevel@tonic-gate 
/*
 * this is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio. normally,
 * the process will have completed the aio before unmapping the
 * segment. If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped. In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 */

static int
aio_cleanup_thread(aio_t *aiop)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	int poked = 0;		/* nonzero: woken by pokelwps(), not by aio */
	kcondvar_t *cvp;
	int exit_flag = 0;	/* set once all pending aio has drained */
	int rqclnup = 0;	/* latched copy of aio_rqclnup (DR request) */

	/* block all maskable signals in this helper thread */
	sigfillset(&curthread->t_hold);
	sigdiffset(&curthread->t_hold, &cantmask);
	for (;;) {
		/*
		 * if a segment is being unmapped, and the current
		 * process's done queue is not empty, then every request
		 * on the doneq with locked resources should be forced
		 * to release their locks. By moving the doneq request
		 * to the cleanupq, aio_cleanup() will process the cleanupq,
		 * and place requests back onto the doneq. All requests
		 * processed by aio_cleanup() will have their physical
		 * resources unlocked.
		 */
		mutex_enter(&aiop->aio_mutex);
		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
			aiop->aio_flags |= AIO_CLEANUP;
			mutex_enter(&as->a_contents);
			/* latch and clear a DR cleanup request */
			if (aiop->aio_rqclnup) {
				aiop->aio_rqclnup = 0;
				rqclnup = 1;
			}

			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
			    aiop->aio_doneq) {
				aio_req_t *doneqhead = aiop->aio_doneq;
				mutex_exit(&as->a_contents);
				aiop->aio_doneq = NULL;
				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
			} else {
				mutex_exit(&as->a_contents);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		aio_cleanup(AIO_CLEANUP_THREAD);
		/*
		 * thread should block on the cleanupcv while
		 * AIO_CLEANUP is set.
		 */
		cvp = &aiop->aio_cleanupcv;
		mutex_enter(&aiop->aio_mutex);

		/* more work queued up while we were cleaning; go again */
		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
		    aiop->aio_notifyq != NULL ||
		    aiop->aio_portcleanupq != NULL) {
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&as->a_contents);

		/*
		 * AIO_CLEANUP determines when the cleanup thread
		 * should be active. This flag is set when
		 * the cleanup thread is awakened by as_unmap() or
		 * due to DR operations.
		 * The flag is cleared when the blocking as_unmap()
		 * that originally awakened us is allowed to
		 * complete. as_unmap() blocks when trying to
		 * unmap a segment that has SOFTLOCKed pages. when
		 * the segment's pages are all SOFTUNLOCKed,
		 * as->a_flags & AS_UNMAPWAIT should be zero.
		 *
		 * In case of cleanup request by DR, the flag is cleared
		 * once all the pending aio requests have been processed.
		 *
		 * The flag shouldn't be cleared right away if the
		 * cleanup thread was interrupted because the process
		 * is doing forkall(). This happens when cv_wait_sig()
		 * returns zero, because it was awakened by a pokelwps().
		 * If the process is not exiting, it must be doing forkall().
		 */
		if ((poked == 0) &&
			((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
					(aiop->aio_pending == 0))) {
			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
			/* idle: sleep on the address-space cv instead */
			cvp = &as->a_cv;
			rqclnup = 0;
		}
		mutex_exit(&aiop->aio_mutex);
		if (poked) {
			/*
			 * If the process is exiting/killed, don't return
			 * immediately without waiting for pending I/O's
			 * and releasing the page locks.
			 */
			if (p->p_flag & (SEXITLWPS|SKILLED)) {
				/*
				 * If exit_flag is set, then it is
				 * safe to exit because we have released
				 * page locks of completed I/O's.
				 */
				if (exit_flag)
					break;

				mutex_exit(&as->a_contents);

				/*
				 * Wait for all the pending aio to complete.
				 */
				mutex_enter(&aiop->aio_mutex);
				aiop->aio_flags |= AIO_REQ_BLOCK;
				while (aiop->aio_pending != 0)
					cv_wait(&aiop->aio_cleanupcv,
						&aiop->aio_mutex);
				mutex_exit(&aiop->aio_mutex);
				exit_flag = 1;
				continue;
			} else if (p->p_flag &
			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
				/*
				 * hold LWP until it
				 * is continued.
				 */
				mutex_exit(&as->a_contents);
				mutex_enter(&p->p_lock);
				stop(PR_SUSPENDED, SUSPEND_NORMAL);
				mutex_exit(&p->p_lock);
				poked = 0;
				continue;
			}
		} else {
			/*
			 * When started this thread will sleep on as->a_cv.
			 * as_unmap will awake this thread if the
			 * segment has SOFTLOCKed pages (poked = 0).
			 * 1. pokelwps() awakes this thread =>
			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
			 * 2. as_unmap awakes this thread =>
			 *    to break the loop it is necessary that
			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
			 *	memory to be unlocked)
			 *    - AIO_CLEANUP is not set
			 *	(if AIO_CLEANUP is set we have to wait for
			 *	pending requests. aio_done will send a signal
			 *	for every request which completes to continue
			 *	unmapping the corresponding address range)
			 * 3. A cleanup request will wake this thread up, ex.
			 *    by the DR operations. The aio_rqclnup flag will
			 *    be set.
			 */
			while (poked == 0) {
				/*
				 * we need to handle cleanup requests
				 * that come in after we had just cleaned up,
				 * so that we do cleanup of any new aio
				 * requests that got completed and have
				 * locked resources.
				 */
				if ((aiop->aio_rqclnup ||
					(AS_ISUNMAPWAIT(as) != 0)) &&
					(aiop->aio_flags & AIO_CLEANUP) == 0)
					break;
				poked = !cv_wait_sig(cvp, &as->a_contents);
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_signal(cvp);
				if (aiop->aio_outstanding != 0)
					break;
			}
		}
		mutex_exit(&as->a_contents);
	}
exit:
	/* reached via the break above with as->a_contents still held */
	mutex_exit(&as->a_contents);
	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
	aston(curthread);	/* make thread do post_syscall */
	return (0);
}
27450Sstevel@tonic-gate 
27460Sstevel@tonic-gate /*
27470Sstevel@tonic-gate  * save a reference to a user's outstanding aio in a hash list.
27480Sstevel@tonic-gate  */
27490Sstevel@tonic-gate static int
27500Sstevel@tonic-gate aio_hash_insert(
27510Sstevel@tonic-gate 	aio_req_t *aio_reqp,
27520Sstevel@tonic-gate 	aio_t *aiop)
27530Sstevel@tonic-gate {
27540Sstevel@tonic-gate 	long index;
27550Sstevel@tonic-gate 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
27560Sstevel@tonic-gate 	aio_req_t *current;
27570Sstevel@tonic-gate 	aio_req_t **nextp;
27580Sstevel@tonic-gate 
27590Sstevel@tonic-gate 	index = AIO_HASH(resultp);
27600Sstevel@tonic-gate 	nextp = &aiop->aio_hash[index];
27610Sstevel@tonic-gate 	while ((current = *nextp) != NULL) {
27620Sstevel@tonic-gate 		if (current->aio_req_resultp == resultp)
27630Sstevel@tonic-gate 			return (DUPLICATE);
27640Sstevel@tonic-gate 		nextp = &current->aio_hash_next;
27650Sstevel@tonic-gate 	}
27660Sstevel@tonic-gate 	*nextp = aio_reqp;
27670Sstevel@tonic-gate 	aio_reqp->aio_hash_next = NULL;
27680Sstevel@tonic-gate 	return (0);
27690Sstevel@tonic-gate }
27700Sstevel@tonic-gate 
27710Sstevel@tonic-gate static int
27720Sstevel@tonic-gate (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
27730Sstevel@tonic-gate     cred_t *)
27740Sstevel@tonic-gate {
27750Sstevel@tonic-gate 	struct snode *sp;
27760Sstevel@tonic-gate 	dev_t		dev;
27770Sstevel@tonic-gate 	struct cb_ops  	*cb;
27780Sstevel@tonic-gate 	major_t		major;
27790Sstevel@tonic-gate 	int		(*aio_func)();
27800Sstevel@tonic-gate 
27810Sstevel@tonic-gate 	dev = vp->v_rdev;
27820Sstevel@tonic-gate 	major = getmajor(dev);
27830Sstevel@tonic-gate 
27840Sstevel@tonic-gate 	/*
27850Sstevel@tonic-gate 	 * return NULL for requests to files and STREAMs so
27860Sstevel@tonic-gate 	 * that libaio takes care of them.
27870Sstevel@tonic-gate 	 */
27880Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
27890Sstevel@tonic-gate 		/* no stream device for kaio */
27900Sstevel@tonic-gate 		if (STREAMSTAB(major)) {
27910Sstevel@tonic-gate 			return (NULL);
27920Sstevel@tonic-gate 		}
27930Sstevel@tonic-gate 	} else {
27940Sstevel@tonic-gate 		return (NULL);
27950Sstevel@tonic-gate 	}
27960Sstevel@tonic-gate 
27970Sstevel@tonic-gate 	/*
27980Sstevel@tonic-gate 	 * Check old drivers which do not have async I/O entry points.
27990Sstevel@tonic-gate 	 */
28000Sstevel@tonic-gate 	if (devopsp[major]->devo_rev < 3)
28010Sstevel@tonic-gate 		return (NULL);
28020Sstevel@tonic-gate 
28030Sstevel@tonic-gate 	cb = devopsp[major]->devo_cb_ops;
28040Sstevel@tonic-gate 
28050Sstevel@tonic-gate 	if (cb->cb_rev < 1)
28060Sstevel@tonic-gate 		return (NULL);
28070Sstevel@tonic-gate 
28080Sstevel@tonic-gate 	/*
28090Sstevel@tonic-gate 	 * Check whether this device is a block device.
28100Sstevel@tonic-gate 	 * Kaio is not supported for devices like tty.
28110Sstevel@tonic-gate 	 */
28120Sstevel@tonic-gate 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
28130Sstevel@tonic-gate 		return (NULL);
28140Sstevel@tonic-gate 
28150Sstevel@tonic-gate 	/*
28160Sstevel@tonic-gate 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
28170Sstevel@tonic-gate 	 * We cannot call the driver directly. Instead return the
28180Sstevel@tonic-gate 	 * PXFS functions.
28190Sstevel@tonic-gate 	 */
28200Sstevel@tonic-gate 
28210Sstevel@tonic-gate 	if (IS_PXFSVP(vp)) {
28220Sstevel@tonic-gate 		if (mode & FREAD)
28230Sstevel@tonic-gate 			return (clpxfs_aio_read);
28240Sstevel@tonic-gate 		else
28250Sstevel@tonic-gate 			return (clpxfs_aio_write);
28260Sstevel@tonic-gate 	}
28270Sstevel@tonic-gate 	if (mode & FREAD)
28280Sstevel@tonic-gate 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
28290Sstevel@tonic-gate 	else
28300Sstevel@tonic-gate 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
28310Sstevel@tonic-gate 
28320Sstevel@tonic-gate 	/*
28330Sstevel@tonic-gate 	 * Do we need this ?
28340Sstevel@tonic-gate 	 * nodev returns ENXIO anyway.
28350Sstevel@tonic-gate 	 */
28360Sstevel@tonic-gate 	if (aio_func == nodev)
28370Sstevel@tonic-gate 		return (NULL);
28380Sstevel@tonic-gate 
28390Sstevel@tonic-gate 	sp = VTOS(vp);
28400Sstevel@tonic-gate 	smark(sp, SACC);
28410Sstevel@tonic-gate 	return (aio_func);
28420Sstevel@tonic-gate }
28430Sstevel@tonic-gate 
28440Sstevel@tonic-gate /*
28450Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28460Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28470Sstevel@tonic-gate  * We define this intermediate function that will do the right
28480Sstevel@tonic-gate  * thing for driver cases.
28490Sstevel@tonic-gate  */
28500Sstevel@tonic-gate 
28510Sstevel@tonic-gate static int
28520Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28530Sstevel@tonic-gate {
28540Sstevel@tonic-gate 	dev_t dev;
28550Sstevel@tonic-gate 	struct cb_ops  	*cb;
28560Sstevel@tonic-gate 
28570Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28580Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28590Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28600Sstevel@tonic-gate 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
28610Sstevel@tonic-gate 
28620Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28630Sstevel@tonic-gate 
28640Sstevel@tonic-gate 	ASSERT(cb->cb_awrite != nodev);
28650Sstevel@tonic-gate 	return ((*cb->cb_awrite)(dev, aio, cred_p));
28660Sstevel@tonic-gate }
28670Sstevel@tonic-gate 
28680Sstevel@tonic-gate /*
28690Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28700Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28710Sstevel@tonic-gate  * We define this intermediate function that will do the right
28720Sstevel@tonic-gate  * thing for driver cases.
28730Sstevel@tonic-gate  */
28740Sstevel@tonic-gate 
28750Sstevel@tonic-gate static int
28760Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28770Sstevel@tonic-gate {
28780Sstevel@tonic-gate 	dev_t dev;
28790Sstevel@tonic-gate 	struct cb_ops  	*cb;
28800Sstevel@tonic-gate 
28810Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28820Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28830Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28840Sstevel@tonic-gate 	ASSERT(!STREAMSTAB(getmajor(dev)));
28850Sstevel@tonic-gate 
28860Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28870Sstevel@tonic-gate 
28880Sstevel@tonic-gate 	ASSERT(cb->cb_aread != nodev);
28890Sstevel@tonic-gate 	return ((*cb->cb_aread)(dev, aio, cred_p));
28900Sstevel@tonic-gate }
28910Sstevel@tonic-gate 
28920Sstevel@tonic-gate /*
28930Sstevel@tonic-gate  * This routine is called when a largefile call is made by a 32bit
28940Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel. All 64bit processes are large
28950Sstevel@tonic-gate  * file by definition and will call alio() instead.
28960Sstevel@tonic-gate  */
static int
alioLF(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
#ifdef _LP64
	aiocb_t		aiocb_n;
#endif
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	port_notify32_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/* this entry point serves only 32-bit (ILP32 data model) callers */
	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	/* copy in the array of 32-bit aiocb pointers, and the sigevent */
	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			/*
			 * SIGEV_THREAD passes the port number in
			 * sigev_signo and the user cookie in sigev_value.
			 */
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		/* lio_head_port is only consulted when pkevtp != NULL */
		lio_head_port = pnotify.portnfy_port;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		/* deadhead: no request has been queued against head yet */
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	/*
	 * Process each aiocb in the list.  Every skipped or failed entry
	 * drops its reference on the list head (lio_nent/lio_refcnt) so
	 * list-wide notification is not held up by it.
	 */
	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

#ifdef	_LP64
		/* LP64: convert to a native aiocb and use the common setup */
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = *ucbp;

		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				/* same port as the list head: share its event */
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length request: complete it at once */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		/* wait until every request in the list has completed */
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* no request was ever queued: free the unused list head */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
32530Sstevel@tonic-gate 
32540Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
/*
 * Convert a 32-bit largefile control block (aiocb64_32_t) to the
 * native 64-bit aiocb_t, field by field, widening pointers through
 * uintptr_t.
 */
static void
aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
32810Sstevel@tonic-gate #endif
32820Sstevel@tonic-gate 
32830Sstevel@tonic-gate /*
32840Sstevel@tonic-gate  * This function is used only for largefile calls made by
32851885Sraf  * 32 bit applications.
32860Sstevel@tonic-gate  */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int 		error;

	/*
	 * For SIGEV_SIGNAL notification, preallocate and fill in the
	 * sigqueue entry now so completion cannot fail on allocation.
	 */
	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	/*
	 * NOTE(review): AIO_REQ_BLOCK appears to refuse new requests
	 * (e.g. while kaio is shutting down) -- confirm against the
	 * flag's setters elsewhere in this file.
	 */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	/* counters are updated under aio_mutex, together with the enqueue */
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	/* port/thread notifications are tracked on the portpending queue */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
33590Sstevel@tonic-gate 
33600Sstevel@tonic-gate /*
33610Sstevel@tonic-gate  * This routine is called when a non largefile call is made by a 32bit
33620Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel.
33630Sstevel@tonic-gate  */
33640Sstevel@tonic-gate static int
33650Sstevel@tonic-gate alio32(
33660Sstevel@tonic-gate 	int		mode_arg,
33670Sstevel@tonic-gate 	void		*aiocb_arg,
33680Sstevel@tonic-gate 	int		nent,
33691885Sraf 	void		*sigev)
33700Sstevel@tonic-gate {
33710Sstevel@tonic-gate 	file_t		*fp;
33720Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
33730Sstevel@tonic-gate 	int		prev_mode = -1;
33740Sstevel@tonic-gate 	struct vnode	*vp;
33750Sstevel@tonic-gate 	aio_lio_t	*head;
33760Sstevel@tonic-gate 	aio_req_t	*reqp;
33770Sstevel@tonic-gate 	aio_t		*aiop;
33781885Sraf 	caddr_t		cbplist;
33790Sstevel@tonic-gate 	aiocb_t		cb;
33800Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
33810Sstevel@tonic-gate #ifdef	_LP64
33820Sstevel@tonic-gate 	aiocb32_t	*cbp;
33830Sstevel@tonic-gate 	caddr32_t	*ucbp;
33840Sstevel@tonic-gate 	aiocb32_t	cb32;
33850Sstevel@tonic-gate 	aiocb32_t	*aiocb32 = &cb32;
33861885Sraf 	struct sigevent32	sigevk;
33870Sstevel@tonic-gate #else
33880Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
33891885Sraf 	struct sigevent	sigevk;
33900Sstevel@tonic-gate #endif
33910Sstevel@tonic-gate 	sigqueue_t	*sqp;
33920Sstevel@tonic-gate 	int		(*aio_func)();
33930Sstevel@tonic-gate 	int		mode;
33941885Sraf 	int		error = 0;
33951885Sraf 	int		aio_errors = 0;
33960Sstevel@tonic-gate 	int		i;
33970Sstevel@tonic-gate 	size_t		ssize;
33980Sstevel@tonic-gate 	int		deadhead = 0;
33990Sstevel@tonic-gate 	int		aio_notsupported = 0;
34001885Sraf 	int		lio_head_port;
34011885Sraf 	int		aio_port;
34021885Sraf 	int		aio_thread;
34030Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
34040Sstevel@tonic-gate #ifdef	_LP64
34050Sstevel@tonic-gate 	port_notify32_t	pnotify;
34060Sstevel@tonic-gate #else
34070Sstevel@tonic-gate 	port_notify_t	pnotify;
34080Sstevel@tonic-gate #endif
34091885Sraf 	int		event;
34101885Sraf 
34110Sstevel@tonic-gate 	aiop = curproc->p_aio;
34120Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
34130Sstevel@tonic-gate 		return (EINVAL);
34140Sstevel@tonic-gate 
34150Sstevel@tonic-gate #ifdef	_LP64
34160Sstevel@tonic-gate 	ssize = (sizeof (caddr32_t) * nent);
34170Sstevel@tonic-gate #else
34180Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
34190Sstevel@tonic-gate #endif
34200Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
34210Sstevel@tonic-gate 	ucbp = (void *)cbplist;
34220Sstevel@tonic-gate 
34231885Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
34241885Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
34250Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
34260Sstevel@tonic-gate 		return (EFAULT);
34270Sstevel@tonic-gate 	}
34280Sstevel@tonic-gate 
34291885Sraf 	/* Event Ports  */
34301885Sraf 	if (sigev &&
34311885Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
34321885Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
34331885Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
34341885Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
34351885Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
34361885Sraf 		} else if (copyin(
34371885Sraf 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
34381885Sraf 		    &pnotify, sizeof (pnotify))) {
34390Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
34400Sstevel@tonic-gate 			return (EFAULT);
34410Sstevel@tonic-gate 		}
34421885Sraf 		error = port_alloc_event(pnotify.portnfy_port,
34431885Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
34441885Sraf 		if (error) {
34451885Sraf 			if (error == ENOMEM || error == EAGAIN)
34461885Sraf 				error = EAGAIN;
34471885Sraf 			else
34481885Sraf 				error = EINVAL;
34491885Sraf 			kmem_free(cbplist, ssize);
34501885Sraf 			return (error);
34511885Sraf 		}
34521885Sraf 		lio_head_port = pnotify.portnfy_port;
34530Sstevel@tonic-gate 	}
34540Sstevel@tonic-gate 
34550Sstevel@tonic-gate 	/*
34560Sstevel@tonic-gate 	 * a list head should be allocated if notification is
34570Sstevel@tonic-gate 	 * enabled for this list.
34580Sstevel@tonic-gate 	 */
34590Sstevel@tonic-gate 	head = NULL;
34600Sstevel@tonic-gate 
34611885Sraf 	if (mode_arg == LIO_WAIT || sigev) {
34620Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
34630Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
34640Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
34650Sstevel@tonic-gate 		if (error)
34660Sstevel@tonic-gate 			goto done;
34670Sstevel@tonic-gate 		deadhead = 1;
34680Sstevel@tonic-gate 		head->lio_nent = nent;
34690Sstevel@tonic-gate 		head->lio_refcnt = nent;
34701885Sraf 		head->lio_port = -1;
34711885Sraf 		head->lio_portkev = NULL;
34721885Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
34731885Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
34740Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
34750Sstevel@tonic-gate 			if (sqp == NULL) {
34760Sstevel@tonic-gate 				error = EAGAIN;
34770Sstevel@tonic-gate 				goto done;
34780Sstevel@tonic-gate 			}
34790Sstevel@tonic-gate 			sqp->sq_func = NULL;
34800Sstevel@tonic-gate 			sqp->sq_next = NULL;
34810Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
34820Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
34830Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
34840Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
34850Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
34861885Sraf 			sqp->sq_info.si_signo = sigevk.sigev_signo;
34870Sstevel@tonic-gate 			sqp->sq_info.si_value.sival_int =
34881885Sraf 			    sigevk.sigev_value.sival_int;
34890Sstevel@tonic-gate 			head->lio_sigqp = sqp;
34900Sstevel@tonic-gate 		} else {
34910Sstevel@tonic-gate 			head->lio_sigqp = NULL;
34920Sstevel@tonic-gate 		}
34931885Sraf 		if (pkevtp) {
34941885Sraf 			/*
34951885Sraf 			 * Prepare data to send when list of aiocb's has
34961885Sraf 			 * completed.
34971885Sraf 			 */
34981885Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
34991885Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
35001885Sraf 			    NULL, head);
35011885Sraf 			pkevtp->portkev_events = AIOLIO;
35021885Sraf 			head->lio_portkev = pkevtp;
35031885Sraf 			head->lio_port = pnotify.portnfy_port;
35041885Sraf 		}
35050Sstevel@tonic-gate 	}
35060Sstevel@tonic-gate 
35070Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
35080Sstevel@tonic-gate 
35090Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
35100Sstevel@tonic-gate #ifdef	_LP64
35110Sstevel@tonic-gate 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
35121885Sraf 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
35130Sstevel@tonic-gate #else
35140Sstevel@tonic-gate 		cbp = (aiocb_t *)*ucbp;
35151885Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
35160Sstevel@tonic-gate #endif
35171885Sraf 		{
35180Sstevel@tonic-gate 			if (head) {
35190Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35200Sstevel@tonic-gate 				head->lio_nent--;
35210Sstevel@tonic-gate 				head->lio_refcnt--;
35220Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35230Sstevel@tonic-gate 			}
35240Sstevel@tonic-gate 			continue;
35250Sstevel@tonic-gate 		}
35260Sstevel@tonic-gate #ifdef	_LP64
35270Sstevel@tonic-gate 		/*
35280Sstevel@tonic-gate 		 * copy 32 bit structure into 64 bit structure
35290Sstevel@tonic-gate 		 */
35300Sstevel@tonic-gate 		aiocb_32ton(aiocb32, aiocb);
35310Sstevel@tonic-gate #endif /* _LP64 */
35320Sstevel@tonic-gate 
35330Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
35340Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
35350Sstevel@tonic-gate 		if (mode == LIO_NOP) {
35360Sstevel@tonic-gate 			cbp = NULL;
35370Sstevel@tonic-gate 			if (head) {
35380Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35390Sstevel@tonic-gate 				head->lio_nent--;
35400Sstevel@tonic-gate 				head->lio_refcnt--;
35410Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35420Sstevel@tonic-gate 			}
35430Sstevel@tonic-gate 			continue;
35440Sstevel@tonic-gate 		}
35450Sstevel@tonic-gate 
35460Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
35470Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
35480Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35490Sstevel@tonic-gate 			if (head) {
35500Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35510Sstevel@tonic-gate 				head->lio_nent--;
35520Sstevel@tonic-gate 				head->lio_refcnt--;
35530Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35540Sstevel@tonic-gate 			}
35550Sstevel@tonic-gate 			aio_errors++;
35560Sstevel@tonic-gate 			continue;
35570Sstevel@tonic-gate 		}
35580Sstevel@tonic-gate 
35590Sstevel@tonic-gate 		/*
35600Sstevel@tonic-gate 		 * check the permission of the partition
35610Sstevel@tonic-gate 		 */
35620Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
35630Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
35640Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35650Sstevel@tonic-gate 			if (head) {
35660Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35670Sstevel@tonic-gate 				head->lio_nent--;
35680Sstevel@tonic-gate 				head->lio_refcnt--;
35690Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35700Sstevel@tonic-gate 			}
35710Sstevel@tonic-gate 			aio_errors++;
35720Sstevel@tonic-gate 			continue;
35730Sstevel@tonic-gate 		}
35740Sstevel@tonic-gate 
35750Sstevel@tonic-gate 		/*
35760Sstevel@tonic-gate 		 * common case where requests are to the same fd
35770Sstevel@tonic-gate 		 * for the same r/w operation
35780Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
35790Sstevel@tonic-gate 		 */
35801885Sraf 		vp = fp->f_vnode;
35811885Sraf 		if (fp != prev_fp || mode != prev_mode) {
35820Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
35830Sstevel@tonic-gate 			if (aio_func == NULL) {
35840Sstevel@tonic-gate 				prev_fp = NULL;
35850Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
35861885Sraf 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
35870Sstevel@tonic-gate 				aio_notsupported++;
35880Sstevel@tonic-gate 				if (head) {
35890Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
35900Sstevel@tonic-gate 					head->lio_nent--;
35910Sstevel@tonic-gate 					head->lio_refcnt--;
35920Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
35930Sstevel@tonic-gate 				}
35940Sstevel@tonic-gate 				continue;
35950Sstevel@tonic-gate 			} else {
35960Sstevel@tonic-gate 				prev_fp = fp;
35970Sstevel@tonic-gate 				prev_mode = mode;
35980Sstevel@tonic-gate 			}
35990Sstevel@tonic-gate 		}
36001885Sraf 
36011885Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
36021885Sraf 		    (aio_result_t *)&cbp->aio_resultp, vp);
36031885Sraf 		if (error) {
36040Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
36050Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
36060Sstevel@tonic-gate 			if (head) {
36070Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
36080Sstevel@tonic-gate 				head->lio_nent--;
36090Sstevel@tonic-gate 				head->lio_refcnt--;
36100Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
36110Sstevel@tonic-gate 			}
36120Sstevel@tonic-gate 			aio_errors++;
36130Sstevel@tonic-gate 			continue;
36140Sstevel@tonic-gate 		}
36150Sstevel@tonic-gate 
36160Sstevel@tonic-gate 		reqp->aio_req_lio = head;
36170Sstevel@tonic-gate 		deadhead = 0;
36180Sstevel@tonic-gate 
36190Sstevel@tonic-gate 		/*
36200Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
36210Sstevel@tonic-gate 		 * the driver to avoid a race condition
36220Sstevel@tonic-gate 		 */
36230Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
36240Sstevel@tonic-gate 		    EINPROGRESS);
36250Sstevel@tonic-gate 
36261885Sraf 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
36271885Sraf 
36281885Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
36291885Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
36301885Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
36311885Sraf 		if (aio_port | aio_thread) {
36321885Sraf 			port_kevent_t *lpkevp;
36331885Sraf 			/*
36341885Sraf 			 * Prepare data to send with each aiocb completed.
36351885Sraf 			 */
36360Sstevel@tonic-gate #ifdef _LP64
36371885Sraf 			if (aio_port) {
36381885Sraf 				void *paddr = (void  *)(uintptr_t)
36391885Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
36401885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
36411885Sraf 					error = EFAULT;
36421885Sraf 			} else {	/* aio_thread */
36431885Sraf 				pnotify.portnfy_port =
36441885Sraf 				    aiocb32->aio_sigevent.sigev_signo;
36451885Sraf 				pnotify.portnfy_user =
36461885Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
36471885Sraf 			}
36480Sstevel@tonic-gate #else
36491885Sraf 			if (aio_port) {
36501885Sraf 				void *paddr =
36511885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
36521885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
36531885Sraf 					error = EFAULT;
36541885Sraf 			} else {	/* aio_thread */
36551885Sraf 				pnotify.portnfy_port =
36561885Sraf 				    aiocb->aio_sigevent.sigev_signo;
36571885Sraf 				pnotify.portnfy_user =
36581885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
36591885Sraf 			}
36600Sstevel@tonic-gate #endif
36611885Sraf 			if (error)
36621885Sraf 				/* EMPTY */;
36631885Sraf 			else if (pkevtp != NULL &&
36641885Sraf 			    pnotify.portnfy_port == lio_head_port)
36651885Sraf 				error = port_dup_event(pkevtp, &lpkevp,
36661885Sraf 				    PORT_ALLOC_DEFAULT);
36671885Sraf 			else
36681885Sraf 				error = port_alloc_event(pnotify.portnfy_port,
36691885Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
36701885Sraf 				    &lpkevp);
36711885Sraf 			if (error == 0) {
36721885Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
36731885Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
36741885Sraf 				    aio_port_callback, reqp);
36751885Sraf 				lpkevp->portkev_events = event;
36761885Sraf 				reqp->aio_req_portkev = lpkevp;
36771885Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
36781885Sraf 			}
36790Sstevel@tonic-gate 		}
36800Sstevel@tonic-gate 
36810Sstevel@tonic-gate 		/*
36820Sstevel@tonic-gate 		 * send the request to driver.
36830Sstevel@tonic-gate 		 */
36840Sstevel@tonic-gate 		if (error == 0) {
36850Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
36860Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
36870Sstevel@tonic-gate 				aio_zerolen(reqp);
36880Sstevel@tonic-gate 				continue;
36890Sstevel@tonic-gate 			}
36900Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
36910Sstevel@tonic-gate 			    CRED());
36920Sstevel@tonic-gate 		}
36930Sstevel@tonic-gate 
36940Sstevel@tonic-gate 		/*
36950Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
36960Sstevel@tonic-gate 		 * completed unless there was an error.
36970Sstevel@tonic-gate 		 */
36980Sstevel@tonic-gate 		if (error) {
36990Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
37000Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
37010Sstevel@tonic-gate 			if (head) {
37020Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
37030Sstevel@tonic-gate 				head->lio_nent--;
37040Sstevel@tonic-gate 				head->lio_refcnt--;
37050Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37060Sstevel@tonic-gate 			}
37070Sstevel@tonic-gate 			if (error == ENOTSUP)
37080Sstevel@tonic-gate 				aio_notsupported++;
37090Sstevel@tonic-gate 			else
37100Sstevel@tonic-gate 				aio_errors++;
37110Sstevel@tonic-gate 			lio_set_error(reqp);
37120Sstevel@tonic-gate 		} else {
37130Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
37140Sstevel@tonic-gate 		}
37150Sstevel@tonic-gate 	}
37160Sstevel@tonic-gate 
37170Sstevel@tonic-gate 	if (aio_notsupported) {
37180Sstevel@tonic-gate 		error = ENOTSUP;
37190Sstevel@tonic-gate 	} else if (aio_errors) {
37200Sstevel@tonic-gate 		/*
37210Sstevel@tonic-gate 		 * return EIO if any request failed
37220Sstevel@tonic-gate 		 */
37230Sstevel@tonic-gate 		error = EIO;
37240Sstevel@tonic-gate 	}
37250Sstevel@tonic-gate 
37260Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
37270Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
37280Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
37290Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
37300Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37310Sstevel@tonic-gate 				error = EINTR;
37320Sstevel@tonic-gate 				goto done;
37330Sstevel@tonic-gate 			}
37340Sstevel@tonic-gate 		}
37350Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
37360Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
37370Sstevel@tonic-gate 	}
37380Sstevel@tonic-gate 
37390Sstevel@tonic-gate done:
37400Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
37410Sstevel@tonic-gate 	if (deadhead) {
37420Sstevel@tonic-gate 		if (head->lio_sigqp)
37430Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
37441885Sraf 		if (head->lio_portkev)
37451885Sraf 			port_free_event(head->lio_portkev);
37460Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
37470Sstevel@tonic-gate 	}
37480Sstevel@tonic-gate 	return (error);
37490Sstevel@tonic-gate }
37500Sstevel@tonic-gate 
37510Sstevel@tonic-gate 
#ifdef  _SYSCALL32_IMPL
/*
 * Expand a 32-bit aiocb (aiocb32_t) into its native 64-bit form
 * (aiocb_t), one member at a time.  Pointer-sized members are widened
 * through uintptr_t; everything else copies across directly.
 */
void
aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
{
	/* members whose width is the same in both models */
	dest->aio_fildes = src->aio_fildes;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];

	/* members that widen between ILP32 and LP64 */
	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;

	/* the embedded sigevent describing completion notification */
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
}
#endif /* _SYSCALL32_IMPL */
37800Sstevel@tonic-gate 
37810Sstevel@tonic-gate /*
37820Sstevel@tonic-gate  * aio_port_callback() is called just before the event is retrieved from the
37830Sstevel@tonic-gate  * port. The task of this callback function is to finish the work of the
 * transaction for the application; that means:
37850Sstevel@tonic-gate  * - copyout transaction data to the application
37860Sstevel@tonic-gate  *	(this thread is running in the right process context)
37870Sstevel@tonic-gate  * - keep trace of the transaction (update of counters).
 * - keep track of the transaction (update of counters).
37890Sstevel@tonic-gate  * The aiocb pointer is the object element of the port_kevent_t structure.
37900Sstevel@tonic-gate  *
37910Sstevel@tonic-gate  * flag :
37920Sstevel@tonic-gate  *	PORT_CALLBACK_DEFAULT : do copyout and free resources
37930Sstevel@tonic-gate  *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
37940Sstevel@tonic-gate  */
37950Sstevel@tonic-gate 
/*ARGSUSED*/
int
aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	aio_t		*aiop = curproc->p_aio;
	aio_req_t	*reqp = arg;
	struct	iovec	*iov;
	struct	buf	*bp;
	void		*resultp;

	/*
	 * Results can only be copied out into the address space of the
	 * process that issued the request; refuse delivery from any
	 * other process context.
	 */
	if (pid != curproc->p_pid) {
		/* wrong proc !!, can not deliver data here ... */
		return (EACCES);
	}

	/*
	 * Detach the port event from the request and take the request
	 * off the port queue, all under aio_portq_mutex.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	reqp->aio_req_portkev = NULL;
	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
	mutex_exit(&aiop->aio_portq_mutex);
	aphysio_unlock(reqp);		/* unlock used pages */
	mutex_enter(&aiop->aio_mutex);
	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
		/* results already copied out; just recycle the request */
		aio_req_free_port(aiop, reqp);	/* back to free list */
		mutex_exit(&aiop->aio_mutex);
		return (0);
	}

	/*
	 * Cache everything needed for the copyout before freeing reqp:
	 * aio_req_free_port() makes the request struct available for
	 * reuse, so reqp must not be touched afterwards.
	 */
	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	resultp = (void *)reqp->aio_req_resultp;
	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
	mutex_exit(&aiop->aio_mutex);
	/* PORT_CALLBACK_CLOSE means free resources without copyout */
	if (flag == PORT_CALLBACK_DEFAULT)
		aio_copyout_result_port(iov, bp, resultp);
	return (0);
}
3832