xref: /onnv-gate/usr/src/uts/common/os/aio.c (revision 10719:203f5727fba9)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51885Sraf  * Common Development and Distribution License (the "License").
61885Sraf  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211885Sraf 
220Sstevel@tonic-gate /*
238519SVamsi.Krishna@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate  * Kernel asynchronous I/O.
290Sstevel@tonic-gate  * This is only for raw devices now (as of Nov. 1993).
300Sstevel@tonic-gate  */
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include <sys/types.h>
330Sstevel@tonic-gate #include <sys/errno.h>
340Sstevel@tonic-gate #include <sys/conf.h>
350Sstevel@tonic-gate #include <sys/file.h>
360Sstevel@tonic-gate #include <sys/fs/snode.h>
370Sstevel@tonic-gate #include <sys/unistd.h>
380Sstevel@tonic-gate #include <sys/cmn_err.h>
390Sstevel@tonic-gate #include <vm/as.h>
400Sstevel@tonic-gate #include <vm/faultcode.h>
410Sstevel@tonic-gate #include <sys/sysmacros.h>
420Sstevel@tonic-gate #include <sys/procfs.h>
430Sstevel@tonic-gate #include <sys/kmem.h>
440Sstevel@tonic-gate #include <sys/autoconf.h>
450Sstevel@tonic-gate #include <sys/ddi_impldefs.h>
460Sstevel@tonic-gate #include <sys/sunddi.h>
470Sstevel@tonic-gate #include <sys/aio_impl.h>
480Sstevel@tonic-gate #include <sys/debug.h>
490Sstevel@tonic-gate #include <sys/param.h>
500Sstevel@tonic-gate #include <sys/systm.h>
510Sstevel@tonic-gate #include <sys/vmsystm.h>
520Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h>
530Sstevel@tonic-gate #include <sys/contract/process_impl.h>
540Sstevel@tonic-gate 
550Sstevel@tonic-gate /*
560Sstevel@tonic-gate  * external entry point.
570Sstevel@tonic-gate  */
580Sstevel@tonic-gate #ifdef _LP64
590Sstevel@tonic-gate static int64_t kaioc(long, long, long, long, long, long);
600Sstevel@tonic-gate #endif
610Sstevel@tonic-gate static int kaio(ulong_t *, rval_t *);
620Sstevel@tonic-gate 
630Sstevel@tonic-gate 
640Sstevel@tonic-gate #define	AIO_64	0
650Sstevel@tonic-gate #define	AIO_32	1
660Sstevel@tonic-gate #define	AIO_LARGEFILE	2
670Sstevel@tonic-gate 
680Sstevel@tonic-gate /*
690Sstevel@tonic-gate  * implementation specific functions (private)
700Sstevel@tonic-gate  */
710Sstevel@tonic-gate #ifdef _LP64
721885Sraf static int alio(int, aiocb_t **, int, struct sigevent *);
730Sstevel@tonic-gate #endif
740Sstevel@tonic-gate static int aionotify(void);
750Sstevel@tonic-gate static int aioinit(void);
760Sstevel@tonic-gate static int aiostart(void);
770Sstevel@tonic-gate static void alio_cleanup(aio_t *, aiocb_t **, int, int);
780Sstevel@tonic-gate static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
790Sstevel@tonic-gate     cred_t *);
804502Spraks static void lio_set_error(aio_req_t *, int portused);
810Sstevel@tonic-gate static aio_t *aio_aiop_alloc();
820Sstevel@tonic-gate static int aio_req_alloc(aio_req_t **, aio_result_t *);
830Sstevel@tonic-gate static int aio_lio_alloc(aio_lio_t **);
840Sstevel@tonic-gate static aio_req_t *aio_req_done(void *);
850Sstevel@tonic-gate static aio_req_t *aio_req_remove(aio_req_t *);
860Sstevel@tonic-gate static int aio_req_find(aio_result_t *, aio_req_t **);
870Sstevel@tonic-gate static int aio_hash_insert(struct aio_req_t *, aio_t *);
880Sstevel@tonic-gate static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
89*10719SRoger.Faulkner@Sun.COM     aio_result_t *, vnode_t *, int);
900Sstevel@tonic-gate static int aio_cleanup_thread(aio_t *);
910Sstevel@tonic-gate static aio_lio_t *aio_list_get(aio_result_t *);
920Sstevel@tonic-gate static void lio_set_uerror(void *, int);
930Sstevel@tonic-gate extern void aio_zerolen(aio_req_t *);
940Sstevel@tonic-gate static int aiowait(struct timeval *, int, long	*);
950Sstevel@tonic-gate static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
960Sstevel@tonic-gate static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
970Sstevel@tonic-gate     aio_req_t *reqlist, aio_t *aiop, model_t model);
980Sstevel@tonic-gate static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
990Sstevel@tonic-gate static int aiosuspend(void *, int, struct  timespec *, int,
1000Sstevel@tonic-gate     long	*, int);
1010Sstevel@tonic-gate static int aliowait(int, void *, int, void *, int);
1020Sstevel@tonic-gate static int aioerror(void *, int);
1030Sstevel@tonic-gate static int aio_cancel(int, void *, long	*, int);
1040Sstevel@tonic-gate static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
1050Sstevel@tonic-gate static int aiorw(int, void *, int, int);
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate static int alioLF(int, void *, int, void *);
1081885Sraf static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
109*10719SRoger.Faulkner@Sun.COM     aio_result_t *, vnode_t *, int);
1100Sstevel@tonic-gate static int alio32(int, void *, int, void *);
1110Sstevel@tonic-gate static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1120Sstevel@tonic-gate static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1130Sstevel@tonic-gate 
1140Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1150Sstevel@tonic-gate static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
1160Sstevel@tonic-gate void	aiocb_32ton(aiocb32_t *, aiocb_t *);
1170Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate  * implementation specific functions (external)
1210Sstevel@tonic-gate  */
1220Sstevel@tonic-gate void aio_req_free(aio_t *, aio_req_t *);
1230Sstevel@tonic-gate 
1240Sstevel@tonic-gate /*
1250Sstevel@tonic-gate  * Event Port framework
1260Sstevel@tonic-gate  */
1270Sstevel@tonic-gate 
1280Sstevel@tonic-gate void aio_req_free_port(aio_t *, aio_req_t *);
1290Sstevel@tonic-gate static int aio_port_callback(void *, int *, pid_t, int, void *);
1300Sstevel@tonic-gate 
1310Sstevel@tonic-gate /*
1320Sstevel@tonic-gate  * This is the loadable module wrapper.
1330Sstevel@tonic-gate  */
1340Sstevel@tonic-gate #include <sys/modctl.h>
1350Sstevel@tonic-gate #include <sys/syscall.h>
1360Sstevel@tonic-gate 
1370Sstevel@tonic-gate #ifdef _LP64
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate static struct sysent kaio_sysent = {
1400Sstevel@tonic-gate 	6,
1410Sstevel@tonic-gate 	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
1420Sstevel@tonic-gate 	(int (*)())kaioc
1430Sstevel@tonic-gate };
1440Sstevel@tonic-gate 
1450Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
1460Sstevel@tonic-gate static struct sysent kaio_sysent32 = {
1470Sstevel@tonic-gate 	7,
1480Sstevel@tonic-gate 	SE_NOUNLOAD | SE_64RVAL,
1490Sstevel@tonic-gate 	kaio
1500Sstevel@tonic-gate };
1510Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate #else   /* _LP64 */
1540Sstevel@tonic-gate 
1550Sstevel@tonic-gate static struct sysent kaio_sysent = {
1560Sstevel@tonic-gate 	7,
1570Sstevel@tonic-gate 	SE_NOUNLOAD | SE_32RVAL1,
1580Sstevel@tonic-gate 	kaio
1590Sstevel@tonic-gate };
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate #endif  /* _LP64 */
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate /*
1640Sstevel@tonic-gate  * Module linkage information for the kernel.
1650Sstevel@tonic-gate  */
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate static struct modlsys modlsys = {
1680Sstevel@tonic-gate 	&mod_syscallops,
1690Sstevel@tonic-gate 	"kernel Async I/O",
1700Sstevel@tonic-gate 	&kaio_sysent
1710Sstevel@tonic-gate };
1720Sstevel@tonic-gate 
1730Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1740Sstevel@tonic-gate static struct modlsys modlsys32 = {
1750Sstevel@tonic-gate 	&mod_syscallops32,
1760Sstevel@tonic-gate 	"kernel Async I/O for 32 bit compatibility",
1770Sstevel@tonic-gate 	&kaio_sysent32
1780Sstevel@tonic-gate };
1790Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate 
1820Sstevel@tonic-gate static struct modlinkage modlinkage = {
1830Sstevel@tonic-gate 	MODREV_1,
1840Sstevel@tonic-gate 	&modlsys,
1850Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1860Sstevel@tonic-gate 	&modlsys32,
1870Sstevel@tonic-gate #endif
1880Sstevel@tonic-gate 	NULL
1890Sstevel@tonic-gate };
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate int
_init(void)1920Sstevel@tonic-gate _init(void)
1930Sstevel@tonic-gate {
1940Sstevel@tonic-gate 	int retval;
1950Sstevel@tonic-gate 
1960Sstevel@tonic-gate 	if ((retval = mod_install(&modlinkage)) != 0)
1970Sstevel@tonic-gate 		return (retval);
1980Sstevel@tonic-gate 
1990Sstevel@tonic-gate 	return (0);
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate int
_fini(void)2030Sstevel@tonic-gate _fini(void)
2040Sstevel@tonic-gate {
2050Sstevel@tonic-gate 	int retval;
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 	retval = mod_remove(&modlinkage);
2080Sstevel@tonic-gate 
2090Sstevel@tonic-gate 	return (retval);
2100Sstevel@tonic-gate }
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate int
_info(struct modinfo * modinfop)2130Sstevel@tonic-gate _info(struct modinfo *modinfop)
2140Sstevel@tonic-gate {
2150Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
2160Sstevel@tonic-gate }
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate #ifdef	_LP64
2190Sstevel@tonic-gate static int64_t
kaioc(long a0,long a1,long a2,long a3,long a4,long a5)2200Sstevel@tonic-gate kaioc(
2210Sstevel@tonic-gate 	long	a0,
2220Sstevel@tonic-gate 	long	a1,
2230Sstevel@tonic-gate 	long	a2,
2240Sstevel@tonic-gate 	long	a3,
2250Sstevel@tonic-gate 	long	a4,
2260Sstevel@tonic-gate 	long	a5)
2270Sstevel@tonic-gate {
2280Sstevel@tonic-gate 	int	error;
2290Sstevel@tonic-gate 	long	rval = 0;
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate 	switch ((int)a0 & ~AIO_POLL_BIT) {
2320Sstevel@tonic-gate 	case AIOREAD:
2330Sstevel@tonic-gate 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
2340Sstevel@tonic-gate 		    (offset_t)a4, (aio_result_t *)a5, FREAD);
2350Sstevel@tonic-gate 		break;
2360Sstevel@tonic-gate 	case AIOWRITE:
2370Sstevel@tonic-gate 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
2380Sstevel@tonic-gate 		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
2390Sstevel@tonic-gate 		break;
2400Sstevel@tonic-gate 	case AIOWAIT:
2410Sstevel@tonic-gate 		error = aiowait((struct timeval *)a1, (int)a2, &rval);
2420Sstevel@tonic-gate 		break;
2430Sstevel@tonic-gate 	case AIOWAITN:
2440Sstevel@tonic-gate 		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
2450Sstevel@tonic-gate 		    (timespec_t *)a4);
2460Sstevel@tonic-gate 		break;
2470Sstevel@tonic-gate 	case AIONOTIFY:
2480Sstevel@tonic-gate 		error = aionotify();
2490Sstevel@tonic-gate 		break;
2500Sstevel@tonic-gate 	case AIOINIT:
2510Sstevel@tonic-gate 		error = aioinit();
2520Sstevel@tonic-gate 		break;
2530Sstevel@tonic-gate 	case AIOSTART:
2540Sstevel@tonic-gate 		error = aiostart();
2550Sstevel@tonic-gate 		break;
2560Sstevel@tonic-gate 	case AIOLIO:
2571885Sraf 		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
2580Sstevel@tonic-gate 		    (struct sigevent *)a4);
2590Sstevel@tonic-gate 		break;
2600Sstevel@tonic-gate 	case AIOLIOWAIT:
2610Sstevel@tonic-gate 		error = aliowait((int)a1, (void *)a2, (int)a3,
2620Sstevel@tonic-gate 		    (struct sigevent *)a4, AIO_64);
2630Sstevel@tonic-gate 		break;
2640Sstevel@tonic-gate 	case AIOSUSPEND:
2650Sstevel@tonic-gate 		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
2660Sstevel@tonic-gate 		    (int)a4, &rval, AIO_64);
2670Sstevel@tonic-gate 		break;
2680Sstevel@tonic-gate 	case AIOERROR:
2690Sstevel@tonic-gate 		error = aioerror((void *)a1, AIO_64);
2700Sstevel@tonic-gate 		break;
2710Sstevel@tonic-gate 	case AIOAREAD:
2720Sstevel@tonic-gate 		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
2730Sstevel@tonic-gate 		break;
2740Sstevel@tonic-gate 	case AIOAWRITE:
2750Sstevel@tonic-gate 		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
2760Sstevel@tonic-gate 		break;
2770Sstevel@tonic-gate 	case AIOCANCEL:
2780Sstevel@tonic-gate 		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
2790Sstevel@tonic-gate 		break;
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 	/*
2820Sstevel@tonic-gate 	 * The large file related stuff is valid only for
2830Sstevel@tonic-gate 	 * 32 bit kernel and not for 64 bit kernel
2840Sstevel@tonic-gate 	 * On 64 bit kernel we convert large file calls
2850Sstevel@tonic-gate 	 * to regular 64bit calls.
2860Sstevel@tonic-gate 	 */
2870Sstevel@tonic-gate 
2880Sstevel@tonic-gate 	default:
2890Sstevel@tonic-gate 		error = EINVAL;
2900Sstevel@tonic-gate 	}
2910Sstevel@tonic-gate 	if (error)
2920Sstevel@tonic-gate 		return ((int64_t)set_errno(error));
2930Sstevel@tonic-gate 	return (rval);
2940Sstevel@tonic-gate }
2950Sstevel@tonic-gate #endif
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate static int
kaio(ulong_t * uap,rval_t * rvp)2980Sstevel@tonic-gate kaio(
2990Sstevel@tonic-gate 	ulong_t *uap,
3000Sstevel@tonic-gate 	rval_t *rvp)
3010Sstevel@tonic-gate {
3020Sstevel@tonic-gate 	long rval = 0;
3030Sstevel@tonic-gate 	int	error = 0;
3040Sstevel@tonic-gate 	offset_t	off;
3050Sstevel@tonic-gate 
3060Sstevel@tonic-gate 
3070Sstevel@tonic-gate 		rvp->r_vals = 0;
3080Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
3090Sstevel@tonic-gate 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
3100Sstevel@tonic-gate #else
3110Sstevel@tonic-gate 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
3120Sstevel@tonic-gate #endif
3130Sstevel@tonic-gate 
3140Sstevel@tonic-gate 	switch (uap[0] & ~AIO_POLL_BIT) {
3150Sstevel@tonic-gate 	/*
3160Sstevel@tonic-gate 	 * It must be the 32 bit system call on 64 bit kernel
3170Sstevel@tonic-gate 	 */
3180Sstevel@tonic-gate 	case AIOREAD:
3190Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3200Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
3210Sstevel@tonic-gate 	case AIOWRITE:
3220Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3230Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
3240Sstevel@tonic-gate 	case AIOWAIT:
3250Sstevel@tonic-gate 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
3260Sstevel@tonic-gate 		    &rval);
3270Sstevel@tonic-gate 		break;
3280Sstevel@tonic-gate 	case AIOWAITN:
3290Sstevel@tonic-gate 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
3300Sstevel@tonic-gate 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
3310Sstevel@tonic-gate 		break;
3320Sstevel@tonic-gate 	case AIONOTIFY:
3330Sstevel@tonic-gate 		return (aionotify());
3340Sstevel@tonic-gate 	case AIOINIT:
3350Sstevel@tonic-gate 		return (aioinit());
3360Sstevel@tonic-gate 	case AIOSTART:
3370Sstevel@tonic-gate 		return (aiostart());
3380Sstevel@tonic-gate 	case AIOLIO:
3390Sstevel@tonic-gate 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
3400Sstevel@tonic-gate 		    (void *)uap[4]));
3410Sstevel@tonic-gate 	case AIOLIOWAIT:
3420Sstevel@tonic-gate 		return (aliowait((int)uap[1], (void *)uap[2],
3430Sstevel@tonic-gate 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
3440Sstevel@tonic-gate 	case AIOSUSPEND:
3450Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3460Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4],
3470Sstevel@tonic-gate 		    &rval, AIO_32);
3480Sstevel@tonic-gate 		break;
3490Sstevel@tonic-gate 	case AIOERROR:
3500Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_32));
3510Sstevel@tonic-gate 	case AIOAREAD:
3520Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3530Sstevel@tonic-gate 		    FREAD, AIO_32));
3540Sstevel@tonic-gate 	case AIOAWRITE:
3550Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3560Sstevel@tonic-gate 		    FWRITE, AIO_32));
3570Sstevel@tonic-gate 	case AIOCANCEL:
3580Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
3590Sstevel@tonic-gate 		    AIO_32));
3600Sstevel@tonic-gate 		break;
3610Sstevel@tonic-gate 	case AIOLIO64:
3620Sstevel@tonic-gate 		return (alioLF((int)uap[1], (void *)uap[2],
3630Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4]));
3640Sstevel@tonic-gate 	case AIOLIOWAIT64:
3650Sstevel@tonic-gate 		return (aliowait(uap[1], (void *)uap[2],
3660Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
3670Sstevel@tonic-gate 	case AIOSUSPEND64:
3680Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3690Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4], &rval,
3700Sstevel@tonic-gate 		    AIO_LARGEFILE);
3710Sstevel@tonic-gate 		break;
3720Sstevel@tonic-gate 	case AIOERROR64:
3730Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
3740Sstevel@tonic-gate 	case AIOAREAD64:
3750Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
3760Sstevel@tonic-gate 		    AIO_LARGEFILE));
3770Sstevel@tonic-gate 	case AIOAWRITE64:
3780Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
3790Sstevel@tonic-gate 		    AIO_LARGEFILE));
3800Sstevel@tonic-gate 	case AIOCANCEL64:
3810Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2],
3820Sstevel@tonic-gate 		    &rval, AIO_LARGEFILE));
3830Sstevel@tonic-gate 		break;
3840Sstevel@tonic-gate 	default:
3850Sstevel@tonic-gate 		return (EINVAL);
3860Sstevel@tonic-gate 	}
3870Sstevel@tonic-gate 
3880Sstevel@tonic-gate 	rvp->r_val1 = rval;
3890Sstevel@tonic-gate 	return (error);
3900Sstevel@tonic-gate }
3910Sstevel@tonic-gate 
3920Sstevel@tonic-gate /*
3930Sstevel@tonic-gate  * wake up LWPs in this process that are sleeping in
3940Sstevel@tonic-gate  * aiowait().
3950Sstevel@tonic-gate  */
3960Sstevel@tonic-gate static int
aionotify(void)3970Sstevel@tonic-gate aionotify(void)
3980Sstevel@tonic-gate {
3990Sstevel@tonic-gate 	aio_t	*aiop;
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate 	aiop = curproc->p_aio;
4020Sstevel@tonic-gate 	if (aiop == NULL)
4030Sstevel@tonic-gate 		return (0);
4040Sstevel@tonic-gate 
4050Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
4060Sstevel@tonic-gate 	aiop->aio_notifycnt++;
4070Sstevel@tonic-gate 	cv_broadcast(&aiop->aio_waitcv);
4080Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
4090Sstevel@tonic-gate 
4100Sstevel@tonic-gate 	return (0);
4110Sstevel@tonic-gate }
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate static int
timeval2reltime(struct timeval * timout,timestruc_t * rqtime,timestruc_t ** rqtp,int * blocking)4140Sstevel@tonic-gate timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
4150Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4160Sstevel@tonic-gate {
4170Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4180Sstevel@tonic-gate 	struct timeval32 wait_time_32;
4190Sstevel@tonic-gate #endif
4200Sstevel@tonic-gate 	struct timeval wait_time;
4210Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate 	*rqtp = NULL;
4240Sstevel@tonic-gate 	if (timout == NULL) {		/* wait indefinitely */
4250Sstevel@tonic-gate 		*blocking = 1;
4260Sstevel@tonic-gate 		return (0);
4270Sstevel@tonic-gate 	}
4280Sstevel@tonic-gate 
4290Sstevel@tonic-gate 	/*
4300Sstevel@tonic-gate 	 * Need to correctly compare with the -1 passed in for a user
4310Sstevel@tonic-gate 	 * address pointer, with both 32 bit and 64 bit apps.
4320Sstevel@tonic-gate 	 */
4330Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4340Sstevel@tonic-gate 		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
4350Sstevel@tonic-gate 			*blocking = 0;
4360Sstevel@tonic-gate 			return (0);
4370Sstevel@tonic-gate 		}
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 		if (copyin(timout, &wait_time, sizeof (wait_time)))
4400Sstevel@tonic-gate 			return (EFAULT);
4410Sstevel@tonic-gate 	}
4420Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4430Sstevel@tonic-gate 	else {
4440Sstevel@tonic-gate 		/*
4450Sstevel@tonic-gate 		 * -1 from a 32bit app. It will not get sign extended.
4460Sstevel@tonic-gate 		 * don't wait if -1.
4470Sstevel@tonic-gate 		 */
4480Sstevel@tonic-gate 		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
4490Sstevel@tonic-gate 			*blocking = 0;
4500Sstevel@tonic-gate 			return (0);
4510Sstevel@tonic-gate 		}
4520Sstevel@tonic-gate 
4530Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
4540Sstevel@tonic-gate 			return (EFAULT);
4550Sstevel@tonic-gate 		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
4560Sstevel@tonic-gate 	}
4570Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
4580Sstevel@tonic-gate 
4590Sstevel@tonic-gate 	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
4600Sstevel@tonic-gate 		*blocking = 0;
4610Sstevel@tonic-gate 		return (0);
4620Sstevel@tonic-gate 	}
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate 	if (wait_time.tv_sec < 0 ||
4650Sstevel@tonic-gate 	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
4660Sstevel@tonic-gate 		return (EINVAL);
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 	rqtime->tv_sec = wait_time.tv_sec;
4690Sstevel@tonic-gate 	rqtime->tv_nsec = wait_time.tv_usec * 1000;
4700Sstevel@tonic-gate 	*rqtp = rqtime;
4710Sstevel@tonic-gate 	*blocking = 1;
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate 	return (0);
4740Sstevel@tonic-gate }
4750Sstevel@tonic-gate 
4760Sstevel@tonic-gate static int
timespec2reltime(timespec_t * timout,timestruc_t * rqtime,timestruc_t ** rqtp,int * blocking)4770Sstevel@tonic-gate timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
4780Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4790Sstevel@tonic-gate {
4800Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4810Sstevel@tonic-gate 	timespec32_t wait_time_32;
4820Sstevel@tonic-gate #endif
4830Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4840Sstevel@tonic-gate 
4850Sstevel@tonic-gate 	*rqtp = NULL;
4860Sstevel@tonic-gate 	if (timout == NULL) {
4870Sstevel@tonic-gate 		*blocking = 1;
4880Sstevel@tonic-gate 		return (0);
4890Sstevel@tonic-gate 	}
4900Sstevel@tonic-gate 
4910Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4920Sstevel@tonic-gate 		if (copyin(timout, rqtime, sizeof (*rqtime)))
4930Sstevel@tonic-gate 			return (EFAULT);
4940Sstevel@tonic-gate 	}
4950Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4960Sstevel@tonic-gate 	else {
4970Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
4980Sstevel@tonic-gate 			return (EFAULT);
4990Sstevel@tonic-gate 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
5000Sstevel@tonic-gate 	}
5010Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
5020Sstevel@tonic-gate 
5030Sstevel@tonic-gate 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
5040Sstevel@tonic-gate 		*blocking = 0;
5050Sstevel@tonic-gate 		return (0);
5060Sstevel@tonic-gate 	}
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 	if (rqtime->tv_sec < 0 ||
5090Sstevel@tonic-gate 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
5100Sstevel@tonic-gate 		return (EINVAL);
5110Sstevel@tonic-gate 
5120Sstevel@tonic-gate 	*rqtp = rqtime;
5130Sstevel@tonic-gate 	*blocking = 1;
5140Sstevel@tonic-gate 
5150Sstevel@tonic-gate 	return (0);
5160Sstevel@tonic-gate }
5170Sstevel@tonic-gate 
5180Sstevel@tonic-gate /*ARGSUSED*/
5190Sstevel@tonic-gate static int
aiowait(struct timeval * timout,int dontblockflg,long * rval)5200Sstevel@tonic-gate aiowait(
5210Sstevel@tonic-gate 	struct timeval	*timout,
5220Sstevel@tonic-gate 	int	dontblockflg,
5230Sstevel@tonic-gate 	long	*rval)
5240Sstevel@tonic-gate {
5250Sstevel@tonic-gate 	int 		error;
5260Sstevel@tonic-gate 	aio_t		*aiop;
5270Sstevel@tonic-gate 	aio_req_t	*reqp;
5280Sstevel@tonic-gate 	clock_t		status;
5290Sstevel@tonic-gate 	int		blocking;
5304123Sdm120769 	int		timecheck;
5310Sstevel@tonic-gate 	timestruc_t	rqtime;
5320Sstevel@tonic-gate 	timestruc_t	*rqtp;
5330Sstevel@tonic-gate 
5340Sstevel@tonic-gate 	aiop = curproc->p_aio;
5350Sstevel@tonic-gate 	if (aiop == NULL)
5360Sstevel@tonic-gate 		return (EINVAL);
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate 	/*
5390Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
5400Sstevel@tonic-gate 	 */
5410Sstevel@tonic-gate 	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
5420Sstevel@tonic-gate 	if (error)
5430Sstevel@tonic-gate 		return (error);
5440Sstevel@tonic-gate 	if (rqtp) {
5450Sstevel@tonic-gate 		timestruc_t now;
5464123Sdm120769 		timecheck = timechanged;
5470Sstevel@tonic-gate 		gethrestime(&now);
5480Sstevel@tonic-gate 		timespecadd(rqtp, &now);
5490Sstevel@tonic-gate 	}
5500Sstevel@tonic-gate 
5510Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
5520Sstevel@tonic-gate 	for (;;) {
5530Sstevel@tonic-gate 		/* process requests on poll queue */
5540Sstevel@tonic-gate 		if (aiop->aio_pollq) {
5550Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
5560Sstevel@tonic-gate 			aio_cleanup(0);
5570Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
5580Sstevel@tonic-gate 		}
5590Sstevel@tonic-gate 		if ((reqp = aio_req_remove(NULL)) != NULL) {
5600Sstevel@tonic-gate 			*rval = (long)reqp->aio_req_resultp;
5610Sstevel@tonic-gate 			break;
5620Sstevel@tonic-gate 		}
5630Sstevel@tonic-gate 		/* user-level done queue might not be empty */
5640Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
5650Sstevel@tonic-gate 			aiop->aio_notifycnt--;
5660Sstevel@tonic-gate 			*rval = 1;
5670Sstevel@tonic-gate 			break;
5680Sstevel@tonic-gate 		}
5690Sstevel@tonic-gate 		/* don't block if no outstanding aio */
5700Sstevel@tonic-gate 		if (aiop->aio_outstanding == 0 && dontblockflg) {
5710Sstevel@tonic-gate 			error = EINVAL;
5720Sstevel@tonic-gate 			break;
5730Sstevel@tonic-gate 		}
5740Sstevel@tonic-gate 		if (blocking) {
5750Sstevel@tonic-gate 			status = cv_waituntil_sig(&aiop->aio_waitcv,
5764123Sdm120769 			    &aiop->aio_mutex, rqtp, timecheck);
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate 			if (status > 0)		/* check done queue again */
5790Sstevel@tonic-gate 				continue;
5800Sstevel@tonic-gate 			if (status == 0) {	/* interrupted by a signal */
5810Sstevel@tonic-gate 				error = EINTR;
5820Sstevel@tonic-gate 				*rval = -1;
5830Sstevel@tonic-gate 			} else {		/* timer expired */
5840Sstevel@tonic-gate 				error = ETIME;
5850Sstevel@tonic-gate 			}
5860Sstevel@tonic-gate 		}
5870Sstevel@tonic-gate 		break;
5880Sstevel@tonic-gate 	}
5890Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
5900Sstevel@tonic-gate 	if (reqp) {
5910Sstevel@tonic-gate 		aphysio_unlock(reqp);
5920Sstevel@tonic-gate 		aio_copyout_result(reqp);
5930Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
5940Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
5950Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
5960Sstevel@tonic-gate 	}
5970Sstevel@tonic-gate 	return (error);
5980Sstevel@tonic-gate }
5990Sstevel@tonic-gate 
6000Sstevel@tonic-gate /*
6010Sstevel@tonic-gate  * aiowaitn can be used to reap completed asynchronous requests submitted with
6020Sstevel@tonic-gate  * lio_listio, aio_read or aio_write.
6030Sstevel@tonic-gate  * This function only reaps asynchronous raw I/Os.
6040Sstevel@tonic-gate  */
6050Sstevel@tonic-gate 
6060Sstevel@tonic-gate /*ARGSUSED*/
6070Sstevel@tonic-gate static int
aiowaitn(void * uiocb,uint_t nent,uint_t * nwait,timespec_t * timout)6080Sstevel@tonic-gate aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
6090Sstevel@tonic-gate {
6100Sstevel@tonic-gate 	int 		error = 0;
6110Sstevel@tonic-gate 	aio_t		*aiop;
6120Sstevel@tonic-gate 	aio_req_t	*reqlist = NULL;
6130Sstevel@tonic-gate 	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
6140Sstevel@tonic-gate 	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
6150Sstevel@tonic-gate 	size_t		iocbsz;			/* users iocb size */
6160Sstevel@tonic-gate 	size_t		riocbsz;		/* returned iocb size */
6170Sstevel@tonic-gate 	int		iocb_index = 0;
6180Sstevel@tonic-gate 	model_t		model = get_udatamodel();
6190Sstevel@tonic-gate 	int		blocking = 1;
6204123Sdm120769 	int		timecheck;
6210Sstevel@tonic-gate 	timestruc_t	rqtime;
6220Sstevel@tonic-gate 	timestruc_t	*rqtp;
6230Sstevel@tonic-gate 
6240Sstevel@tonic-gate 	aiop = curproc->p_aio;
6258519SVamsi.Krishna@Sun.COM 	if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
6268519SVamsi.Krishna@Sun.COM 		return (EINVAL);
6278519SVamsi.Krishna@Sun.COM 
6288519SVamsi.Krishna@Sun.COM 	if (aiop->aio_outstanding == 0)
6290Sstevel@tonic-gate 		return (EAGAIN);
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate 	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
6320Sstevel@tonic-gate 		return (EFAULT);
6330Sstevel@tonic-gate 
6340Sstevel@tonic-gate 	/* set *nwait to zero, if we must return prematurely */
6350Sstevel@tonic-gate 	if (copyout(&cnt, nwait, sizeof (uint_t)))
6360Sstevel@tonic-gate 		return (EFAULT);
6370Sstevel@tonic-gate 
6380Sstevel@tonic-gate 	if (waitcnt == 0) {
6390Sstevel@tonic-gate 		blocking = 0;
6400Sstevel@tonic-gate 		rqtp = NULL;
6410Sstevel@tonic-gate 		waitcnt = nent;
6420Sstevel@tonic-gate 	} else {
6430Sstevel@tonic-gate 		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
6440Sstevel@tonic-gate 		if (error)
6450Sstevel@tonic-gate 			return (error);
6460Sstevel@tonic-gate 	}
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
6490Sstevel@tonic-gate 		iocbsz = (sizeof (aiocb_t *) * nent);
6500Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
6510Sstevel@tonic-gate 	else
6520Sstevel@tonic-gate 		iocbsz = (sizeof (caddr32_t) * nent);
6530Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
6540Sstevel@tonic-gate 
6550Sstevel@tonic-gate 	/*
6560Sstevel@tonic-gate 	 * Only one aio_waitn call is allowed at a time.
6570Sstevel@tonic-gate 	 * The active aio_waitn will collect all requests
6580Sstevel@tonic-gate 	 * out of the "done" list and if necessary it will wait
6590Sstevel@tonic-gate 	 * for some/all pending requests to fulfill the nwait
6600Sstevel@tonic-gate 	 * parameter.
6610Sstevel@tonic-gate 	 * A second or further aio_waitn calls will sleep here
6620Sstevel@tonic-gate 	 * until the active aio_waitn finishes and leaves the kernel
6630Sstevel@tonic-gate 	 * If the second call does not block (poll), then return
6640Sstevel@tonic-gate 	 * immediately with the error code : EAGAIN.
6650Sstevel@tonic-gate 	 * If the second call should block, then sleep here, but
6660Sstevel@tonic-gate 	 * do not touch the timeout. The timeout starts when this
6670Sstevel@tonic-gate 	 * aio_waitn-call becomes active.
6680Sstevel@tonic-gate 	 */
6690Sstevel@tonic-gate 
6700Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	while (aiop->aio_flags & AIO_WAITN) {
6730Sstevel@tonic-gate 		if (blocking == 0) {
6740Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6750Sstevel@tonic-gate 			return (EAGAIN);
6760Sstevel@tonic-gate 		}
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 		/* block, no timeout */
6790Sstevel@tonic-gate 		aiop->aio_flags |= AIO_WAITN_PENDING;
6800Sstevel@tonic-gate 		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
6810Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6820Sstevel@tonic-gate 			return (EINTR);
6830Sstevel@tonic-gate 		}
6840Sstevel@tonic-gate 	}
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 	/*
6870Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
6880Sstevel@tonic-gate 	 */
6890Sstevel@tonic-gate 	if (rqtp) {
6900Sstevel@tonic-gate 		timestruc_t now;
6914123Sdm120769 		timecheck = timechanged;
6920Sstevel@tonic-gate 		gethrestime(&now);
6930Sstevel@tonic-gate 		timespecadd(rqtp, &now);
6940Sstevel@tonic-gate 	}
6950Sstevel@tonic-gate 
6960Sstevel@tonic-gate 	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
6970Sstevel@tonic-gate 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
6980Sstevel@tonic-gate 		aiop->aio_iocb = NULL;
6990Sstevel@tonic-gate 	}
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 	if (aiop->aio_iocb == NULL) {
7020Sstevel@tonic-gate 		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
7030Sstevel@tonic-gate 		if (iocblist == NULL) {
7040Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7050Sstevel@tonic-gate 			return (ENOMEM);
7060Sstevel@tonic-gate 		}
7070Sstevel@tonic-gate 		aiop->aio_iocb = (aiocb_t **)iocblist;
7080Sstevel@tonic-gate 		aiop->aio_iocbsz = iocbsz;
7090Sstevel@tonic-gate 	} else {
7100Sstevel@tonic-gate 		iocblist = (char *)aiop->aio_iocb;
7110Sstevel@tonic-gate 	}
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 	aiop->aio_waitncnt = waitcnt;
7140Sstevel@tonic-gate 	aiop->aio_flags |= AIO_WAITN;
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 	for (;;) {
7170Sstevel@tonic-gate 		/* push requests on poll queue to done queue */
7180Sstevel@tonic-gate 		if (aiop->aio_pollq) {
7190Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7200Sstevel@tonic-gate 			aio_cleanup(0);
7210Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
7220Sstevel@tonic-gate 		}
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 		/* check for requests on done queue */
7250Sstevel@tonic-gate 		if (aiop->aio_doneq) {
7260Sstevel@tonic-gate 			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
7270Sstevel@tonic-gate 			aiop->aio_waitncnt = waitcnt - cnt;
7280Sstevel@tonic-gate 		}
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 		/* user-level done queue might not be empty */
7310Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
7320Sstevel@tonic-gate 			aiop->aio_notifycnt--;
7330Sstevel@tonic-gate 			error = 0;
7340Sstevel@tonic-gate 			break;
7350Sstevel@tonic-gate 		}
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 		/*
7380Sstevel@tonic-gate 		 * if we are here second time as a result of timer
7390Sstevel@tonic-gate 		 * expiration, we reset error if there are enough
7400Sstevel@tonic-gate 		 * aiocb's to satisfy request.
7410Sstevel@tonic-gate 		 * We return also if all requests are already done
7420Sstevel@tonic-gate 		 * and we picked up the whole done queue.
7430Sstevel@tonic-gate 		 */
7440Sstevel@tonic-gate 
7450Sstevel@tonic-gate 		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
7460Sstevel@tonic-gate 		    aiop->aio_doneq == NULL)) {
7470Sstevel@tonic-gate 			error = 0;
7480Sstevel@tonic-gate 			break;
7490Sstevel@tonic-gate 		}
7500Sstevel@tonic-gate 
7510Sstevel@tonic-gate 		if ((cnt < waitcnt) && blocking) {
7520Sstevel@tonic-gate 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
7534502Spraks 			    &aiop->aio_mutex, rqtp, timecheck);
7540Sstevel@tonic-gate 			if (rval > 0)
7550Sstevel@tonic-gate 				continue;
7560Sstevel@tonic-gate 			if (rval < 0) {
7570Sstevel@tonic-gate 				error = ETIME;
7580Sstevel@tonic-gate 				blocking = 0;
7590Sstevel@tonic-gate 				continue;
7600Sstevel@tonic-gate 			}
7610Sstevel@tonic-gate 			error = EINTR;
7620Sstevel@tonic-gate 		}
7630Sstevel@tonic-gate 		break;
7640Sstevel@tonic-gate 	}
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	if (cnt > 0) {
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
7710Sstevel@tonic-gate 		    aiop, model);
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE)
7740Sstevel@tonic-gate 			riocbsz = (sizeof (aiocb_t *) * cnt);
7750Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
7760Sstevel@tonic-gate 		else
7770Sstevel@tonic-gate 			riocbsz = (sizeof (caddr32_t) * cnt);
7780Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate 		if (copyout(iocblist, uiocb, riocbsz) ||
7810Sstevel@tonic-gate 		    copyout(&cnt, nwait, sizeof (uint_t)))
7820Sstevel@tonic-gate 			error = EFAULT;
7830Sstevel@tonic-gate 	}
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 	/* check if there is another thread waiting for execution */
7860Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
7870Sstevel@tonic-gate 	aiop->aio_flags &= ~AIO_WAITN;
7880Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_WAITN_PENDING) {
7890Sstevel@tonic-gate 		aiop->aio_flags &= ~AIO_WAITN_PENDING;
7900Sstevel@tonic-gate 		cv_signal(&aiop->aio_waitncv);
7910Sstevel@tonic-gate 	}
7920Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7930Sstevel@tonic-gate 
7940Sstevel@tonic-gate 	return (error);
7950Sstevel@tonic-gate }
7960Sstevel@tonic-gate 
7970Sstevel@tonic-gate /*
7980Sstevel@tonic-gate  * aio_unlock_requests
7990Sstevel@tonic-gate  * copyouts the result of the request as well as the return value.
8000Sstevel@tonic-gate  * It builds the list of completed asynchronous requests,
8010Sstevel@tonic-gate  * unlocks the allocated memory ranges and
8020Sstevel@tonic-gate  * put the aio request structure back into the free list.
8030Sstevel@tonic-gate  */
8040Sstevel@tonic-gate 
8050Sstevel@tonic-gate static int
aio_unlock_requests(caddr_t iocblist,int iocb_index,aio_req_t * reqlist,aio_t * aiop,model_t model)8060Sstevel@tonic-gate aio_unlock_requests(
8070Sstevel@tonic-gate 	caddr_t	iocblist,
8080Sstevel@tonic-gate 	int	iocb_index,
8090Sstevel@tonic-gate 	aio_req_t *reqlist,
8100Sstevel@tonic-gate 	aio_t	*aiop,
8110Sstevel@tonic-gate 	model_t	model)
8120Sstevel@tonic-gate {
8130Sstevel@tonic-gate 	aio_req_t	*reqp, *nreqp;
8140Sstevel@tonic-gate 
8150Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
8160Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8170Sstevel@tonic-gate 			(((caddr_t *)iocblist)[iocb_index++]) =
8180Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb;
8190Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8200Sstevel@tonic-gate 			aphysio_unlock(reqp);
8210Sstevel@tonic-gate 			aio_copyout_result(reqp);
8220Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8230Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8240Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8250Sstevel@tonic-gate 		}
8260Sstevel@tonic-gate 	}
8270Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
8280Sstevel@tonic-gate 	else {
8290Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8300Sstevel@tonic-gate 			((caddr32_t *)iocblist)[iocb_index++] =
8310Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb32;
8320Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8330Sstevel@tonic-gate 			aphysio_unlock(reqp);
8340Sstevel@tonic-gate 			aio_copyout_result(reqp);
8350Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8360Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8370Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8380Sstevel@tonic-gate 		}
8390Sstevel@tonic-gate 	}
8400Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
8410Sstevel@tonic-gate 	return (iocb_index);
8420Sstevel@tonic-gate }
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate /*
8450Sstevel@tonic-gate  * aio_reqlist_concat
8460Sstevel@tonic-gate  * moves "max" elements from the done queue to the reqlist queue and removes
8470Sstevel@tonic-gate  * the AIO_DONEQ flag.
8480Sstevel@tonic-gate  * - reqlist queue is a simple linked list
8490Sstevel@tonic-gate  * - done queue is a double linked list
8500Sstevel@tonic-gate  */
8510Sstevel@tonic-gate 
8520Sstevel@tonic-gate static int
aio_reqlist_concat(aio_t * aiop,aio_req_t ** reqlist,int max)8530Sstevel@tonic-gate aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
8540Sstevel@tonic-gate {
8550Sstevel@tonic-gate 	aio_req_t *q2, *q2work, *list;
8560Sstevel@tonic-gate 	int count = 0;
8570Sstevel@tonic-gate 
8580Sstevel@tonic-gate 	list = *reqlist;
8590Sstevel@tonic-gate 	q2 = aiop->aio_doneq;
8600Sstevel@tonic-gate 	q2work = q2;
8610Sstevel@tonic-gate 	while (max-- > 0) {
8620Sstevel@tonic-gate 		q2work->aio_req_flags &= ~AIO_DONEQ;
8630Sstevel@tonic-gate 		q2work = q2work->aio_req_next;
8640Sstevel@tonic-gate 		count++;
8650Sstevel@tonic-gate 		if (q2work == q2)
8660Sstevel@tonic-gate 			break;
8670Sstevel@tonic-gate 	}
8680Sstevel@tonic-gate 
8690Sstevel@tonic-gate 	if (q2work == q2) {
8700Sstevel@tonic-gate 		/* all elements revised */
8710Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = list;
8720Sstevel@tonic-gate 		list = q2;
8730Sstevel@tonic-gate 		aiop->aio_doneq = NULL;
8740Sstevel@tonic-gate 	} else {
8750Sstevel@tonic-gate 		/*
8760Sstevel@tonic-gate 		 * max < elements in the doneq
8770Sstevel@tonic-gate 		 * detach only the required amount of elements
8780Sstevel@tonic-gate 		 * out of the doneq
8790Sstevel@tonic-gate 		 */
8800Sstevel@tonic-gate 		q2work->aio_req_prev->aio_req_next = list;
8810Sstevel@tonic-gate 		list = q2;
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate 		aiop->aio_doneq = q2work;
8840Sstevel@tonic-gate 		q2work->aio_req_prev = q2->aio_req_prev;
8850Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = q2work;
8860Sstevel@tonic-gate 	}
8870Sstevel@tonic-gate 	*reqlist = list;
8880Sstevel@tonic-gate 	return (count);
8890Sstevel@tonic-gate }
8900Sstevel@tonic-gate 
/*
 * aiosuspend
 * Common backend for aio_suspend() and its 32-bit/large-file variants:
 * wait until at least one of the asynchronous requests named by the
 * user's "aiocb" array has completed, the relative timeout "timout"
 * expires, or a signal arrives.  Completed requests are unlocked,
 * their results copied out, and their request structures freed.
 * "run_mode" selects how the user aiocb pointers are interpreted
 * (native, ILP32 or large-file layout); "nent" is the array length.
 * Returns 0 on success, or EINVAL/EAGAIN/EFAULT/ENOMEM/EINTR/ETIME.
 */
/*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t		cbplist = NULL;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int		rv;
	int		i;
	size_t		ssize;
	model_t		model = get_udatamodel();
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	/* size of the user's pointer array depends on the data model */
	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			/*
			 * Scan the user's list; aio_req_done() detaches a
			 * request from the done queue if its result address
			 * matches, so each hit is collected onto "found".
			 */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				/*
				 * NOTE(review): reqp is only assigned for
				 * recognized run_mode values; presumably
				 * run_mode is always AIO_64, AIO_32 or
				 * AIO_LARGEFILE here, otherwise a stale
				 * reqp from a prior iteration could be
				 * re-queued -- confirm against callers.
				 */
				if (reqp) {
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	/*
	 * Post-process every collected request outside the main locks:
	 * unlock its pages, copy the result out, then free it (the free
	 * list is protected by aio_mutex).
	 */
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}
10850Sstevel@tonic-gate 
10860Sstevel@tonic-gate /*
10870Sstevel@tonic-gate  * initialize aio by allocating an aio_t struct for this
10880Sstevel@tonic-gate  * process.
10890Sstevel@tonic-gate  */
10900Sstevel@tonic-gate static int
aioinit(void)10910Sstevel@tonic-gate aioinit(void)
10920Sstevel@tonic-gate {
10930Sstevel@tonic-gate 	proc_t *p = curproc;
10940Sstevel@tonic-gate 	aio_t *aiop;
10950Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
10960Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL) {
10970Sstevel@tonic-gate 		aiop = aio_aiop_alloc();
10980Sstevel@tonic-gate 		p->p_aio = aiop;
10990Sstevel@tonic-gate 	}
11000Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11010Sstevel@tonic-gate 	if (aiop == NULL)
11020Sstevel@tonic-gate 		return (ENOMEM);
11030Sstevel@tonic-gate 	return (0);
11040Sstevel@tonic-gate }
11050Sstevel@tonic-gate 
11060Sstevel@tonic-gate /*
11070Sstevel@tonic-gate  * start a special thread that will cleanup after aio requests
11080Sstevel@tonic-gate  * that are preventing a segment from being unmapped. as_unmap()
11090Sstevel@tonic-gate  * blocks until all phsyio to this segment is completed. this
11100Sstevel@tonic-gate  * doesn't happen until all the pages in this segment are not
11110Sstevel@tonic-gate  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
11120Sstevel@tonic-gate  * requests still outstanding. this special thread will make sure
11130Sstevel@tonic-gate  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
11140Sstevel@tonic-gate  *
11150Sstevel@tonic-gate  * this function will return an error if the process has only
11160Sstevel@tonic-gate  * one LWP. the assumption is that the caller is a separate LWP
11170Sstevel@tonic-gate  * that remains blocked in the kernel for the life of this process.
11180Sstevel@tonic-gate  */
11190Sstevel@tonic-gate static int
aiostart(void)11200Sstevel@tonic-gate aiostart(void)
11210Sstevel@tonic-gate {
11220Sstevel@tonic-gate 	proc_t *p = curproc;
11230Sstevel@tonic-gate 	aio_t *aiop;
11240Sstevel@tonic-gate 	int first, error = 0;
11250Sstevel@tonic-gate 
11260Sstevel@tonic-gate 	if (p->p_lwpcnt == 1)
11270Sstevel@tonic-gate 		return (EDEADLK);
11280Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11290Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL)
11300Sstevel@tonic-gate 		error = EINVAL;
11310Sstevel@tonic-gate 	else {
11320Sstevel@tonic-gate 		first = aiop->aio_ok;
11330Sstevel@tonic-gate 		if (aiop->aio_ok == 0)
11340Sstevel@tonic-gate 			aiop->aio_ok = 1;
11350Sstevel@tonic-gate 	}
11360Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11370Sstevel@tonic-gate 	if (error == 0 && first == 0) {
11380Sstevel@tonic-gate 		return (aio_cleanup_thread(aiop));
11390Sstevel@tonic-gate 		/* should return only to exit */
11400Sstevel@tonic-gate 	}
11410Sstevel@tonic-gate 	return (error);
11420Sstevel@tonic-gate }
11430Sstevel@tonic-gate 
11440Sstevel@tonic-gate /*
11450Sstevel@tonic-gate  * Associate an aiocb with a port.
11460Sstevel@tonic-gate  * This function is used by aiorw() to associate a transaction with a port.
11470Sstevel@tonic-gate  * Allocate an event port structure (port_alloc_event()) and store the
11480Sstevel@tonic-gate  * delivered user pointer (portnfy_user) in the portkev_user field of the
11490Sstevel@tonic-gate  * port_kevent_t structure..
11500Sstevel@tonic-gate  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
11510Sstevel@tonic-gate  * the port association.
11520Sstevel@tonic-gate  */
11530Sstevel@tonic-gate 
11540Sstevel@tonic-gate static int
aio_req_assoc_port_rw(port_notify_t * pntfy,aiocb_t * cbp,aio_req_t * reqp,int event)11551885Sraf aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
11561885Sraf 	aio_req_t *reqp, int event)
11570Sstevel@tonic-gate {
11580Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
11590Sstevel@tonic-gate 	int		error;
11600Sstevel@tonic-gate 
11610Sstevel@tonic-gate 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
11620Sstevel@tonic-gate 	    PORT_SOURCE_AIO, &pkevp);
11630Sstevel@tonic-gate 	if (error) {
11640Sstevel@tonic-gate 		if ((error == ENOMEM) || (error == EAGAIN))
11650Sstevel@tonic-gate 			error = EAGAIN;
11660Sstevel@tonic-gate 		else
11670Sstevel@tonic-gate 			error = EINVAL;
11680Sstevel@tonic-gate 	} else {
11690Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
11700Sstevel@tonic-gate 		    aio_port_callback, reqp);
11711885Sraf 		pkevp->portkev_events = event;
11720Sstevel@tonic-gate 		reqp->aio_req_portkev = pkevp;
11730Sstevel@tonic-gate 		reqp->aio_req_port = pntfy->portnfy_port;
11740Sstevel@tonic-gate 	}
11750Sstevel@tonic-gate 	return (error);
11760Sstevel@tonic-gate }
11770Sstevel@tonic-gate 
11780Sstevel@tonic-gate #ifdef _LP64
11790Sstevel@tonic-gate 
11800Sstevel@tonic-gate /*
11810Sstevel@tonic-gate  * Asynchronous list IO. A chain of aiocb's are copied in
11820Sstevel@tonic-gate  * one at a time. If the aiocb is invalid, it is skipped.
11830Sstevel@tonic-gate  * For each aiocb, the appropriate driver entry point is
11840Sstevel@tonic-gate  * called. Optimize for the common case where the list
11850Sstevel@tonic-gate  * of requests is to the same file descriptor.
11860Sstevel@tonic-gate  *
11870Sstevel@tonic-gate  * One possible optimization is to define a new driver entry
11880Sstevel@tonic-gate  * point that supports a list of IO requests. Whether this
11890Sstevel@tonic-gate  * improves performance depends somewhat on the driver's
11900Sstevel@tonic-gate  * locking strategy. Processing a list could adversely impact
11910Sstevel@tonic-gate  * the driver's interrupt latency.
11920Sstevel@tonic-gate  */
11930Sstevel@tonic-gate static int
alio(int mode_arg,aiocb_t ** aiocb_arg,int nent,struct sigevent * sigev)11940Sstevel@tonic-gate alio(
11951885Sraf 	int		mode_arg,
11961885Sraf 	aiocb_t		**aiocb_arg,
11971885Sraf 	int		nent,
11981885Sraf 	struct sigevent	*sigev)
11990Sstevel@tonic-gate {
12000Sstevel@tonic-gate 	file_t		*fp;
12010Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
12020Sstevel@tonic-gate 	int		prev_mode = -1;
12030Sstevel@tonic-gate 	struct vnode	*vp;
12040Sstevel@tonic-gate 	aio_lio_t	*head;
12050Sstevel@tonic-gate 	aio_req_t	*reqp;
12060Sstevel@tonic-gate 	aio_t		*aiop;
12070Sstevel@tonic-gate 	caddr_t		cbplist;
12080Sstevel@tonic-gate 	aiocb_t		cb;
12090Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
12101885Sraf 	aiocb_t		*cbp;
12111885Sraf 	aiocb_t		**ucbp;
12120Sstevel@tonic-gate 	struct sigevent sigevk;
12130Sstevel@tonic-gate 	sigqueue_t	*sqp;
12140Sstevel@tonic-gate 	int		(*aio_func)();
12150Sstevel@tonic-gate 	int		mode;
12160Sstevel@tonic-gate 	int		error = 0;
12170Sstevel@tonic-gate 	int		aio_errors = 0;
12180Sstevel@tonic-gate 	int		i;
12190Sstevel@tonic-gate 	size_t		ssize;
12200Sstevel@tonic-gate 	int		deadhead = 0;
12210Sstevel@tonic-gate 	int		aio_notsupported = 0;
12221885Sraf 	int		lio_head_port;
12231885Sraf 	int		aio_port;
12241885Sraf 	int		aio_thread;
12250Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
12264502Spraks 	int		portused = 0;
12270Sstevel@tonic-gate 	port_notify_t	pnotify;
12281885Sraf 	int		event;
12290Sstevel@tonic-gate 
12300Sstevel@tonic-gate 	aiop = curproc->p_aio;
12310Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
12320Sstevel@tonic-gate 		return (EINVAL);
12330Sstevel@tonic-gate 
12340Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
12350Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
12360Sstevel@tonic-gate 	ucbp = (aiocb_t **)cbplist;
12370Sstevel@tonic-gate 
12381885Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
12391885Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
12400Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
12410Sstevel@tonic-gate 		return (EFAULT);
12420Sstevel@tonic-gate 	}
12430Sstevel@tonic-gate 
12441885Sraf 	/* Event Ports  */
12451885Sraf 	if (sigev &&
12461885Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
12471885Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
12481885Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
12491885Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
12501885Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
12511885Sraf 		} else if (copyin(sigevk.sigev_value.sival_ptr,
12521885Sraf 		    &pnotify, sizeof (pnotify))) {
12530Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
12540Sstevel@tonic-gate 			return (EFAULT);
12550Sstevel@tonic-gate 		}
12561885Sraf 		error = port_alloc_event(pnotify.portnfy_port,
12571885Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
12581885Sraf 		if (error) {
12591885Sraf 			if (error == ENOMEM || error == EAGAIN)
12601885Sraf 				error = EAGAIN;
12611885Sraf 			else
12621885Sraf 				error = EINVAL;
12631885Sraf 			kmem_free(cbplist, ssize);
12641885Sraf 			return (error);
12651885Sraf 		}
12661885Sraf 		lio_head_port = pnotify.portnfy_port;
12674502Spraks 		portused = 1;
12680Sstevel@tonic-gate 	}
12690Sstevel@tonic-gate 
12700Sstevel@tonic-gate 	/*
12710Sstevel@tonic-gate 	 * a list head should be allocated if notification is
12720Sstevel@tonic-gate 	 * enabled for this list.
12730Sstevel@tonic-gate 	 */
12740Sstevel@tonic-gate 	head = NULL;
12750Sstevel@tonic-gate 
12761885Sraf 	if (mode_arg == LIO_WAIT || sigev) {
12770Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
12780Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
12790Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
12800Sstevel@tonic-gate 		if (error)
12810Sstevel@tonic-gate 			goto done;
12820Sstevel@tonic-gate 		deadhead = 1;
12830Sstevel@tonic-gate 		head->lio_nent = nent;
12840Sstevel@tonic-gate 		head->lio_refcnt = nent;
12851885Sraf 		head->lio_port = -1;
12861885Sraf 		head->lio_portkev = NULL;
12871885Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
12881885Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
12890Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
12900Sstevel@tonic-gate 			if (sqp == NULL) {
12910Sstevel@tonic-gate 				error = EAGAIN;
12920Sstevel@tonic-gate 				goto done;
12930Sstevel@tonic-gate 			}
12940Sstevel@tonic-gate 			sqp->sq_func = NULL;
12950Sstevel@tonic-gate 			sqp->sq_next = NULL;
12960Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
12970Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
12980Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
12990Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
13000Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
13010Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
13020Sstevel@tonic-gate 			sqp->sq_info.si_value = sigevk.sigev_value;
13030Sstevel@tonic-gate 			head->lio_sigqp = sqp;
13040Sstevel@tonic-gate 		} else {
13050Sstevel@tonic-gate 			head->lio_sigqp = NULL;
13060Sstevel@tonic-gate 		}
13071885Sraf 		if (pkevtp) {
13081885Sraf 			/*
13091885Sraf 			 * Prepare data to send when list of aiocb's
13101885Sraf 			 * has completed.
13111885Sraf 			 */
13121885Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
13131885Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
13141885Sraf 			    NULL, head);
13151885Sraf 			pkevtp->portkev_events = AIOLIO;
13161885Sraf 			head->lio_portkev = pkevtp;
13171885Sraf 			head->lio_port = pnotify.portnfy_port;
13181885Sraf 		}
13190Sstevel@tonic-gate 	}
13200Sstevel@tonic-gate 
13210Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
13220Sstevel@tonic-gate 
13230Sstevel@tonic-gate 		cbp = *ucbp;
13240Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
13251885Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
13260Sstevel@tonic-gate 			if (head) {
13270Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13280Sstevel@tonic-gate 				head->lio_nent--;
13290Sstevel@tonic-gate 				head->lio_refcnt--;
13300Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13310Sstevel@tonic-gate 			}
13320Sstevel@tonic-gate 			continue;
13330Sstevel@tonic-gate 		}
13340Sstevel@tonic-gate 
13350Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
13360Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
13370Sstevel@tonic-gate 		if (mode == LIO_NOP) {
13380Sstevel@tonic-gate 			cbp = NULL;
13390Sstevel@tonic-gate 			if (head) {
13400Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13410Sstevel@tonic-gate 				head->lio_nent--;
13420Sstevel@tonic-gate 				head->lio_refcnt--;
13430Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13440Sstevel@tonic-gate 			}
13450Sstevel@tonic-gate 			continue;
13460Sstevel@tonic-gate 		}
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
13490Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
13500Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13510Sstevel@tonic-gate 			if (head) {
13520Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13530Sstevel@tonic-gate 				head->lio_nent--;
13540Sstevel@tonic-gate 				head->lio_refcnt--;
13550Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13560Sstevel@tonic-gate 			}
13570Sstevel@tonic-gate 			aio_errors++;
13580Sstevel@tonic-gate 			continue;
13590Sstevel@tonic-gate 		}
13600Sstevel@tonic-gate 
13610Sstevel@tonic-gate 		/*
13620Sstevel@tonic-gate 		 * check the permission of the partition
13630Sstevel@tonic-gate 		 */
13640Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
13650Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
13660Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13670Sstevel@tonic-gate 			if (head) {
13680Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13690Sstevel@tonic-gate 				head->lio_nent--;
13700Sstevel@tonic-gate 				head->lio_refcnt--;
13710Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13720Sstevel@tonic-gate 			}
13730Sstevel@tonic-gate 			aio_errors++;
13740Sstevel@tonic-gate 			continue;
13750Sstevel@tonic-gate 		}
13760Sstevel@tonic-gate 
13770Sstevel@tonic-gate 		/*
13781885Sraf 		 * common case where requests are to the same fd
13791885Sraf 		 * for the same r/w operation.
13800Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
13810Sstevel@tonic-gate 		 */
13821885Sraf 		vp = fp->f_vnode;
13831885Sraf 		if (fp != prev_fp || mode != prev_mode) {
13840Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
13850Sstevel@tonic-gate 			if (aio_func == NULL) {
13860Sstevel@tonic-gate 				prev_fp = NULL;
13870Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
13880Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
13890Sstevel@tonic-gate 				aio_notsupported++;
13900Sstevel@tonic-gate 				if (head) {
13910Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
13920Sstevel@tonic-gate 					head->lio_nent--;
13930Sstevel@tonic-gate 					head->lio_refcnt--;
13940Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
13950Sstevel@tonic-gate 				}
13960Sstevel@tonic-gate 				continue;
13970Sstevel@tonic-gate 			} else {
13980Sstevel@tonic-gate 				prev_fp = fp;
13990Sstevel@tonic-gate 				prev_mode = mode;
14000Sstevel@tonic-gate 			}
14010Sstevel@tonic-gate 		}
14020Sstevel@tonic-gate 
14031885Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
1404*10719SRoger.Faulkner@Sun.COM 		    &cbp->aio_resultp, vp, 0);
14051885Sraf 		if (error) {
14060Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14070Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14080Sstevel@tonic-gate 			if (head) {
14090Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14100Sstevel@tonic-gate 				head->lio_nent--;
14110Sstevel@tonic-gate 				head->lio_refcnt--;
14120Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14130Sstevel@tonic-gate 			}
14140Sstevel@tonic-gate 			aio_errors++;
14150Sstevel@tonic-gate 			continue;
14160Sstevel@tonic-gate 		}
14170Sstevel@tonic-gate 
14180Sstevel@tonic-gate 		reqp->aio_req_lio = head;
14190Sstevel@tonic-gate 		deadhead = 0;
14200Sstevel@tonic-gate 
14210Sstevel@tonic-gate 		/*
14220Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
14230Sstevel@tonic-gate 		 * the driver to avoid a race condition
14240Sstevel@tonic-gate 		 */
14250Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
14260Sstevel@tonic-gate 		    EINPROGRESS);
14270Sstevel@tonic-gate 
14280Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
14290Sstevel@tonic-gate 
14301885Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
14311885Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
14321885Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
14331885Sraf 		if (aio_port | aio_thread) {
14341885Sraf 			port_kevent_t *lpkevp;
14351885Sraf 			/*
14361885Sraf 			 * Prepare data to send with each aiocb completed.
14371885Sraf 			 */
14381885Sraf 			if (aio_port) {
14391885Sraf 				void *paddr =
14401885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14411885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
14421885Sraf 					error = EFAULT;
14431885Sraf 			} else {	/* aio_thread */
14441885Sraf 				pnotify.portnfy_port =
14451885Sraf 				    aiocb->aio_sigevent.sigev_signo;
14461885Sraf 				pnotify.portnfy_user =
14471885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14481885Sraf 			}
14491885Sraf 			if (error)
14501885Sraf 				/* EMPTY */;
14511885Sraf 			else if (pkevtp != NULL &&
14521885Sraf 			    pnotify.portnfy_port == lio_head_port)
14531885Sraf 				error = port_dup_event(pkevtp, &lpkevp,
14541885Sraf 				    PORT_ALLOC_DEFAULT);
14551885Sraf 			else
14561885Sraf 				error = port_alloc_event(pnotify.portnfy_port,
14571885Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
14581885Sraf 				    &lpkevp);
14591885Sraf 			if (error == 0) {
14601885Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
14611885Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
14621885Sraf 				    aio_port_callback, reqp);
14631885Sraf 				lpkevp->portkev_events = event;
14641885Sraf 				reqp->aio_req_portkev = lpkevp;
14651885Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
14661885Sraf 			}
14670Sstevel@tonic-gate 		}
14680Sstevel@tonic-gate 
14690Sstevel@tonic-gate 		/*
14700Sstevel@tonic-gate 		 * send the request to driver.
14710Sstevel@tonic-gate 		 */
14720Sstevel@tonic-gate 		if (error == 0) {
14730Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
14740Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
14750Sstevel@tonic-gate 				aio_zerolen(reqp);
14760Sstevel@tonic-gate 				continue;
14770Sstevel@tonic-gate 			}
14780Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
14790Sstevel@tonic-gate 			    CRED());
14800Sstevel@tonic-gate 		}
14811885Sraf 
14820Sstevel@tonic-gate 		/*
14830Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
14840Sstevel@tonic-gate 		 * completed unless there was an error.
14850Sstevel@tonic-gate 		 */
14860Sstevel@tonic-gate 		if (error) {
14870Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14880Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14890Sstevel@tonic-gate 			if (head) {
14900Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14910Sstevel@tonic-gate 				head->lio_nent--;
14920Sstevel@tonic-gate 				head->lio_refcnt--;
14930Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14940Sstevel@tonic-gate 			}
14950Sstevel@tonic-gate 			if (error == ENOTSUP)
14960Sstevel@tonic-gate 				aio_notsupported++;
14970Sstevel@tonic-gate 			else
14980Sstevel@tonic-gate 				aio_errors++;
14994502Spraks 			lio_set_error(reqp, portused);
15000Sstevel@tonic-gate 		} else {
15010Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
15020Sstevel@tonic-gate 		}
15030Sstevel@tonic-gate 	}
15040Sstevel@tonic-gate 
15050Sstevel@tonic-gate 	if (aio_notsupported) {
15060Sstevel@tonic-gate 		error = ENOTSUP;
15070Sstevel@tonic-gate 	} else if (aio_errors) {
15080Sstevel@tonic-gate 		/*
15090Sstevel@tonic-gate 		 * return EIO if any request failed
15100Sstevel@tonic-gate 		 */
15110Sstevel@tonic-gate 		error = EIO;
15120Sstevel@tonic-gate 	}
15130Sstevel@tonic-gate 
15140Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
15150Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
15160Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
15170Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
15180Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15190Sstevel@tonic-gate 				error = EINTR;
15200Sstevel@tonic-gate 				goto done;
15210Sstevel@tonic-gate 			}
15220Sstevel@tonic-gate 		}
15230Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
15240Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
15250Sstevel@tonic-gate 	}
15260Sstevel@tonic-gate 
15270Sstevel@tonic-gate done:
15280Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
15290Sstevel@tonic-gate 	if (deadhead) {
15300Sstevel@tonic-gate 		if (head->lio_sigqp)
15310Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
15321885Sraf 		if (head->lio_portkev)
15331885Sraf 			port_free_event(head->lio_portkev);
15340Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
15350Sstevel@tonic-gate 	}
15360Sstevel@tonic-gate 	return (error);
15370Sstevel@tonic-gate }
15380Sstevel@tonic-gate 
15390Sstevel@tonic-gate #endif /* _LP64 */
15400Sstevel@tonic-gate 
15410Sstevel@tonic-gate /*
15420Sstevel@tonic-gate  * Asynchronous list IO.
15430Sstevel@tonic-gate  * If list I/O is called with LIO_WAIT it can still return
15440Sstevel@tonic-gate  * before all the I/O's are completed if a signal is caught
15450Sstevel@tonic-gate  * or if the list include UFS I/O requests. If this happens,
15460Sstevel@tonic-gate  * libaio will call aliowait() to wait for the I/O's to
15470Sstevel@tonic-gate  * complete
15480Sstevel@tonic-gate  */
15490Sstevel@tonic-gate /*ARGSUSED*/
15500Sstevel@tonic-gate static int
aliowait(int mode,void * aiocb,int nent,void * sigev,int run_mode)15510Sstevel@tonic-gate aliowait(
15520Sstevel@tonic-gate 	int	mode,
15530Sstevel@tonic-gate 	void	*aiocb,
15540Sstevel@tonic-gate 	int	nent,
15550Sstevel@tonic-gate 	void	*sigev,
15560Sstevel@tonic-gate 	int	run_mode)
15570Sstevel@tonic-gate {
15580Sstevel@tonic-gate 	aio_lio_t	*head;
15590Sstevel@tonic-gate 	aio_t		*aiop;
15600Sstevel@tonic-gate 	caddr_t		cbplist;
15610Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
15620Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15630Sstevel@tonic-gate 	aiocb32_t	*cbp32;
15640Sstevel@tonic-gate 	caddr32_t	*ucbp32;
15650Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
15660Sstevel@tonic-gate #endif
15670Sstevel@tonic-gate 	int		error = 0;
15680Sstevel@tonic-gate 	int		i;
15690Sstevel@tonic-gate 	size_t		ssize = 0;
15700Sstevel@tonic-gate 	model_t		model = get_udatamodel();
15710Sstevel@tonic-gate 
15720Sstevel@tonic-gate 	aiop = curproc->p_aio;
15730Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
15740Sstevel@tonic-gate 		return (EINVAL);
15750Sstevel@tonic-gate 
15760Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15770Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
15780Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15790Sstevel@tonic-gate 	else
15800Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
15810Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15820Sstevel@tonic-gate 
15830Sstevel@tonic-gate 	if (ssize == 0)
15840Sstevel@tonic-gate 		return (EINVAL);
15850Sstevel@tonic-gate 
15860Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
15870Sstevel@tonic-gate 
15880Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15890Sstevel@tonic-gate 		ucbp = (aiocb_t **)cbplist;
15900Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15910Sstevel@tonic-gate 	else
15920Sstevel@tonic-gate 		ucbp32 = (caddr32_t *)cbplist;
15930Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15940Sstevel@tonic-gate 
15950Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
15960Sstevel@tonic-gate 		error = EFAULT;
15970Sstevel@tonic-gate 		goto done;
15980Sstevel@tonic-gate 	}
15990Sstevel@tonic-gate 
16000Sstevel@tonic-gate 	/*
16010Sstevel@tonic-gate 	 * To find the list head, we go through the
16020Sstevel@tonic-gate 	 * list of aiocb structs, find the request
16030Sstevel@tonic-gate 	 * its for, then get the list head that reqp
16040Sstevel@tonic-gate 	 * points to
16050Sstevel@tonic-gate 	 */
16060Sstevel@tonic-gate 	head = NULL;
16070Sstevel@tonic-gate 
16080Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
16090Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE) {
16100Sstevel@tonic-gate 			/*
16110Sstevel@tonic-gate 			 * Since we are only checking for a NULL pointer
16120Sstevel@tonic-gate 			 * Following should work on both native data sizes
16130Sstevel@tonic-gate 			 * as well as for largefile aiocb.
16140Sstevel@tonic-gate 			 */
16150Sstevel@tonic-gate 			if ((cbp = *ucbp++) == NULL)
16160Sstevel@tonic-gate 				continue;
16170Sstevel@tonic-gate 			if (run_mode != AIO_LARGEFILE)
16180Sstevel@tonic-gate 				if (head = aio_list_get(&cbp->aio_resultp))
16190Sstevel@tonic-gate 					break;
16200Sstevel@tonic-gate 			else {
16210Sstevel@tonic-gate 				/*
16220Sstevel@tonic-gate 				 * This is a case when largefile call is
16230Sstevel@tonic-gate 				 * made on 32 bit kernel.
16240Sstevel@tonic-gate 				 * Treat each pointer as pointer to
16250Sstevel@tonic-gate 				 * aiocb64_32
16260Sstevel@tonic-gate 				 */
16270Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16280Sstevel@tonic-gate 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
16290Sstevel@tonic-gate 					break;
16300Sstevel@tonic-gate 			}
16310Sstevel@tonic-gate 		}
16320Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16330Sstevel@tonic-gate 		else {
16340Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
16350Sstevel@tonic-gate 				if ((cbp64 = (aiocb64_32_t *)
16360Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16370Sstevel@tonic-gate 					continue;
16380Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16390Sstevel@tonic-gate 				    &cbp64->aio_resultp))
16400Sstevel@tonic-gate 					break;
16410Sstevel@tonic-gate 			} else if (run_mode == AIO_32) {
16420Sstevel@tonic-gate 				if ((cbp32 = (aiocb32_t *)
16430Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16440Sstevel@tonic-gate 					continue;
16450Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16460Sstevel@tonic-gate 				    &cbp32->aio_resultp))
16470Sstevel@tonic-gate 					break;
16480Sstevel@tonic-gate 			}
16490Sstevel@tonic-gate 		}
16500Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
16510Sstevel@tonic-gate 	}
16520Sstevel@tonic-gate 
16530Sstevel@tonic-gate 	if (head == NULL) {
16540Sstevel@tonic-gate 		error = EINVAL;
16550Sstevel@tonic-gate 		goto done;
16560Sstevel@tonic-gate 	}
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
16590Sstevel@tonic-gate 	while (head->lio_refcnt > 0) {
16600Sstevel@tonic-gate 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
16610Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
16620Sstevel@tonic-gate 			error = EINTR;
16630Sstevel@tonic-gate 			goto done;
16640Sstevel@tonic-gate 		}
16650Sstevel@tonic-gate 	}
16660Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
16670Sstevel@tonic-gate 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
16680Sstevel@tonic-gate done:
16690Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
16700Sstevel@tonic-gate 	return (error);
16710Sstevel@tonic-gate }
16720Sstevel@tonic-gate 
16730Sstevel@tonic-gate aio_lio_t *
aio_list_get(aio_result_t * resultp)16740Sstevel@tonic-gate aio_list_get(aio_result_t *resultp)
16750Sstevel@tonic-gate {
16760Sstevel@tonic-gate 	aio_lio_t	*head = NULL;
16770Sstevel@tonic-gate 	aio_t		*aiop;
16780Sstevel@tonic-gate 	aio_req_t 	**bucket;
16790Sstevel@tonic-gate 	aio_req_t 	*reqp;
16800Sstevel@tonic-gate 	long		index;
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 	aiop = curproc->p_aio;
16830Sstevel@tonic-gate 	if (aiop == NULL)
16840Sstevel@tonic-gate 		return (NULL);
16850Sstevel@tonic-gate 
16860Sstevel@tonic-gate 	if (resultp) {
16870Sstevel@tonic-gate 		index = AIO_HASH(resultp);
16880Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
16890Sstevel@tonic-gate 		for (reqp = *bucket; reqp != NULL;
16900Sstevel@tonic-gate 		    reqp = reqp->aio_hash_next) {
16910Sstevel@tonic-gate 			if (reqp->aio_req_resultp == resultp) {
16920Sstevel@tonic-gate 				head = reqp->aio_req_lio;
16930Sstevel@tonic-gate 				return (head);
16940Sstevel@tonic-gate 			}
16950Sstevel@tonic-gate 		}
16960Sstevel@tonic-gate 	}
16970Sstevel@tonic-gate 	return (NULL);
16980Sstevel@tonic-gate }
16990Sstevel@tonic-gate 
17000Sstevel@tonic-gate 
17010Sstevel@tonic-gate static void
lio_set_uerror(void * resultp,int error)17020Sstevel@tonic-gate lio_set_uerror(void *resultp, int error)
17030Sstevel@tonic-gate {
17040Sstevel@tonic-gate 	/*
17050Sstevel@tonic-gate 	 * the resultp field is a pointer to where the
17060Sstevel@tonic-gate 	 * error should be written out to the user's
17070Sstevel@tonic-gate 	 * aiocb.
17080Sstevel@tonic-gate 	 *
17090Sstevel@tonic-gate 	 */
17100Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
17110Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
17120Sstevel@tonic-gate 		    (ssize_t)-1);
17130Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
17140Sstevel@tonic-gate 	}
17150Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17160Sstevel@tonic-gate 	else {
17170Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
17180Sstevel@tonic-gate 		    (uint_t)-1);
17190Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
17200Sstevel@tonic-gate 	}
17210Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17220Sstevel@tonic-gate }
17230Sstevel@tonic-gate 
17240Sstevel@tonic-gate /*
17250Sstevel@tonic-gate  * do cleanup completion for all requests in list. memory for
17260Sstevel@tonic-gate  * each request is also freed.
17270Sstevel@tonic-gate  */
/*
 * Do cleanup completion for every request named in the cbp array:
 * unlock any aphysio pages, copy the result out to the user, and free
 * the kernel request.  cbp holds nent user aiocb pointers (NULL slots
 * are skipped); run_mode selects how each pointer is interpreted
 * (native aiocb, 32-bit aiocb, or largefile aiocb64_32) so that the
 * embedded aio_result can be located.
 *
 * NOTE(review): resultp is only assigned for the run_mode values the
 * branches below recognize; callers are presumed to pass a valid
 * run_mode for the current data model -- confirm against callers.
 */
static void
alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
{
	int i;
	aio_req_t *reqp;
	aio_result_t *resultp;
	aiocb64_32_t *aiocb_64;

	for (i = 0; i < nent; i++) {
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (cbp[i] == NULL)
				continue;
			if (run_mode == AIO_LARGEFILE) {
				/* largefile call on a 32-bit kernel */
				aiocb_64 = (aiocb64_32_t *)cbp[i];
				resultp = (aio_result_t *)
				    &aiocb_64->aio_resultp;
			} else
				resultp = &cbp[i]->aio_resultp;
		}
#ifdef	_SYSCALL32_IMPL
		else {
			aiocb32_t *aiocb_32;
			caddr32_t *cbp32;

			/* 32-bit caller: the array holds caddr32_t slots */
			cbp32 = (caddr32_t *)cbp;
			if (cbp32[i] == NULL)
				continue;
			if (run_mode == AIO_32) {
				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
				resultp = (aio_result_t *)&aiocb_32->
				    aio_resultp;
			} else if (run_mode == AIO_LARGEFILE) {
				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
				resultp = (aio_result_t *)&aiocb_64->
				    aio_resultp;
			}
		}
#endif  /* _SYSCALL32_IMPL */
		/*
		 * we need to get the aio_cleanupq_mutex since we call
		 * aio_req_done().
		 */
		mutex_enter(&aiop->aio_cleanupq_mutex);
		mutex_enter(&aiop->aio_mutex);
		reqp = aio_req_done(resultp);
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_cleanupq_mutex);
		if (reqp != NULL) {
			/* request is done: unlock pages, copy out, free */
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
}
17840Sstevel@tonic-gate 
17850Sstevel@tonic-gate /*
17861885Sraf  * Write out the results for an aio request that is done.
17870Sstevel@tonic-gate  */
/*
 * Write out the results for an aio request that is done.
 *
 * cb is the user's control block; run_mode selects its layout (native,
 * 32-bit, or largefile aiocb64_32) so the embedded aio_result can be
 * located.  Returns 0 when the request was found done (result copied
 * out and the kernel request freed), EINPROGRESS when it is still
 * pending, and EINVAL when no matching request exists or the
 * arguments are bad.
 */
static int
aioerror(void *cb, int run_mode)
{
	aio_result_t *resultp;
	aio_t *aiop;
	aio_req_t *reqp;
	int retval;

	aiop = curproc->p_aio;
	if (aiop == NULL || cb == NULL)
		return (EINVAL);

	/* locate the aio_result embedded in the user's control block */
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (run_mode == AIO_LARGEFILE)
			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
			    aio_resultp;
		else
			resultp = &((aiocb_t *)cb)->aio_resultp;
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (run_mode == AIO_LARGEFILE)
			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
			    aio_resultp;
		else if (run_mode == AIO_32)
			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
			    aio_resultp;
	}
#endif  /* _SYSCALL32_IMPL */
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_find().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	/* retval: 0 = done, 1 = still in progress, 2 = not found */
	retval = aio_req_find(resultp, &reqp);
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (retval == 0) {
		/* request completed: unlock pages, copy out, free */
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
		return (0);
	} else if (retval == 1)
		return (EINPROGRESS);
	else if (retval == 2)
		return (EINVAL);
	return (0);
}
18390Sstevel@tonic-gate 
18400Sstevel@tonic-gate /*
18410Sstevel@tonic-gate  * 	aio_cancel - if no requests outstanding,
18420Sstevel@tonic-gate  *			return AIO_ALLDONE
18430Sstevel@tonic-gate  *			else
18440Sstevel@tonic-gate  *			return AIO_NOTCANCELED
18450Sstevel@tonic-gate  */
/*
 * aio_cancel - if no requests outstanding,
 *		return AIO_ALLDONE
 *		else
 *		return AIO_NOTCANCELED
 *
 * In-flight requests are never actually cancelled here; the cancel
 * status is reported through *rval while the syscall return value
 * carries only argument errors (EBADF/EINVAL).  When cb is non-NULL
 * only that one request is examined; otherwise every request on
 * fildes is checked.
 */
static int
aio_cancel(
	int	fildes,
	void 	*cb,
	long	*rval,
	int	run_mode)
{
	aio_t *aiop;
	void *resultp;
	int index;
	aio_req_t **bucket;
	aio_req_t *ent;


	/*
	 * Verify valid file descriptor
	 */
	if ((getf(fildes)) == NULL) {
		return (EBADF);
	}
	releasef(fildes);

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/* nothing outstanding at all: trivially all done */
	if (aiop->aio_outstanding == 0) {
		*rval = AIO_ALLDONE;
		return (0);
	}

	mutex_enter(&aiop->aio_mutex);
	if (cb != NULL) {
		/* single-request cancel: find its aio_result address */
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else
				resultp = &((aiocb_t *)cb)->aio_resultp;
		}
#ifdef	_SYSCALL32_IMPL
		else {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else if (run_mode == AIO_32)
				resultp = (aio_result_t *)&((aiocb32_t *)cb)
				    ->aio_resultp;
		}
#endif  /* _SYSCALL32_IMPL */
		/* look the request up in the per-process hash table */
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == resultp) {
				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_ALLDONE;
					return (0);
				}
				/* still pending: cannot be cancelled */
				mutex_exit(&aiop->aio_mutex);
				*rval = AIO_NOTCANCELED;
				return (0);
			}
		}
		/* not found in the hash: treat as already done */
		mutex_exit(&aiop->aio_mutex);
		*rval = AIO_ALLDONE;
		return (0);
	}

	/* cb == NULL: scan every bucket for requests on this fd */
	for (index = 0; index < AIO_HASHSZ; index++) {
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_fd == fildes) {
				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_NOTCANCELED;
					return (0);
				}
			}
		}
	}
	mutex_exit(&aiop->aio_mutex);
	*rval = AIO_ALLDONE;
	return (0);
}
19310Sstevel@tonic-gate 
19320Sstevel@tonic-gate /*
19330Sstevel@tonic-gate  * solaris version of asynchronous read and write
19340Sstevel@tonic-gate  */
/*
 * solaris version of asynchronous read and write
 *
 * Builds an on-stack aiocb (native on _LP64, largefile aiocb64_32
 * otherwise) from the raw arguments, sets up a kernel aio request,
 * and hands it to the vnode's aio strategy routine.  mode is the
 * FREAD/FWRITE access required of the descriptor; an opcode with
 * AIO_POLL_BIT set marks the request pollable.
 */
static int
arw(
	int	opcode,
	int	fdes,
	char	*bufp,
	int	bufsize,
	offset_t	offset,
	aio_result_t	*resultp,
	int		mode)
{
	file_t		*fp;
	int		error;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
#ifdef _LP64
	aiocb_t		aiocb;
#else
	aiocb64_32_t	aiocb64;
#endif

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/* hold the file descriptor for the duration of the setup */
	if ((fp = getf(fdes)) == NULL) {
		return (EBADF);
	}

	/*
	 * check the permission of the partition
	 */
	if ((fp->f_flag & mode) == 0) {
		releasef(fdes);
		return (EBADF);
	}

	/* EBADFD if the vnode has no async strategy routine */
	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fdes);
		return (EBADFD);
	}
#ifdef _LP64
	aiocb.aio_fildes = fdes;
	aiocb.aio_buf = bufp;
	aiocb.aio_nbytes = bufsize;
	aiocb.aio_offset = offset;
	aiocb.aio_sigevent.sigev_notify = 0;
	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);
#else
	aiocb64.aio_fildes = fdes;
	aiocb64.aio_buf = (caddr32_t)bufp;
	aiocb64.aio_nbytes = bufsize;
	aiocb64.aio_offset = offset;
	aiocb64.aio_sigevent.sigev_notify = 0;
	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);
#endif
	if (error) {
		releasef(fdes);
		return (error);
	}

	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	/* zero-length I/O completes immediately */
	if (bufsize == 0) {
		clear_active_fd(fdes);
		aio_zerolen(reqp);
		return (0);
	}
	/*
	 * send the request to driver.
	 */
	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		/* undo the setup: drop the fd hold and free the request */
		releasef(fdes);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fdes);
	return (0);
}
20330Sstevel@tonic-gate 
/*
 * Common kernel entry point for the POSIX asynchronous read/write calls
 * (aio_read/aio_write and their 64-bit-offset variants).
 *
 *	opcode		AIOAREAD/AIOAWRITE family; may carry AIO_POLL_BIT
 *	aiocb_arg	user address of the caller's aiocb (model-dependent)
 *	mode		FREAD or FWRITE; checked against the file's open mode
 *	run_mode	AIO_32, AIO_64, or AIO_LARGEFILE (selects aiocb layout)
 *
 * Returns 0 on successful submission, or an errno value.  On success the
 * file descriptor reference taken by getf() is owned by the request and
 * is released by the aio cleanup thread when the I/O completes.
 */
static int
aiorw(
	int		opcode,
	void		*aiocb_arg,
	int		mode,
	int		run_mode)
{
#ifdef _SYSCALL32_IMPL
	aiocb32_t	aiocb32;
	struct	sigevent32 *sigev32;
	port_notify32_t	pntfy32;
#endif
	aiocb64_32_t	aiocb64;
	aiocb_t		aiocb;
	file_t		*fp;
	int		error, fd;
	size_t		bufsize;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
	aio_result_t	*resultp;
	struct	sigevent *sigev;
	model_t		model;
	int		aio_use_port = 0;
	port_notify_t	pntfy;

	model = get_udatamodel();
	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);	/* process never called aio_init */

	/*
	 * Copy in the caller's aiocb using the layout that matches the
	 * caller's data model and run_mode, and note where the in-user-space
	 * result area lives so completion status can be delivered there.
	 */
	if (model == DATAMODEL_NATIVE) {
		if (run_mode != AIO_LARGEFILE) {
			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
				return (EFAULT);
			bufsize = aiocb.aio_nbytes;
			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev = &aiocb.aio_sigevent;
		} else {
			/*
			 * We come here only when we make largefile
			 * call on 32 bit kernel using 32 bit library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
				return (EBADF);
			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
		}

		/*
		 * SIGEV_PORT: copy in the caller's port_notify_t.
		 * SIGEV_THREAD: the library encodes the event port in
		 * sigev_signo and the user cookie in sigev_value; both
		 * funnel through the event-port delivery path.
		 */
		if (sigev->sigev_notify == SIGEV_PORT) {
			if (copyin((void *)sigev->sigev_value.sival_ptr,
			    &pntfy, sizeof (port_notify_t))) {
				releasef(fd);
				return (EFAULT);
			}
			aio_use_port = 1;
		} else if (sigev->sigev_notify == SIGEV_THREAD) {
			/*
			 * NOTE(review): this reads aiocb.aio_sigevent even
			 * when run_mode == AIO_LARGEFILE, in which case only
			 * aiocb64 was copied in above — confirm SIGEV_THREAD
			 * cannot be reached on the native largefile path.
			 */
			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
			pntfy.portnfy_user =
			    aiocb.aio_sigevent.sigev_value.sival_ptr;
			aio_use_port = 1;
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (run_mode == AIO_32) {
			/* 32 bit system call is being made on 64 bit kernel */
			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
				return (EFAULT);

			bufsize = aiocb32.aio_nbytes;
			aiocb_32ton(&aiocb32, &aiocb);
			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
			    aio_resultp);
			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev32 = &aiocb32.aio_sigevent;
		} else if (run_mode == AIO_LARGEFILE) {
			/*
			 * We come here only when we make largefile
			 * call on 64 bit kernel using 32 bit library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			aiocb_LFton(&aiocb64, &aiocb);
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
				return (EBADF);
			sigev32 = &aiocb64.aio_sigevent;
		}

		/* 32-bit notification structures need widening to native */
		if (sigev32->sigev_notify == SIGEV_PORT) {
			if (copyin(
			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
			    &pntfy32, sizeof (port_notify32_t))) {
				releasef(fd);
				return (EFAULT);
			}
			pntfy.portnfy_port = pntfy32.portnfy_port;
			pntfy.portnfy_user = (void *)(uintptr_t)
			    pntfy32.portnfy_user;
			aio_use_port = 1;
		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
			pntfy.portnfy_port = sigev32->sigev_signo;
			pntfy.portnfy_user = (void *)(uintptr_t)
			    sigev32->sigev_value.sival_ptr;
			aio_use_port = 1;
		}
	}
#endif  /* _SYSCALL32_IMPL */

	/*
	 * check the permission of the partition
	 */

	if ((fp->f_flag & mode) == 0) {
		releasef(fd);
		return (EBADF);
	}

	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);	/* vnode must support async I/O */
	if (aio_func == NULL) {
		releasef(fd);
		return (EBADFD);
	}
	if (run_mode == AIO_LARGEFILE)
		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
	else
		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);

	if (error) {
		releasef(fd);
		return (error);
	}
	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	/* remember the user-space iocb so completion can identify it */
	if (model == DATAMODEL_NATIVE)
		reqp->aio_req_iocb.iocb = aiocb_arg;
#ifdef  _SYSCALL32_IMPL
	else
		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
#endif

	if (aio_use_port) {
		int event = (run_mode == AIO_LARGEFILE)?
		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
	}

	/*
	 * send the request to driver.
	 */
	if (error == 0) {
		if (bufsize == 0) {
			/* zero-length I/O completes immediately */
			clear_active_fd(fd);
			aio_zerolen(reqp);
			return (0);
		}
		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	}

	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		/* unwind: drop the fd hold and retire the request */
		releasef(fd);
		mutex_enter(&aiop->aio_mutex);
		if (aio_use_port)
			aio_deq(&aiop->aio_portpending, reqp);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fd);
	return (0);
}
22360Sstevel@tonic-gate 
22370Sstevel@tonic-gate 
/*
 * set error for a list IO entry that failed.
 *
 * Retires a lio_listio() sub-request whose submission failed: it is
 * removed from the port-pending queue (if event-port notification was
 * in use), marked so physio cleanup is skipped, and freed immediately
 * since it will never reach the done queue.
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	if (portused)
		aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as its never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 *	 aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	/* wake anyone in aio_req_setup() waiting for pending I/O to drain */
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}
22670Sstevel@tonic-gate 
22680Sstevel@tonic-gate /*
22690Sstevel@tonic-gate  * check if a specified request is done, and remove it from
22700Sstevel@tonic-gate  * the done queue. otherwise remove anybody from the done queue
22710Sstevel@tonic-gate  * if NULL is specified.
22720Sstevel@tonic-gate  */
22730Sstevel@tonic-gate static aio_req_t *
aio_req_done(void * resultp)22740Sstevel@tonic-gate aio_req_done(void *resultp)
22750Sstevel@tonic-gate {
22760Sstevel@tonic-gate 	aio_req_t **bucket;
22770Sstevel@tonic-gate 	aio_req_t *ent;
22780Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
22790Sstevel@tonic-gate 	long index;
22800Sstevel@tonic-gate 
22810Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
22820Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
22830Sstevel@tonic-gate 
22840Sstevel@tonic-gate 	if (resultp) {
22850Sstevel@tonic-gate 		index = AIO_HASH(resultp);
22860Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
22870Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
22880Sstevel@tonic-gate 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
22890Sstevel@tonic-gate 				if (ent->aio_req_flags & AIO_DONEQ) {
22900Sstevel@tonic-gate 					return (aio_req_remove(ent));
22910Sstevel@tonic-gate 				}
22920Sstevel@tonic-gate 				return (NULL);
22930Sstevel@tonic-gate 			}
22940Sstevel@tonic-gate 		}
22950Sstevel@tonic-gate 		/* no match, resultp is invalid */
22960Sstevel@tonic-gate 		return (NULL);
22970Sstevel@tonic-gate 	}
22980Sstevel@tonic-gate 	return (aio_req_remove(NULL));
22990Sstevel@tonic-gate }
23000Sstevel@tonic-gate 
23010Sstevel@tonic-gate /*
23020Sstevel@tonic-gate  * determine if a user-level resultp pointer is associated with an
23030Sstevel@tonic-gate  * active IO request. Zero is returned when the request is done,
23040Sstevel@tonic-gate  * and the request is removed from the done queue. Only when the
23050Sstevel@tonic-gate  * return value is zero, is the "reqp" pointer valid. One is returned
23060Sstevel@tonic-gate  * when the request is inprogress. Two is returned when the request
23070Sstevel@tonic-gate  * is invalid.
23080Sstevel@tonic-gate  */
23090Sstevel@tonic-gate static int
aio_req_find(aio_result_t * resultp,aio_req_t ** reqp)23100Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
23110Sstevel@tonic-gate {
23120Sstevel@tonic-gate 	aio_req_t **bucket;
23130Sstevel@tonic-gate 	aio_req_t *ent;
23140Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23150Sstevel@tonic-gate 	long index;
23160Sstevel@tonic-gate 
23170Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23180Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23190Sstevel@tonic-gate 
23200Sstevel@tonic-gate 	index = AIO_HASH(resultp);
23210Sstevel@tonic-gate 	bucket = &aiop->aio_hash[index];
23220Sstevel@tonic-gate 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23230Sstevel@tonic-gate 		if (ent->aio_req_resultp == resultp) {
23240Sstevel@tonic-gate 			if (ent->aio_req_flags & AIO_DONEQ) {
23250Sstevel@tonic-gate 				*reqp = aio_req_remove(ent);
23260Sstevel@tonic-gate 				return (0);
23270Sstevel@tonic-gate 			}
23280Sstevel@tonic-gate 			return (1);
23290Sstevel@tonic-gate 		}
23300Sstevel@tonic-gate 	}
23310Sstevel@tonic-gate 	/* no match, resultp is invalid */
23320Sstevel@tonic-gate 	return (2);
23330Sstevel@tonic-gate }
23340Sstevel@tonic-gate 
/*
 * remove a request from the done queue.
 *
 * If reqp is non-NULL it is unlinked from whichever circular queue it
 * is on (the done queue or the cleanup queue).  If reqp is NULL the
 * head of the done queue is removed instead.  Returns the removed
 * request, or NULL when reqp was NULL and the done queue was empty.
 * Caller must hold aio_mutex.
 */
static aio_req_t *
aio_req_remove(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp != NULL) {
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		/* queues are circular: next == self means single entry */
		if (reqp->aio_req_next == reqp) {
			/* only one request on queue */
			if (reqp ==  aiop->aio_doneq) {
				aiop->aio_doneq = NULL;
			} else {
				ASSERT(reqp == aiop->aio_cleanupq);
				aiop->aio_cleanupq = NULL;
			}
		} else {
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			/*
			 * The request can be either on the aio_doneq or the
			 * aio_cleanupq
			 */
			if (reqp == aiop->aio_doneq)
				aiop->aio_doneq = reqp->aio_req_next;

			if (reqp == aiop->aio_cleanupq)
				aiop->aio_cleanupq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	} else if ((reqp = aiop->aio_doneq) != NULL) {
		/* NULL argument: pop the head of the done queue */
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp == reqp->aio_req_next) {
			/* only one request on queue */
			aiop->aio_doneq = NULL;
		} else {
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			aiop->aio_doneq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	}
	/* an emptied done queue unblocks any aio_waitn() waiters */
	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
		cv_broadcast(&aiop->aio_waitcv);
	return (reqp);
}
23890Sstevel@tonic-gate 
/*
 * Allocate and initialize an aio_req_t for a single asynchronous I/O.
 *
 *	reqpp		out: the initialized request on success
 *	aiop		per-process aio state
 *	arg		kernel copy of the caller's aiocb
 *	resultp		user-space address for completion status
 *	vp		vnode the I/O is directed at
 *	old_solaris_req	non-zero for the legacy (pre-POSIX) aio interface
 *
 * Returns 0, EAGAIN (allocation failure), EIO (process is draining
 * requests), or an aio_req_alloc() error.
 */
static int
aio_req_setup(
	aio_req_t	**reqpp,
	aio_t 		*aiop,
	aiocb_t 	*arg,
	aio_result_t 	*resultp,
	vnode_t		*vp,
	int		old_solaris_req)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t 	*reqp;
	struct uio 	*uio;
	struct sigevent *sigev;
	int		error;

	/*
	 * Pre-allocate the completion signal while no locks are held;
	 * KM_NOSLEEP because this runs in the syscall path.
	 */
	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value = sigev->sigev_value;
	}

	mutex_enter(&aiop->aio_mutex);

	/* AIO_REQ_BLOCK: process is waiting for pending I/O to drain */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	if (old_solaris_req) {
		/* this is an old solaris aio request */
		reqp->aio_req_flags |= AIO_SOLARIS;
		aiop->aio_flags |= AIO_SOLARIS_REQ;
	}
	/* event-port/thread notifications track the request until delivery */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
24680Sstevel@tonic-gate 
24690Sstevel@tonic-gate /*
24700Sstevel@tonic-gate  * Allocate p_aio struct.
24710Sstevel@tonic-gate  */
24720Sstevel@tonic-gate static aio_t *
aio_aiop_alloc(void)24730Sstevel@tonic-gate aio_aiop_alloc(void)
24740Sstevel@tonic-gate {
24750Sstevel@tonic-gate 	aio_t	*aiop;
24760Sstevel@tonic-gate 
24770Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&curproc->p_lock));
24780Sstevel@tonic-gate 
24790Sstevel@tonic-gate 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
24800Sstevel@tonic-gate 	if (aiop) {
24810Sstevel@tonic-gate 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
24820Sstevel@tonic-gate 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
24834502Spraks 		    NULL);
24840Sstevel@tonic-gate 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
24850Sstevel@tonic-gate 	}
24860Sstevel@tonic-gate 	return (aiop);
24870Sstevel@tonic-gate }
24880Sstevel@tonic-gate 
/*
 * Allocate an aio_req struct.
 *
 * Recycles a request from the per-process free list when possible,
 * otherwise allocates fresh memory (KM_NOSLEEP).  The request is
 * inserted into the resultp hash; EBUSY means another outstanding
 * request already uses the same resultp address.  Caller holds
 * aio_mutex.
 */
static int
aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
{
	aio_req_t *reqp;
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if ((reqp = aiop->aio_free) != NULL) {
		aiop->aio_free = reqp->aio_req_next;
		bzero(reqp, sizeof (*reqp));	/* recycled: scrub old state */
	} else {
		/*
		 * Check whether memory is getting tight.
		 * This is a temporary mechanism to avoid memory
		 * exhaustion by a single process until we come up
		 * with a per process solution such as setrlimit().
		 */
		if (freemem < desfree)
			return (EAGAIN);
		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
		if (reqp == NULL)
			return (EAGAIN);
	}
	/* wire up the embedded uio/iov so the request is self-contained */
	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
	reqp->aio_req.aio_private = reqp;
	reqp->aio_req_buf.b_offset = -1;
	reqp->aio_req_resultp = resultp;
	if (aio_hash_insert(reqp, aiop)) {
		/* duplicate resultp: return the request to the free list */
		reqp->aio_req_next = aiop->aio_free;
		aiop->aio_free = reqp;
		return (EBUSY);
	}
	*nreqp = reqp;
	return (0);
}
25290Sstevel@tonic-gate 
25300Sstevel@tonic-gate /*
25310Sstevel@tonic-gate  * Allocate an aio_lio_t struct.
25320Sstevel@tonic-gate  */
25330Sstevel@tonic-gate static int
aio_lio_alloc(aio_lio_t ** head)25340Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head)
25350Sstevel@tonic-gate {
25360Sstevel@tonic-gate 	aio_lio_t *liop;
25370Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
25380Sstevel@tonic-gate 
25390Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
25400Sstevel@tonic-gate 
25410Sstevel@tonic-gate 	if ((liop = aiop->aio_lio_free) != NULL) {
25420Sstevel@tonic-gate 		aiop->aio_lio_free = liop->lio_next;
25430Sstevel@tonic-gate 	} else {
25440Sstevel@tonic-gate 		/*
25450Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25460Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25470Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25480Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25490Sstevel@tonic-gate 		 */
25500Sstevel@tonic-gate 		if (freemem < desfree)
25510Sstevel@tonic-gate 			return (EAGAIN);
25520Sstevel@tonic-gate 
25530Sstevel@tonic-gate 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
25540Sstevel@tonic-gate 		if (liop == NULL)
25550Sstevel@tonic-gate 			return (EAGAIN);
25560Sstevel@tonic-gate 	}
25570Sstevel@tonic-gate 	*head = liop;
25580Sstevel@tonic-gate 	return (0);
25590Sstevel@tonic-gate }
25600Sstevel@tonic-gate 
/*
 * this is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio. normally,
 * the process will have completed the aio before unmapping the
 * segment. If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped. In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 */

static int
aio_cleanup_thread(aio_t *aiop)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	int poked = 0;		/* set when awakened by pokelwps() */
	kcondvar_t *cvp;
	int exit_flag = 0;	/* safe to exit: completed I/O unlocked */
	int rqclnup = 0;	/* a DR cleanup request is being serviced */

	/* block all catchable signals; this thread never takes them */
	sigfillset(&curthread->t_hold);
	sigdiffset(&curthread->t_hold, &cantmask);
	for (;;) {
		/*
		 * if a segment is being unmapped, and the current
		 * process's done queue is not empty, then every request
		 * on the doneq with locked resources should be forced
		 * to release their locks. By moving the doneq request
		 * to the cleanupq, aio_cleanup() will process the cleanupq,
		 * and place requests back onto the doneq. All requests
		 * processed by aio_cleanup() will have their physical
		 * resources unlocked.
		 */
		mutex_enter(&aiop->aio_mutex);
		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
			aiop->aio_flags |= AIO_CLEANUP;
			mutex_enter(&as->a_contents);
			if (aiop->aio_rqclnup) {
				aiop->aio_rqclnup = 0;
				rqclnup = 1;
			}
			mutex_exit(&as->a_contents);
			if (aiop->aio_doneq) {
				aio_req_t *doneqhead = aiop->aio_doneq;
				aiop->aio_doneq = NULL;
				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		aio_cleanup(AIO_CLEANUP_THREAD);
		/*
		 * thread should block on the cleanupcv while
		 * AIO_CLEANUP is set.
		 */
		cvp = &aiop->aio_cleanupcv;
		mutex_enter(&aiop->aio_mutex);

		/* more work queued while we were cleaning: go around again */
		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
		    aiop->aio_notifyq != NULL ||
		    aiop->aio_portcleanupq != NULL) {
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&as->a_contents);

		/*
		 * AIO_CLEANUP determines when the cleanup thread
		 * should be active. This flag is set when
		 * the cleanup thread is awakened by as_unmap() or
		 * due to DR operations.
		 * The flag is cleared when the blocking as_unmap()
		 * that originally awakened us is allowed to
		 * complete. as_unmap() blocks when trying to
		 * unmap a segment that has SOFTLOCKed pages. when
		 * the segment's pages are all SOFTUNLOCKed,
		 * as->a_flags & AS_UNMAPWAIT should be zero.
		 *
		 * In case of cleanup request by DR, the flag is cleared
		 * once all the pending aio requests have been processed.
		 *
		 * The flag shouldn't be cleared right away if the
		 * cleanup thread was interrupted because the process
		 * is doing forkall(). This happens when cv_wait_sig()
		 * returns zero, because it was awakened by a pokelwps().
		 * If the process is not exiting, it must be doing forkall().
		 */
		if ((poked == 0) &&
		    ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
		    (aiop->aio_pending == 0))) {
			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
			cvp = &as->a_cv;	/* sleep on the as cv instead */
			rqclnup = 0;
		}
		mutex_exit(&aiop->aio_mutex);
		if (poked) {
			/*
			 * If the process is exiting/killed, don't return
			 * immediately without waiting for pending I/O's
			 * and releasing the page locks.
			 */
			if (p->p_flag & (SEXITLWPS|SKILLED)) {
				/*
				 * If exit_flag is set, then it is
				 * safe to exit because we have released
				 * page locks of completed I/O's.
				 */
				if (exit_flag)
					break;

				mutex_exit(&as->a_contents);

				/*
				 * Wait for all the pending aio to complete.
				 */
				mutex_enter(&aiop->aio_mutex);
				aiop->aio_flags |= AIO_REQ_BLOCK;
				while (aiop->aio_pending != 0)
					cv_wait(&aiop->aio_cleanupcv,
					    &aiop->aio_mutex);
				mutex_exit(&aiop->aio_mutex);
				exit_flag = 1;
				continue;
			} else if (p->p_flag &
			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
				/*
				 * hold LWP until it
				 * is continued.
				 */
				mutex_exit(&as->a_contents);
				mutex_enter(&p->p_lock);
				stop(PR_SUSPENDED, SUSPEND_NORMAL);
				mutex_exit(&p->p_lock);
				poked = 0;
				continue;
			}
		} else {
			/*
			 * When started this thread will sleep on as->a_cv.
			 * as_unmap will awake this thread if the
			 * segment has SOFTLOCKed pages (poked = 0).
			 * 1. pokelwps() awakes this thread =>
			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
			 * 2. as_unmap awakes this thread =>
			 *    to break the loop it is necessary that
			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
			 *	memory to be unlocked)
			 *    - AIO_CLEANUP is not set
			 *	(if AIO_CLEANUP is set we have to wait for
			 *	pending requests. aio_done will send a signal
			 *	for every request which completes to continue
			 *	unmapping the corresponding address range)
			 * 3. A cleanup request will wake this thread up, ex.
			 *    by the DR operations. The aio_rqclnup flag will
			 *    be set.
			 */
			while (poked == 0) {
				/*
				 * The clean up requests that came in
				 * after we had just cleaned up, couldn't
				 * be causing the unmap thread to block - as
				 * unmap event happened first.
				 * Let aio_done() wake us up if it sees a need.
				 */
				if (aiop->aio_rqclnup &&
				    (aiop->aio_flags & AIO_CLEANUP) == 0)
					break;
				poked = !cv_wait_sig(cvp, &as->a_contents);
				/* propagate the wakeup to other sleepers */
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_signal(cvp);
				if (aiop->aio_outstanding != 0)
					break;
			}
		}
		mutex_exit(&as->a_contents);
	}
exit:
	/* only reached when the process is exiting (see ASSERT below) */
	mutex_exit(&as->a_contents);
	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
	aston(curthread);	/* make thread do post_syscall */
	return (0);
}
27430Sstevel@tonic-gate 
27440Sstevel@tonic-gate /*
27450Sstevel@tonic-gate  * save a reference to a user's outstanding aio in a hash list.
27460Sstevel@tonic-gate  */
27470Sstevel@tonic-gate static int
aio_hash_insert(aio_req_t * aio_reqp,aio_t * aiop)27480Sstevel@tonic-gate aio_hash_insert(
27490Sstevel@tonic-gate 	aio_req_t *aio_reqp,
27500Sstevel@tonic-gate 	aio_t *aiop)
27510Sstevel@tonic-gate {
27520Sstevel@tonic-gate 	long index;
27530Sstevel@tonic-gate 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
27540Sstevel@tonic-gate 	aio_req_t *current;
27550Sstevel@tonic-gate 	aio_req_t **nextp;
27560Sstevel@tonic-gate 
27570Sstevel@tonic-gate 	index = AIO_HASH(resultp);
27580Sstevel@tonic-gate 	nextp = &aiop->aio_hash[index];
27590Sstevel@tonic-gate 	while ((current = *nextp) != NULL) {
27600Sstevel@tonic-gate 		if (current->aio_req_resultp == resultp)
27610Sstevel@tonic-gate 			return (DUPLICATE);
27620Sstevel@tonic-gate 		nextp = &current->aio_hash_next;
27630Sstevel@tonic-gate 	}
27640Sstevel@tonic-gate 	*nextp = aio_reqp;
27650Sstevel@tonic-gate 	aio_reqp->aio_hash_next = NULL;
27660Sstevel@tonic-gate 	return (0);
27670Sstevel@tonic-gate }
27680Sstevel@tonic-gate 
27690Sstevel@tonic-gate static int
check_vp(struct vnode * vp,int mode)27700Sstevel@tonic-gate (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
27710Sstevel@tonic-gate     cred_t *)
27720Sstevel@tonic-gate {
27730Sstevel@tonic-gate 	struct snode *sp;
27740Sstevel@tonic-gate 	dev_t		dev;
27750Sstevel@tonic-gate 	struct cb_ops  	*cb;
27760Sstevel@tonic-gate 	major_t		major;
27770Sstevel@tonic-gate 	int		(*aio_func)();
27780Sstevel@tonic-gate 
27790Sstevel@tonic-gate 	dev = vp->v_rdev;
27800Sstevel@tonic-gate 	major = getmajor(dev);
27810Sstevel@tonic-gate 
27820Sstevel@tonic-gate 	/*
27830Sstevel@tonic-gate 	 * return NULL for requests to files and STREAMs so
27840Sstevel@tonic-gate 	 * that libaio takes care of them.
27850Sstevel@tonic-gate 	 */
27860Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
27870Sstevel@tonic-gate 		/* no stream device for kaio */
27880Sstevel@tonic-gate 		if (STREAMSTAB(major)) {
27890Sstevel@tonic-gate 			return (NULL);
27900Sstevel@tonic-gate 		}
27910Sstevel@tonic-gate 	} else {
27920Sstevel@tonic-gate 		return (NULL);
27930Sstevel@tonic-gate 	}
27940Sstevel@tonic-gate 
27950Sstevel@tonic-gate 	/*
27960Sstevel@tonic-gate 	 * Check old drivers which do not have async I/O entry points.
27970Sstevel@tonic-gate 	 */
27980Sstevel@tonic-gate 	if (devopsp[major]->devo_rev < 3)
27990Sstevel@tonic-gate 		return (NULL);
28000Sstevel@tonic-gate 
28010Sstevel@tonic-gate 	cb = devopsp[major]->devo_cb_ops;
28020Sstevel@tonic-gate 
28030Sstevel@tonic-gate 	if (cb->cb_rev < 1)
28040Sstevel@tonic-gate 		return (NULL);
28050Sstevel@tonic-gate 
28060Sstevel@tonic-gate 	/*
28070Sstevel@tonic-gate 	 * Check whether this device is a block device.
28080Sstevel@tonic-gate 	 * Kaio is not supported for devices like tty.
28090Sstevel@tonic-gate 	 */
28100Sstevel@tonic-gate 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
28110Sstevel@tonic-gate 		return (NULL);
28120Sstevel@tonic-gate 
28130Sstevel@tonic-gate 	/*
28140Sstevel@tonic-gate 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
28150Sstevel@tonic-gate 	 * We cannot call the driver directly. Instead return the
28160Sstevel@tonic-gate 	 * PXFS functions.
28170Sstevel@tonic-gate 	 */
28180Sstevel@tonic-gate 
28190Sstevel@tonic-gate 	if (IS_PXFSVP(vp)) {
28200Sstevel@tonic-gate 		if (mode & FREAD)
28210Sstevel@tonic-gate 			return (clpxfs_aio_read);
28220Sstevel@tonic-gate 		else
28230Sstevel@tonic-gate 			return (clpxfs_aio_write);
28240Sstevel@tonic-gate 	}
28250Sstevel@tonic-gate 	if (mode & FREAD)
28260Sstevel@tonic-gate 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
28270Sstevel@tonic-gate 	else
28280Sstevel@tonic-gate 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
28290Sstevel@tonic-gate 
28300Sstevel@tonic-gate 	/*
28310Sstevel@tonic-gate 	 * Do we need this ?
28320Sstevel@tonic-gate 	 * nodev returns ENXIO anyway.
28330Sstevel@tonic-gate 	 */
28340Sstevel@tonic-gate 	if (aio_func == nodev)
28350Sstevel@tonic-gate 		return (NULL);
28360Sstevel@tonic-gate 
28370Sstevel@tonic-gate 	sp = VTOS(vp);
28380Sstevel@tonic-gate 	smark(sp, SACC);
28390Sstevel@tonic-gate 	return (aio_func);
28400Sstevel@tonic-gate }
28410Sstevel@tonic-gate 
28420Sstevel@tonic-gate /*
28430Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28440Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28450Sstevel@tonic-gate  * We define this intermediate function that will do the right
28460Sstevel@tonic-gate  * thing for driver cases.
28470Sstevel@tonic-gate  */
28480Sstevel@tonic-gate 
28490Sstevel@tonic-gate static int
driver_aio_write(vnode_t * vp,struct aio_req * aio,cred_t * cred_p)28500Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28510Sstevel@tonic-gate {
28520Sstevel@tonic-gate 	dev_t dev;
28530Sstevel@tonic-gate 	struct cb_ops  	*cb;
28540Sstevel@tonic-gate 
28550Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28560Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28570Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28580Sstevel@tonic-gate 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
28590Sstevel@tonic-gate 
28600Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28610Sstevel@tonic-gate 
28620Sstevel@tonic-gate 	ASSERT(cb->cb_awrite != nodev);
28630Sstevel@tonic-gate 	return ((*cb->cb_awrite)(dev, aio, cred_p));
28640Sstevel@tonic-gate }
28650Sstevel@tonic-gate 
28660Sstevel@tonic-gate /*
28670Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28680Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28690Sstevel@tonic-gate  * We define this intermediate function that will do the right
28700Sstevel@tonic-gate  * thing for driver cases.
28710Sstevel@tonic-gate  */
28720Sstevel@tonic-gate 
28730Sstevel@tonic-gate static int
driver_aio_read(vnode_t * vp,struct aio_req * aio,cred_t * cred_p)28740Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28750Sstevel@tonic-gate {
28760Sstevel@tonic-gate 	dev_t dev;
28770Sstevel@tonic-gate 	struct cb_ops  	*cb;
28780Sstevel@tonic-gate 
28790Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28800Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28810Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28820Sstevel@tonic-gate 	ASSERT(!STREAMSTAB(getmajor(dev)));
28830Sstevel@tonic-gate 
28840Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28850Sstevel@tonic-gate 
28860Sstevel@tonic-gate 	ASSERT(cb->cb_aread != nodev);
28870Sstevel@tonic-gate 	return ((*cb->cb_aread)(dev, aio, cred_p));
28880Sstevel@tonic-gate }
28890Sstevel@tonic-gate 
28900Sstevel@tonic-gate /*
28910Sstevel@tonic-gate  * This routine is called when a largefile call is made by a 32bit
28920Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel. All 64bit processes are large
28930Sstevel@tonic-gate  * file by definition and will call alio() instead.
28940Sstevel@tonic-gate  */
/*
 * Submit a list of largefile (64-bit offset) aio control blocks on
 * behalf of a 32-bit process.
 *
 *   mode_arg   - LIO_WAIT (block until the whole list completes) or
 *		  LIO_NOWAIT.
 *   aiocb_arg  - user address of an array of nent caddr32_t pointers
 *		  to aiocb64_32_t control blocks.
 *   nent       - number of entries; must be in (0, _AIO_LISTIO_MAX].
 *   sigev      - optional user sigevent32 describing list-completion
 *		  notification (signal, event port, or thread).
 *
 * Returns 0, or EINVAL/EFAULT/EAGAIN/ENOTSUP/EIO/EINTR.  Per-request
 * errors are reported through each aiocb's aio_resultp rather than the
 * return value; ENOTSUP takes precedence over EIO when both occur.
 */
static int
alioLF(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
#ifdef _LP64
	aiocb_t		aiocb_n;
#endif
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify32_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/* only 32-bit callers use the largefile entry point */
	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	/* copy in the user's array of aiocb pointers */
	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			/*
			 * SIGEV_THREAD encodes the port in sigev_signo
			 * and the user cookie in sigev_value.
			 */
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		/* pre-allocate the list-completion port event */
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		/* deadhead stays set until a request actually uses head */
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				/* drop this entry from the list's counts */
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 * (LIO_READ/LIO_WRITE opcodes line up with FREAD/FWRITE
		 * here — NOTE(review): relies on that numeric identity)
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				/* cache the lookup for the common case */
				prev_fp = fp;
				prev_mode = mode;
			}
		}

#ifdef	_LP64
		/* 64-bit kernel: widen the largefile aiocb to native form */
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = *ucbp;

		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				/* same port as the list head: share it */
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length I/O completes immediately */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		/* wait for every queued request to signal lio_notify */
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* no request ever referenced head; free it and its payload */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
32530Sstevel@tonic-gate 
32540Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
32550Sstevel@tonic-gate static void
aiocb_LFton(aiocb64_32_t * src,aiocb_t * dest)32560Sstevel@tonic-gate aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
32570Sstevel@tonic-gate {
32580Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
32590Sstevel@tonic-gate 	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
32600Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
32610Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
32620Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
32630Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
32640Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
32650Sstevel@tonic-gate 
32660Sstevel@tonic-gate 	/*
32670Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
32680Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
32690Sstevel@tonic-gate 	 */
32700Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
32710Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
32720Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
32730Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
32740Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
32750Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
32760Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
32770Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
32780Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
32790Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
32800Sstevel@tonic-gate }
32810Sstevel@tonic-gate #endif
32820Sstevel@tonic-gate 
32830Sstevel@tonic-gate /*
32840Sstevel@tonic-gate  * This function is used only for largefile calls made by
32851885Sraf  * 32 bit applications.
32860Sstevel@tonic-gate  */
/*
 * Allocate and initialize an aio_req_t for one largefile request from
 * a 32-bit process on a 32-bit kernel.
 *
 *   reqpp           - out: the initialized request.
 *   aiop            - the process's aio state.
 *   arg             - the copied-in aiocb64_32_t.
 *   resultp         - user address where completion status is stored.
 *   vp              - vnode the I/O targets.
 *   old_solaris_req - nonzero for old-style Solaris aio; tags both the
 *                     request and the process with the AIO_SOLARIS flags.
 *
 * Returns 0, EAGAIN (no memory for the signal queue entry or request),
 * or EIO (process is blocking new requests via AIO_REQ_BLOCK).
 */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp,
	int		old_solaris_req)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int 		error;

	/*
	 * If per-request SIGEV_SIGNAL notification was asked for,
	 * pre-build the sigqueue entry now, outside aio_mutex.
	 */
	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	/* the process is draining its aio; refuse new requests */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	if (old_solaris_req) {
		/* this is an old solaris aio request */
		reqp->aio_req_flags |= AIO_SOLARIS;
		aiop->aio_flags |= AIO_SOLARIS_REQ;
	}
	/* port/thread notifications track pending requests separately */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	/* single-iovec uio describing the user buffer and offset */
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
33650Sstevel@tonic-gate 
33660Sstevel@tonic-gate /*
33670Sstevel@tonic-gate  * This routine is called when a non largefile call is made by a 32bit
33680Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel.
33690Sstevel@tonic-gate  */
/*
 * Submit a list of up to _AIO_LISTIO_MAX asynchronous I/O requests on
 * behalf of a 32-bit process.  aiocb_arg points to a user array of nent
 * 32-bit aiocb pointers; mode_arg is LIO_WAIT (block until the whole
 * list completes) or LIO_NOWAIT; sigev, if non-NULL, is a user sigevent
 * describing list-completion notification (signal, thread, or event
 * port).  Returns 0 or an errno value.  Per-request failures are
 * reported through each aiocb's aio_resultp and summarized as
 * EIO/ENOTSUP in the return value.
 */
static int
alio32(
	int		mode_arg,	/* LIO_WAIT or LIO_NOWAIT */
	void		*aiocb_arg,	/* user array of nent aiocb ptrs */
	int		nent,		/* number of entries in the array */
	void		*sigev)		/* optional list-completion sigevent */
{
	file_t		*fp;
	file_t		*prev_fp = NULL;	/* cache of last fd looked up */
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
#ifdef	_LP64
	aiocb32_t	*cbp;
	caddr32_t	*ucbp;
	aiocb32_t	cb32;
	aiocb32_t	*aiocb32 = &cb32;
	struct sigevent32	sigevk;
#else
	aiocb_t		*cbp, **ucbp;
	struct sigevent	sigevk;
#endif
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;	/* 1 while no request is linked to head */
	int		aio_notsupported = 0;
	int		lio_head_port;	/* valid only when portused != 0 */
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
#ifdef	_LP64
	port_notify32_t	pnotify;
#else
	port_notify_t	pnotify;
#endif
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/* cbplist holds the copied-in array of user aiocb pointers. */
#ifdef	_LP64
	ssize = (sizeof (caddr32_t) * nent);
#else
	ssize = (sizeof (aiocb_t *) * nent);
#endif
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (void *)cbplist;

	/*
	 * NOTE(review): on ILP32, sigevk is a struct sigevent but the
	 * copyin size is sizeof (struct sigevent32) -- this assumes the
	 * two layouts are identical there; confirm against the headers.
	 */
	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			/* SIGEV_THREAD encodes the port in sigev_signo. */
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		/* Pre-allocate the event delivered when the list completes. */
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		/* head is freed at done: unless a request gets linked to it. */
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			/* Pre-build the siginfo sent when the list completes. */
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's has
			 * completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	/*
	 * Per-entry loop: entries that are skipped or fail are dropped
	 * from head's nent/refcnt so a LIO_WAIT or list notification
	 * only waits on requests that were actually submitted.
	 */
	for (i = 0; i < nent; i++, ucbp++) {

		/* skip entry if it can't be copied. */
#ifdef	_LP64
		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
#else
		cbp = (aiocb_t *)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
#endif
		{
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}
#ifdef	_LP64
		/*
		 * copy 32 bit structure into 64 bit structure
		 */
		aiocb_32ton(aiocb32, aiocb);
#endif /* _LP64 */

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 * (mode is LIO_READ/LIO_WRITE, matched against f_flag)
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/* A request now references head; don't free it at done:. */
		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
#ifdef _LP64
			if (aio_port) {
				void *paddr = (void  *)(uintptr_t)
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb32->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
			}
#else
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
#endif
			/*
			 * If this request targets the same port as the
			 * list-head event, duplicate that pre-allocated
			 * event; otherwise allocate a fresh one.
			 */
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length I/O completes immediately */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		/* Block (interruptibly) until every submitted request is done. */
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* No request ever linked to head: free it and its resources. */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
37580Sstevel@tonic-gate 
37590Sstevel@tonic-gate 
37600Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
37610Sstevel@tonic-gate void
aiocb_32ton(aiocb32_t * src,aiocb_t * dest)37620Sstevel@tonic-gate aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
37630Sstevel@tonic-gate {
37640Sstevel@tonic-gate 	dest->aio_fildes = src->aio_fildes;
37650Sstevel@tonic-gate 	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
37660Sstevel@tonic-gate 	dest->aio_nbytes = (size_t)src->aio_nbytes;
37670Sstevel@tonic-gate 	dest->aio_offset = (off_t)src->aio_offset;
37680Sstevel@tonic-gate 	dest->aio_reqprio = src->aio_reqprio;
37690Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
37700Sstevel@tonic-gate 	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
37710Sstevel@tonic-gate 
37720Sstevel@tonic-gate 	/*
37730Sstevel@tonic-gate 	 * See comment in sigqueue32() on handling of 32-bit
37740Sstevel@tonic-gate 	 * sigvals in a 64-bit kernel.
37750Sstevel@tonic-gate 	 */
37760Sstevel@tonic-gate 	dest->aio_sigevent.sigev_value.sival_int =
37770Sstevel@tonic-gate 	    (int)src->aio_sigevent.sigev_value.sival_int;
37780Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
37790Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
37800Sstevel@tonic-gate 	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
37810Sstevel@tonic-gate 	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
37820Sstevel@tonic-gate 	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
37830Sstevel@tonic-gate 	dest->aio_lio_opcode = src->aio_lio_opcode;
37840Sstevel@tonic-gate 	dest->aio_state = src->aio_state;
37850Sstevel@tonic-gate 	dest->aio__pad[0] = src->aio__pad[0];
37860Sstevel@tonic-gate }
37870Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
37880Sstevel@tonic-gate 
/*
 * aio_port_callback() is called just before the event is retrieved from the
 * port.  The task of this callback function is to finish the work of the
 * transaction for the application, which means:
 * - copy out transaction data to the application
 *	(this thread is running in the right process context)
 * - keep track of the transaction (update counters)
 * - free allocated buffers
 * The aiocb pointer is the object element of the port_kevent_t structure.
 *
 * flag:
 *	PORT_CALLBACK_DEFAULT : do the copyout and free resources
 *	PORT_CALLBACK_CLOSE   : don't do the copyout, free resources
 */
38030Sstevel@tonic-gate 
/*ARGSUSED*/
int
aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	aio_t		*aiop = curproc->p_aio;
	aio_req_t	*reqp = arg;	/* the completed aio request */
	struct	iovec	*iov;
	struct	buf	*bp;
	void		*resultp;

	if (pid != curproc->p_pid) {
		/* wrong proc !!, can not deliver data here ... */
		return (EACCES);
	}

	/* Detach the request from the port queue under the portq lock. */
	mutex_enter(&aiop->aio_portq_mutex);
	reqp->aio_req_portkev = NULL;
	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
	mutex_exit(&aiop->aio_portq_mutex);
	aphysio_unlock(reqp);		/* unlock used pages */
	mutex_enter(&aiop->aio_mutex);
	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
		/* Result already copied out; just recycle the request. */
		aio_req_free_port(aiop, reqp);	/* back to free list */
		mutex_exit(&aiop->aio_mutex);
		return (0);
	}

	/*
	 * Snapshot everything the copyout needs before the request
	 * struct is recycled, then drop aio_mutex; the copyout to
	 * user memory happens after the lock is released.
	 */
	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	resultp = (void *)reqp->aio_req_resultp;
	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
	mutex_exit(&aiop->aio_mutex);
	if (flag == PORT_CALLBACK_DEFAULT)
		aio_copyout_result_port(iov, bp, resultp);
	return (0);
}
3840