xref: /onnv-gate/usr/src/uts/common/os/aio.c (revision 4532:ee67f29bff5d)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51885Sraf  * Common Development and Distribution License (the "License").
61885Sraf  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
211885Sraf 
220Sstevel@tonic-gate /*
234377Sraf  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * Kernel asynchronous I/O.
310Sstevel@tonic-gate  * This is only for raw devices now (as of Nov. 1993).
320Sstevel@tonic-gate  */
330Sstevel@tonic-gate 
340Sstevel@tonic-gate #include <sys/types.h>
350Sstevel@tonic-gate #include <sys/errno.h>
360Sstevel@tonic-gate #include <sys/conf.h>
370Sstevel@tonic-gate #include <sys/file.h>
380Sstevel@tonic-gate #include <sys/fs/snode.h>
390Sstevel@tonic-gate #include <sys/unistd.h>
400Sstevel@tonic-gate #include <sys/cmn_err.h>
410Sstevel@tonic-gate #include <vm/as.h>
420Sstevel@tonic-gate #include <vm/faultcode.h>
430Sstevel@tonic-gate #include <sys/sysmacros.h>
440Sstevel@tonic-gate #include <sys/procfs.h>
450Sstevel@tonic-gate #include <sys/kmem.h>
460Sstevel@tonic-gate #include <sys/autoconf.h>
470Sstevel@tonic-gate #include <sys/ddi_impldefs.h>
480Sstevel@tonic-gate #include <sys/sunddi.h>
490Sstevel@tonic-gate #include <sys/aio_impl.h>
500Sstevel@tonic-gate #include <sys/debug.h>
510Sstevel@tonic-gate #include <sys/param.h>
520Sstevel@tonic-gate #include <sys/systm.h>
530Sstevel@tonic-gate #include <sys/vmsystm.h>
540Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h>
550Sstevel@tonic-gate #include <sys/contract/process_impl.h>
560Sstevel@tonic-gate 
570Sstevel@tonic-gate /*
580Sstevel@tonic-gate  * external entry point.
590Sstevel@tonic-gate  */
600Sstevel@tonic-gate #ifdef _LP64
610Sstevel@tonic-gate static int64_t kaioc(long, long, long, long, long, long);
620Sstevel@tonic-gate #endif
630Sstevel@tonic-gate static int kaio(ulong_t *, rval_t *);
640Sstevel@tonic-gate 
650Sstevel@tonic-gate 
660Sstevel@tonic-gate #define	AIO_64	0
670Sstevel@tonic-gate #define	AIO_32	1
680Sstevel@tonic-gate #define	AIO_LARGEFILE	2
690Sstevel@tonic-gate 
700Sstevel@tonic-gate /*
710Sstevel@tonic-gate  * implementation specific functions (private)
720Sstevel@tonic-gate  */
730Sstevel@tonic-gate #ifdef _LP64
741885Sraf static int alio(int, aiocb_t **, int, struct sigevent *);
750Sstevel@tonic-gate #endif
760Sstevel@tonic-gate static int aionotify(void);
770Sstevel@tonic-gate static int aioinit(void);
780Sstevel@tonic-gate static int aiostart(void);
790Sstevel@tonic-gate static void alio_cleanup(aio_t *, aiocb_t **, int, int);
800Sstevel@tonic-gate static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
810Sstevel@tonic-gate     cred_t *);
824502Spraks static void lio_set_error(aio_req_t *, int portused);
830Sstevel@tonic-gate static aio_t *aio_aiop_alloc();
840Sstevel@tonic-gate static int aio_req_alloc(aio_req_t **, aio_result_t *);
850Sstevel@tonic-gate static int aio_lio_alloc(aio_lio_t **);
860Sstevel@tonic-gate static aio_req_t *aio_req_done(void *);
870Sstevel@tonic-gate static aio_req_t *aio_req_remove(aio_req_t *);
880Sstevel@tonic-gate static int aio_req_find(aio_result_t *, aio_req_t **);
890Sstevel@tonic-gate static int aio_hash_insert(struct aio_req_t *, aio_t *);
900Sstevel@tonic-gate static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
911885Sraf     aio_result_t *, vnode_t *);
920Sstevel@tonic-gate static int aio_cleanup_thread(aio_t *);
930Sstevel@tonic-gate static aio_lio_t *aio_list_get(aio_result_t *);
940Sstevel@tonic-gate static void lio_set_uerror(void *, int);
950Sstevel@tonic-gate extern void aio_zerolen(aio_req_t *);
960Sstevel@tonic-gate static int aiowait(struct timeval *, int, long	*);
970Sstevel@tonic-gate static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
980Sstevel@tonic-gate static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
990Sstevel@tonic-gate     aio_req_t *reqlist, aio_t *aiop, model_t model);
1000Sstevel@tonic-gate static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
1010Sstevel@tonic-gate static int aiosuspend(void *, int, struct  timespec *, int,
1020Sstevel@tonic-gate     long	*, int);
1030Sstevel@tonic-gate static int aliowait(int, void *, int, void *, int);
1040Sstevel@tonic-gate static int aioerror(void *, int);
1050Sstevel@tonic-gate static int aio_cancel(int, void *, long	*, int);
1060Sstevel@tonic-gate static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
1070Sstevel@tonic-gate static int aiorw(int, void *, int, int);
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate static int alioLF(int, void *, int, void *);
1101885Sraf static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
1111885Sraf     aio_result_t *, vnode_t *);
1120Sstevel@tonic-gate static int alio32(int, void *, int, void *);
1130Sstevel@tonic-gate static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1140Sstevel@tonic-gate static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
1170Sstevel@tonic-gate static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
1180Sstevel@tonic-gate void	aiocb_32ton(aiocb32_t *, aiocb_t *);
1190Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
1200Sstevel@tonic-gate 
1210Sstevel@tonic-gate /*
1220Sstevel@tonic-gate  * implementation specific functions (external)
1230Sstevel@tonic-gate  */
1240Sstevel@tonic-gate void aio_req_free(aio_t *, aio_req_t *);
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate /*
1270Sstevel@tonic-gate  * Event Port framework
1280Sstevel@tonic-gate  */
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate void aio_req_free_port(aio_t *, aio_req_t *);
1310Sstevel@tonic-gate static int aio_port_callback(void *, int *, pid_t, int, void *);
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate /*
1340Sstevel@tonic-gate  * This is the loadable module wrapper.
1350Sstevel@tonic-gate  */
1360Sstevel@tonic-gate #include <sys/modctl.h>
1370Sstevel@tonic-gate #include <sys/syscall.h>
1380Sstevel@tonic-gate 
#ifdef _LP64

/*
 * Native 64-bit syscall entry: kaioc() takes 6 long arguments and
 * returns a 64-bit value.  (SE_ARGC appears to select direct C
 * argument passing, matching kaioc's a0..a5 signature, rather than
 * the uap-array form used by kaio() -- confirm against sysent docs.)
 */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/* 32-bit-process entry on a 64-bit kernel: 7 args via the uap array. */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

/* 32-bit kernel: single kaio() entry point, 7 args via the uap array. */
static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef  _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate int
1940Sstevel@tonic-gate _init(void)
1950Sstevel@tonic-gate {
1960Sstevel@tonic-gate 	int retval;
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate 	if ((retval = mod_install(&modlinkage)) != 0)
1990Sstevel@tonic-gate 		return (retval);
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	return (0);
2020Sstevel@tonic-gate }
2030Sstevel@tonic-gate 
2040Sstevel@tonic-gate int
2050Sstevel@tonic-gate _fini(void)
2060Sstevel@tonic-gate {
2070Sstevel@tonic-gate 	int retval;
2080Sstevel@tonic-gate 
2090Sstevel@tonic-gate 	retval = mod_remove(&modlinkage);
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate 	return (retval);
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate 
/*
 * Report module information via the standard linkage record.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate #ifdef	_LP64
2210Sstevel@tonic-gate static int64_t
2220Sstevel@tonic-gate kaioc(
2230Sstevel@tonic-gate 	long	a0,
2240Sstevel@tonic-gate 	long	a1,
2250Sstevel@tonic-gate 	long	a2,
2260Sstevel@tonic-gate 	long	a3,
2270Sstevel@tonic-gate 	long	a4,
2280Sstevel@tonic-gate 	long	a5)
2290Sstevel@tonic-gate {
2300Sstevel@tonic-gate 	int	error;
2310Sstevel@tonic-gate 	long	rval = 0;
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	switch ((int)a0 & ~AIO_POLL_BIT) {
2340Sstevel@tonic-gate 	case AIOREAD:
2350Sstevel@tonic-gate 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
2360Sstevel@tonic-gate 		    (offset_t)a4, (aio_result_t *)a5, FREAD);
2370Sstevel@tonic-gate 		break;
2380Sstevel@tonic-gate 	case AIOWRITE:
2390Sstevel@tonic-gate 		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
2400Sstevel@tonic-gate 		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
2410Sstevel@tonic-gate 		break;
2420Sstevel@tonic-gate 	case AIOWAIT:
2430Sstevel@tonic-gate 		error = aiowait((struct timeval *)a1, (int)a2, &rval);
2440Sstevel@tonic-gate 		break;
2450Sstevel@tonic-gate 	case AIOWAITN:
2460Sstevel@tonic-gate 		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
2470Sstevel@tonic-gate 		    (timespec_t *)a4);
2480Sstevel@tonic-gate 		break;
2490Sstevel@tonic-gate 	case AIONOTIFY:
2500Sstevel@tonic-gate 		error = aionotify();
2510Sstevel@tonic-gate 		break;
2520Sstevel@tonic-gate 	case AIOINIT:
2530Sstevel@tonic-gate 		error = aioinit();
2540Sstevel@tonic-gate 		break;
2550Sstevel@tonic-gate 	case AIOSTART:
2560Sstevel@tonic-gate 		error = aiostart();
2570Sstevel@tonic-gate 		break;
2580Sstevel@tonic-gate 	case AIOLIO:
2591885Sraf 		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
2600Sstevel@tonic-gate 		    (struct sigevent *)a4);
2610Sstevel@tonic-gate 		break;
2620Sstevel@tonic-gate 	case AIOLIOWAIT:
2630Sstevel@tonic-gate 		error = aliowait((int)a1, (void *)a2, (int)a3,
2640Sstevel@tonic-gate 		    (struct sigevent *)a4, AIO_64);
2650Sstevel@tonic-gate 		break;
2660Sstevel@tonic-gate 	case AIOSUSPEND:
2670Sstevel@tonic-gate 		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
2680Sstevel@tonic-gate 		    (int)a4, &rval, AIO_64);
2690Sstevel@tonic-gate 		break;
2700Sstevel@tonic-gate 	case AIOERROR:
2710Sstevel@tonic-gate 		error = aioerror((void *)a1, AIO_64);
2720Sstevel@tonic-gate 		break;
2730Sstevel@tonic-gate 	case AIOAREAD:
2740Sstevel@tonic-gate 		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
2750Sstevel@tonic-gate 		break;
2760Sstevel@tonic-gate 	case AIOAWRITE:
2770Sstevel@tonic-gate 		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
2780Sstevel@tonic-gate 		break;
2790Sstevel@tonic-gate 	case AIOCANCEL:
2800Sstevel@tonic-gate 		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
2810Sstevel@tonic-gate 		break;
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 	/*
2840Sstevel@tonic-gate 	 * The large file related stuff is valid only for
2850Sstevel@tonic-gate 	 * 32 bit kernel and not for 64 bit kernel
2860Sstevel@tonic-gate 	 * On 64 bit kernel we convert large file calls
2870Sstevel@tonic-gate 	 * to regular 64bit calls.
2880Sstevel@tonic-gate 	 */
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate 	default:
2910Sstevel@tonic-gate 		error = EINVAL;
2920Sstevel@tonic-gate 	}
2930Sstevel@tonic-gate 	if (error)
2940Sstevel@tonic-gate 		return ((int64_t)set_errno(error));
2950Sstevel@tonic-gate 	return (rval);
2960Sstevel@tonic-gate }
2970Sstevel@tonic-gate #endif
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate static int
3000Sstevel@tonic-gate kaio(
3010Sstevel@tonic-gate 	ulong_t *uap,
3020Sstevel@tonic-gate 	rval_t *rvp)
3030Sstevel@tonic-gate {
3040Sstevel@tonic-gate 	long rval = 0;
3050Sstevel@tonic-gate 	int	error = 0;
3060Sstevel@tonic-gate 	offset_t	off;
3070Sstevel@tonic-gate 
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate 		rvp->r_vals = 0;
3100Sstevel@tonic-gate #if defined(_LITTLE_ENDIAN)
3110Sstevel@tonic-gate 	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
3120Sstevel@tonic-gate #else
3130Sstevel@tonic-gate 	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
3140Sstevel@tonic-gate #endif
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	switch (uap[0] & ~AIO_POLL_BIT) {
3170Sstevel@tonic-gate 	/*
3180Sstevel@tonic-gate 	 * It must be the 32 bit system call on 64 bit kernel
3190Sstevel@tonic-gate 	 */
3200Sstevel@tonic-gate 	case AIOREAD:
3210Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3220Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
3230Sstevel@tonic-gate 	case AIOWRITE:
3240Sstevel@tonic-gate 		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
3250Sstevel@tonic-gate 		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
3260Sstevel@tonic-gate 	case AIOWAIT:
3270Sstevel@tonic-gate 		error = aiowait((struct	timeval *)uap[1], (int)uap[2],
3280Sstevel@tonic-gate 		    &rval);
3290Sstevel@tonic-gate 		break;
3300Sstevel@tonic-gate 	case AIOWAITN:
3310Sstevel@tonic-gate 		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
3320Sstevel@tonic-gate 		    (uint_t *)uap[3], (timespec_t *)uap[4]);
3330Sstevel@tonic-gate 		break;
3340Sstevel@tonic-gate 	case AIONOTIFY:
3350Sstevel@tonic-gate 		return (aionotify());
3360Sstevel@tonic-gate 	case AIOINIT:
3370Sstevel@tonic-gate 		return (aioinit());
3380Sstevel@tonic-gate 	case AIOSTART:
3390Sstevel@tonic-gate 		return (aiostart());
3400Sstevel@tonic-gate 	case AIOLIO:
3410Sstevel@tonic-gate 		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
3420Sstevel@tonic-gate 		    (void *)uap[4]));
3430Sstevel@tonic-gate 	case AIOLIOWAIT:
3440Sstevel@tonic-gate 		return (aliowait((int)uap[1], (void *)uap[2],
3450Sstevel@tonic-gate 		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
3460Sstevel@tonic-gate 	case AIOSUSPEND:
3470Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3480Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4],
3490Sstevel@tonic-gate 		    &rval, AIO_32);
3500Sstevel@tonic-gate 		break;
3510Sstevel@tonic-gate 	case AIOERROR:
3520Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_32));
3530Sstevel@tonic-gate 	case AIOAREAD:
3540Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3550Sstevel@tonic-gate 		    FREAD, AIO_32));
3560Sstevel@tonic-gate 	case AIOAWRITE:
3570Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1],
3580Sstevel@tonic-gate 		    FWRITE, AIO_32));
3590Sstevel@tonic-gate 	case AIOCANCEL:
3600Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
3610Sstevel@tonic-gate 		    AIO_32));
3620Sstevel@tonic-gate 		break;
3630Sstevel@tonic-gate 	case AIOLIO64:
3640Sstevel@tonic-gate 		return (alioLF((int)uap[1], (void *)uap[2],
3650Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4]));
3660Sstevel@tonic-gate 	case AIOLIOWAIT64:
3670Sstevel@tonic-gate 		return (aliowait(uap[1], (void *)uap[2],
3680Sstevel@tonic-gate 		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
3690Sstevel@tonic-gate 	case AIOSUSPEND64:
3700Sstevel@tonic-gate 		error = aiosuspend((void *)uap[1], (int)uap[2],
3710Sstevel@tonic-gate 		    (timespec_t *)uap[3], (int)uap[4], &rval,
3720Sstevel@tonic-gate 		    AIO_LARGEFILE);
3730Sstevel@tonic-gate 		break;
3740Sstevel@tonic-gate 	case AIOERROR64:
3750Sstevel@tonic-gate 		return (aioerror((void *)uap[1], AIO_LARGEFILE));
3760Sstevel@tonic-gate 	case AIOAREAD64:
3770Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
3780Sstevel@tonic-gate 		    AIO_LARGEFILE));
3790Sstevel@tonic-gate 	case AIOAWRITE64:
3800Sstevel@tonic-gate 		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
3810Sstevel@tonic-gate 		    AIO_LARGEFILE));
3820Sstevel@tonic-gate 	case AIOCANCEL64:
3830Sstevel@tonic-gate 		error = (aio_cancel((int)uap[1], (void *)uap[2],
3840Sstevel@tonic-gate 		    &rval, AIO_LARGEFILE));
3850Sstevel@tonic-gate 		break;
3860Sstevel@tonic-gate 	default:
3870Sstevel@tonic-gate 		return (EINVAL);
3880Sstevel@tonic-gate 	}
3890Sstevel@tonic-gate 
3900Sstevel@tonic-gate 	rvp->r_val1 = rval;
3910Sstevel@tonic-gate 	return (error);
3920Sstevel@tonic-gate }
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate /*
3950Sstevel@tonic-gate  * wake up LWPs in this process that are sleeping in
3960Sstevel@tonic-gate  * aiowait().
3970Sstevel@tonic-gate  */
3980Sstevel@tonic-gate static int
3990Sstevel@tonic-gate aionotify(void)
4000Sstevel@tonic-gate {
4010Sstevel@tonic-gate 	aio_t	*aiop;
4020Sstevel@tonic-gate 
4030Sstevel@tonic-gate 	aiop = curproc->p_aio;
4040Sstevel@tonic-gate 	if (aiop == NULL)
4050Sstevel@tonic-gate 		return (0);
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
4080Sstevel@tonic-gate 	aiop->aio_notifycnt++;
4090Sstevel@tonic-gate 	cv_broadcast(&aiop->aio_waitcv);
4100Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 	return (0);
4130Sstevel@tonic-gate }
4140Sstevel@tonic-gate 
/*
 * Convert a user-supplied struct timeval pointer into a relative
 * kernel timeout.
 *
 * Outputs:
 *   *rqtp	points at *rqtime (the validated interval), or NULL
 *		when no timer is needed (wait forever / don't wait).
 *   *blocking	1 if the caller should sleep, 0 for poll semantics.
 *
 * Special pointer values: NULL means wait indefinitely; (-1) means
 * don't wait at all.  Returns 0, or EFAULT/EINVAL on bad input.
 */
static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	/* A zero timeval means poll: don't block at all. */
	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	/* Reject negative or out-of-range components. */
	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	/* Convert microseconds to nanoseconds for the timestruc. */
	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate static int
4790Sstevel@tonic-gate timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
4800Sstevel@tonic-gate 	timestruc_t **rqtp, int *blocking)
4810Sstevel@tonic-gate {
4820Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4830Sstevel@tonic-gate 	timespec32_t wait_time_32;
4840Sstevel@tonic-gate #endif
4850Sstevel@tonic-gate 	model_t	model = get_udatamodel();
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 	*rqtp = NULL;
4880Sstevel@tonic-gate 	if (timout == NULL) {
4890Sstevel@tonic-gate 		*blocking = 1;
4900Sstevel@tonic-gate 		return (0);
4910Sstevel@tonic-gate 	}
4920Sstevel@tonic-gate 
4930Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
4940Sstevel@tonic-gate 		if (copyin(timout, rqtime, sizeof (*rqtime)))
4950Sstevel@tonic-gate 			return (EFAULT);
4960Sstevel@tonic-gate 	}
4970Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
4980Sstevel@tonic-gate 	else {
4990Sstevel@tonic-gate 		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
5000Sstevel@tonic-gate 			return (EFAULT);
5010Sstevel@tonic-gate 		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
5020Sstevel@tonic-gate 	}
5030Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
5040Sstevel@tonic-gate 
5050Sstevel@tonic-gate 	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
5060Sstevel@tonic-gate 		*blocking = 0;
5070Sstevel@tonic-gate 		return (0);
5080Sstevel@tonic-gate 	}
5090Sstevel@tonic-gate 
5100Sstevel@tonic-gate 	if (rqtime->tv_sec < 0 ||
5110Sstevel@tonic-gate 	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
5120Sstevel@tonic-gate 		return (EINVAL);
5130Sstevel@tonic-gate 
5140Sstevel@tonic-gate 	*rqtp = rqtime;
5150Sstevel@tonic-gate 	*blocking = 1;
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate 	return (0);
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate 
/*
 * Wait for a single asynchronous I/O request to complete and return
 * its aio_result_t address through *rval.
 *
 *   timout	  NULL = wait forever, (-1) = don't wait, else relative
 *		  timeout (interpreted by timeval2reltime()).
 *   dontblockflg if set, fail with EINVAL instead of blocking when no
 *		  aio is outstanding.
 *
 * Returns 0 on success, EINVAL (no aio state / nothing outstanding),
 * EINTR (signal), or ETIME (timeout expired).
 */
/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int 		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		/*
		 * Snapshot timechanged before reading the clock so
		 * cv_waituntil_sig() can detect a clock step.
		 * NOTE(review): timecheck stays uninitialized when
		 * rqtp == NULL; presumably cv_waituntil_sig() ignores
		 * it for an infinite wait -- confirm.
		 */
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);	/* rqtp becomes absolute */
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			/* aio_cleanup() must run without the mutex held */
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		/* a completed request: hand its result address back */
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	/*
	 * reqp is always assigned (possibly NULL) by aio_req_remove()
	 * before any break above, so this test is well-defined.
	 */
	if (reqp) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate /*
6030Sstevel@tonic-gate  * aiowaitn can be used to reap completed asynchronous requests submitted with
6040Sstevel@tonic-gate  * lio_listio, aio_read or aio_write.
6050Sstevel@tonic-gate  * This function only reaps asynchronous raw I/Os.
6060Sstevel@tonic-gate  */
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate /*ARGSUSED*/
6090Sstevel@tonic-gate static int
6100Sstevel@tonic-gate aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
6110Sstevel@tonic-gate {
6120Sstevel@tonic-gate 	int 		error = 0;
6130Sstevel@tonic-gate 	aio_t		*aiop;
6140Sstevel@tonic-gate 	aio_req_t	*reqlist = NULL;
6150Sstevel@tonic-gate 	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
6160Sstevel@tonic-gate 	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
6170Sstevel@tonic-gate 	size_t		iocbsz;			/* users iocb size */
6180Sstevel@tonic-gate 	size_t		riocbsz;		/* returned iocb size */
6190Sstevel@tonic-gate 	int		iocb_index = 0;
6200Sstevel@tonic-gate 	model_t		model = get_udatamodel();
6210Sstevel@tonic-gate 	int		blocking = 1;
6224123Sdm120769 	int		timecheck;
6230Sstevel@tonic-gate 	timestruc_t	rqtime;
6240Sstevel@tonic-gate 	timestruc_t	*rqtp;
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	aiop = curproc->p_aio;
6274377Sraf 
6284377Sraf 	if (aiop == NULL || aiop->aio_outstanding == 0)
6290Sstevel@tonic-gate 		return (EAGAIN);
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate 	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
6320Sstevel@tonic-gate 		return (EFAULT);
6330Sstevel@tonic-gate 
6340Sstevel@tonic-gate 	/* set *nwait to zero, if we must return prematurely */
6350Sstevel@tonic-gate 	if (copyout(&cnt, nwait, sizeof (uint_t)))
6360Sstevel@tonic-gate 		return (EFAULT);
6370Sstevel@tonic-gate 
6380Sstevel@tonic-gate 	if (waitcnt == 0) {
6390Sstevel@tonic-gate 		blocking = 0;
6400Sstevel@tonic-gate 		rqtp = NULL;
6410Sstevel@tonic-gate 		waitcnt = nent;
6420Sstevel@tonic-gate 	} else {
6430Sstevel@tonic-gate 		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
6440Sstevel@tonic-gate 		if (error)
6450Sstevel@tonic-gate 			return (error);
6460Sstevel@tonic-gate 	}
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
6490Sstevel@tonic-gate 		iocbsz = (sizeof (aiocb_t *) * nent);
6500Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
6510Sstevel@tonic-gate 	else
6520Sstevel@tonic-gate 		iocbsz = (sizeof (caddr32_t) * nent);
6530Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
6540Sstevel@tonic-gate 
6550Sstevel@tonic-gate 	/*
6560Sstevel@tonic-gate 	 * Only one aio_waitn call is allowed at a time.
6570Sstevel@tonic-gate 	 * The active aio_waitn will collect all requests
6580Sstevel@tonic-gate 	 * out of the "done" list and if necessary it will wait
6590Sstevel@tonic-gate 	 * for some/all pending requests to fulfill the nwait
6600Sstevel@tonic-gate 	 * parameter.
6610Sstevel@tonic-gate 	 * A second or further aio_waitn calls will sleep here
6620Sstevel@tonic-gate 	 * until the active aio_waitn finishes and leaves the kernel
6630Sstevel@tonic-gate 	 * If the second call does not block (poll), then return
6640Sstevel@tonic-gate 	 * immediately with the error code : EAGAIN.
6650Sstevel@tonic-gate 	 * If the second call should block, then sleep here, but
6660Sstevel@tonic-gate 	 * do not touch the timeout. The timeout starts when this
6670Sstevel@tonic-gate 	 * aio_waitn-call becomes active.
6680Sstevel@tonic-gate 	 */
6690Sstevel@tonic-gate 
6700Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	while (aiop->aio_flags & AIO_WAITN) {
6730Sstevel@tonic-gate 		if (blocking == 0) {
6740Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6750Sstevel@tonic-gate 			return (EAGAIN);
6760Sstevel@tonic-gate 		}
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 		/* block, no timeout */
6790Sstevel@tonic-gate 		aiop->aio_flags |= AIO_WAITN_PENDING;
6800Sstevel@tonic-gate 		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
6810Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
6820Sstevel@tonic-gate 			return (EINTR);
6830Sstevel@tonic-gate 		}
6840Sstevel@tonic-gate 	}
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 	/*
6870Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
6880Sstevel@tonic-gate 	 */
6890Sstevel@tonic-gate 	if (rqtp) {
6900Sstevel@tonic-gate 		timestruc_t now;
6914123Sdm120769 		timecheck = timechanged;
6920Sstevel@tonic-gate 		gethrestime(&now);
6930Sstevel@tonic-gate 		timespecadd(rqtp, &now);
6940Sstevel@tonic-gate 	}
6950Sstevel@tonic-gate 
6960Sstevel@tonic-gate 	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
6970Sstevel@tonic-gate 		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
6980Sstevel@tonic-gate 		aiop->aio_iocb = NULL;
6990Sstevel@tonic-gate 	}
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 	if (aiop->aio_iocb == NULL) {
7020Sstevel@tonic-gate 		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
7030Sstevel@tonic-gate 		if (iocblist == NULL) {
7040Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7050Sstevel@tonic-gate 			return (ENOMEM);
7060Sstevel@tonic-gate 		}
7070Sstevel@tonic-gate 		aiop->aio_iocb = (aiocb_t **)iocblist;
7080Sstevel@tonic-gate 		aiop->aio_iocbsz = iocbsz;
7090Sstevel@tonic-gate 	} else {
7100Sstevel@tonic-gate 		iocblist = (char *)aiop->aio_iocb;
7110Sstevel@tonic-gate 	}
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 	aiop->aio_waitncnt = waitcnt;
7140Sstevel@tonic-gate 	aiop->aio_flags |= AIO_WAITN;
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 	for (;;) {
7170Sstevel@tonic-gate 		/* push requests on poll queue to done queue */
7180Sstevel@tonic-gate 		if (aiop->aio_pollq) {
7190Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
7200Sstevel@tonic-gate 			aio_cleanup(0);
7210Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
7220Sstevel@tonic-gate 		}
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 		/* check for requests on done queue */
7250Sstevel@tonic-gate 		if (aiop->aio_doneq) {
7260Sstevel@tonic-gate 			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
7270Sstevel@tonic-gate 			aiop->aio_waitncnt = waitcnt - cnt;
7280Sstevel@tonic-gate 		}
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 		/* user-level done queue might not be empty */
7310Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
7320Sstevel@tonic-gate 			aiop->aio_notifycnt--;
7330Sstevel@tonic-gate 			error = 0;
7340Sstevel@tonic-gate 			break;
7350Sstevel@tonic-gate 		}
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 		/*
7380Sstevel@tonic-gate 		 * if we are here second time as a result of timer
7390Sstevel@tonic-gate 		 * expiration, we reset error if there are enough
7400Sstevel@tonic-gate 		 * aiocb's to satisfy request.
7410Sstevel@tonic-gate 		 * We return also if all requests are already done
7420Sstevel@tonic-gate 		 * and we picked up the whole done queue.
7430Sstevel@tonic-gate 		 */
7440Sstevel@tonic-gate 
7450Sstevel@tonic-gate 		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
7460Sstevel@tonic-gate 		    aiop->aio_doneq == NULL)) {
7470Sstevel@tonic-gate 			error = 0;
7480Sstevel@tonic-gate 			break;
7490Sstevel@tonic-gate 		}
7500Sstevel@tonic-gate 
7510Sstevel@tonic-gate 		if ((cnt < waitcnt) && blocking) {
7520Sstevel@tonic-gate 			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
7534502Spraks 			    &aiop->aio_mutex, rqtp, timecheck);
7540Sstevel@tonic-gate 			if (rval > 0)
7550Sstevel@tonic-gate 				continue;
7560Sstevel@tonic-gate 			if (rval < 0) {
7570Sstevel@tonic-gate 				error = ETIME;
7580Sstevel@tonic-gate 				blocking = 0;
7590Sstevel@tonic-gate 				continue;
7600Sstevel@tonic-gate 			}
7610Sstevel@tonic-gate 			error = EINTR;
7620Sstevel@tonic-gate 		}
7630Sstevel@tonic-gate 		break;
7640Sstevel@tonic-gate 	}
7650Sstevel@tonic-gate 
7660Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	if (cnt > 0) {
7690Sstevel@tonic-gate 
7700Sstevel@tonic-gate 		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
7710Sstevel@tonic-gate 		    aiop, model);
7720Sstevel@tonic-gate 
7730Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE)
7740Sstevel@tonic-gate 			riocbsz = (sizeof (aiocb_t *) * cnt);
7750Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
7760Sstevel@tonic-gate 		else
7770Sstevel@tonic-gate 			riocbsz = (sizeof (caddr32_t) * cnt);
7780Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate 		if (copyout(iocblist, uiocb, riocbsz) ||
7810Sstevel@tonic-gate 		    copyout(&cnt, nwait, sizeof (uint_t)))
7820Sstevel@tonic-gate 			error = EFAULT;
7830Sstevel@tonic-gate 	}
7840Sstevel@tonic-gate 
7850Sstevel@tonic-gate 	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
7860Sstevel@tonic-gate 		kmem_free(iocblist, aiop->aio_iocbsz);
7870Sstevel@tonic-gate 		aiop->aio_iocb = NULL;
7880Sstevel@tonic-gate 	}
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate 	/* check if there is another thread waiting for execution */
7910Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
7920Sstevel@tonic-gate 	aiop->aio_flags &= ~AIO_WAITN;
7930Sstevel@tonic-gate 	if (aiop->aio_flags & AIO_WAITN_PENDING) {
7940Sstevel@tonic-gate 		aiop->aio_flags &= ~AIO_WAITN_PENDING;
7950Sstevel@tonic-gate 		cv_signal(&aiop->aio_waitncv);
7960Sstevel@tonic-gate 	}
7970Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
7980Sstevel@tonic-gate 
7990Sstevel@tonic-gate 	return (error);
8000Sstevel@tonic-gate }
8010Sstevel@tonic-gate 
8020Sstevel@tonic-gate /*
8030Sstevel@tonic-gate  * aio_unlock_requests
8040Sstevel@tonic-gate  * copyouts the result of the request as well as the return value.
8050Sstevel@tonic-gate  * It builds the list of completed asynchronous requests,
8060Sstevel@tonic-gate  * unlocks the allocated memory ranges and
8070Sstevel@tonic-gate  * put the aio request structure back into the free list.
8080Sstevel@tonic-gate  */
8090Sstevel@tonic-gate 
8100Sstevel@tonic-gate static int
8110Sstevel@tonic-gate aio_unlock_requests(
8120Sstevel@tonic-gate 	caddr_t	iocblist,
8130Sstevel@tonic-gate 	int	iocb_index,
8140Sstevel@tonic-gate 	aio_req_t *reqlist,
8150Sstevel@tonic-gate 	aio_t	*aiop,
8160Sstevel@tonic-gate 	model_t	model)
8170Sstevel@tonic-gate {
8180Sstevel@tonic-gate 	aio_req_t	*reqp, *nreqp;
8190Sstevel@tonic-gate 
8200Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
8210Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8220Sstevel@tonic-gate 			(((caddr_t *)iocblist)[iocb_index++]) =
8230Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb;
8240Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8250Sstevel@tonic-gate 			aphysio_unlock(reqp);
8260Sstevel@tonic-gate 			aio_copyout_result(reqp);
8270Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8280Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8290Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8300Sstevel@tonic-gate 		}
8310Sstevel@tonic-gate 	}
8320Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
8330Sstevel@tonic-gate 	else {
8340Sstevel@tonic-gate 		for (reqp = reqlist; reqp != NULL;  reqp = nreqp) {
8350Sstevel@tonic-gate 			((caddr32_t *)iocblist)[iocb_index++] =
8360Sstevel@tonic-gate 			    reqp->aio_req_iocb.iocb32;
8370Sstevel@tonic-gate 			nreqp = reqp->aio_req_next;
8380Sstevel@tonic-gate 			aphysio_unlock(reqp);
8390Sstevel@tonic-gate 			aio_copyout_result(reqp);
8400Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
8410Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
8420Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
8430Sstevel@tonic-gate 		}
8440Sstevel@tonic-gate 	}
8450Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
8460Sstevel@tonic-gate 	return (iocb_index);
8470Sstevel@tonic-gate }
8480Sstevel@tonic-gate 
8490Sstevel@tonic-gate /*
8500Sstevel@tonic-gate  * aio_reqlist_concat
8510Sstevel@tonic-gate  * moves "max" elements from the done queue to the reqlist queue and removes
8520Sstevel@tonic-gate  * the AIO_DONEQ flag.
8530Sstevel@tonic-gate  * - reqlist queue is a simple linked list
8540Sstevel@tonic-gate  * - done queue is a double linked list
8550Sstevel@tonic-gate  */
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate static int
8580Sstevel@tonic-gate aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
8590Sstevel@tonic-gate {
8600Sstevel@tonic-gate 	aio_req_t *q2, *q2work, *list;
8610Sstevel@tonic-gate 	int count = 0;
8620Sstevel@tonic-gate 
8630Sstevel@tonic-gate 	list = *reqlist;
8640Sstevel@tonic-gate 	q2 = aiop->aio_doneq;
8650Sstevel@tonic-gate 	q2work = q2;
8660Sstevel@tonic-gate 	while (max-- > 0) {
8670Sstevel@tonic-gate 		q2work->aio_req_flags &= ~AIO_DONEQ;
8680Sstevel@tonic-gate 		q2work = q2work->aio_req_next;
8690Sstevel@tonic-gate 		count++;
8700Sstevel@tonic-gate 		if (q2work == q2)
8710Sstevel@tonic-gate 			break;
8720Sstevel@tonic-gate 	}
8730Sstevel@tonic-gate 
8740Sstevel@tonic-gate 	if (q2work == q2) {
8750Sstevel@tonic-gate 		/* all elements revised */
8760Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = list;
8770Sstevel@tonic-gate 		list = q2;
8780Sstevel@tonic-gate 		aiop->aio_doneq = NULL;
8790Sstevel@tonic-gate 	} else {
8800Sstevel@tonic-gate 		/*
8810Sstevel@tonic-gate 		 * max < elements in the doneq
8820Sstevel@tonic-gate 		 * detach only the required amount of elements
8830Sstevel@tonic-gate 		 * out of the doneq
8840Sstevel@tonic-gate 		 */
8850Sstevel@tonic-gate 		q2work->aio_req_prev->aio_req_next = list;
8860Sstevel@tonic-gate 		list = q2;
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate 		aiop->aio_doneq = q2work;
8890Sstevel@tonic-gate 		q2work->aio_req_prev = q2->aio_req_prev;
8900Sstevel@tonic-gate 		q2->aio_req_prev->aio_req_next = q2work;
8910Sstevel@tonic-gate 	}
8920Sstevel@tonic-gate 	*reqlist = list;
8930Sstevel@tonic-gate 	return (count);
8940Sstevel@tonic-gate }
8950Sstevel@tonic-gate 
8960Sstevel@tonic-gate /*ARGSUSED*/
8970Sstevel@tonic-gate static int
8980Sstevel@tonic-gate aiosuspend(
8990Sstevel@tonic-gate 	void	*aiocb,
9000Sstevel@tonic-gate 	int	nent,
9010Sstevel@tonic-gate 	struct	timespec	*timout,
9020Sstevel@tonic-gate 	int	flag,
9030Sstevel@tonic-gate 	long	*rval,
9040Sstevel@tonic-gate 	int	run_mode)
9050Sstevel@tonic-gate {
9060Sstevel@tonic-gate 	int 		error;
9070Sstevel@tonic-gate 	aio_t		*aiop;
9080Sstevel@tonic-gate 	aio_req_t	*reqp, *found, *next;
9090Sstevel@tonic-gate 	caddr_t		cbplist = NULL;
9100Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
9110Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
9120Sstevel@tonic-gate 	aiocb32_t	*cbp32;
9130Sstevel@tonic-gate 	caddr32_t	*ucbp32;
9140Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
9150Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
9160Sstevel@tonic-gate 	int		rv;
9170Sstevel@tonic-gate 	int		i;
9180Sstevel@tonic-gate 	size_t		ssize;
9190Sstevel@tonic-gate 	model_t		model = get_udatamodel();
9200Sstevel@tonic-gate 	int		blocking;
9214123Sdm120769 	int		timecheck;
9220Sstevel@tonic-gate 	timestruc_t	rqtime;
9230Sstevel@tonic-gate 	timestruc_t	*rqtp;
9240Sstevel@tonic-gate 
9250Sstevel@tonic-gate 	aiop = curproc->p_aio;
9260Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0)
9270Sstevel@tonic-gate 		return (EINVAL);
9280Sstevel@tonic-gate 
9290Sstevel@tonic-gate 	/*
9300Sstevel@tonic-gate 	 * Establish the absolute future time for the timeout.
9310Sstevel@tonic-gate 	 */
9320Sstevel@tonic-gate 	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
9330Sstevel@tonic-gate 	if (error)
9340Sstevel@tonic-gate 		return (error);
9350Sstevel@tonic-gate 	if (rqtp) {
9360Sstevel@tonic-gate 		timestruc_t now;
9374123Sdm120769 		timecheck = timechanged;
9380Sstevel@tonic-gate 		gethrestime(&now);
9390Sstevel@tonic-gate 		timespecadd(rqtp, &now);
9400Sstevel@tonic-gate 	}
9410Sstevel@tonic-gate 
9420Sstevel@tonic-gate 	/*
9430Sstevel@tonic-gate 	 * If we are not blocking and there's no IO complete
9440Sstevel@tonic-gate 	 * skip aiocb copyin.
9450Sstevel@tonic-gate 	 */
9460Sstevel@tonic-gate 	if (!blocking && (aiop->aio_pollq == NULL) &&
9470Sstevel@tonic-gate 	    (aiop->aio_doneq == NULL)) {
9480Sstevel@tonic-gate 		return (EAGAIN);
9490Sstevel@tonic-gate 	}
9500Sstevel@tonic-gate 
9510Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
9520Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
9530Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
9540Sstevel@tonic-gate 	else
9550Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
9560Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
9570Sstevel@tonic-gate 
9580Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
9590Sstevel@tonic-gate 	if (cbplist == NULL)
9600Sstevel@tonic-gate 		return (ENOMEM);
9610Sstevel@tonic-gate 
9620Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
9630Sstevel@tonic-gate 		error = EFAULT;
9640Sstevel@tonic-gate 		goto done;
9650Sstevel@tonic-gate 	}
9660Sstevel@tonic-gate 
9670Sstevel@tonic-gate 	found = NULL;
9680Sstevel@tonic-gate 	/*
9690Sstevel@tonic-gate 	 * we need to get the aio_cleanupq_mutex since we call
9700Sstevel@tonic-gate 	 * aio_req_done().
9710Sstevel@tonic-gate 	 */
9720Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
9730Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
9740Sstevel@tonic-gate 	for (;;) {
9750Sstevel@tonic-gate 		/* push requests on poll queue to done queue */
9760Sstevel@tonic-gate 		if (aiop->aio_pollq) {
9770Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
9780Sstevel@tonic-gate 			mutex_exit(&aiop->aio_cleanupq_mutex);
9790Sstevel@tonic-gate 			aio_cleanup(0);
9800Sstevel@tonic-gate 			mutex_enter(&aiop->aio_cleanupq_mutex);
9810Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
9820Sstevel@tonic-gate 		}
9830Sstevel@tonic-gate 		/* check for requests on done queue */
9840Sstevel@tonic-gate 		if (aiop->aio_doneq) {
9850Sstevel@tonic-gate 			if (model == DATAMODEL_NATIVE)
9860Sstevel@tonic-gate 				ucbp = (aiocb_t **)cbplist;
9870Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
9880Sstevel@tonic-gate 			else
9890Sstevel@tonic-gate 				ucbp32 = (caddr32_t *)cbplist;
9900Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
9910Sstevel@tonic-gate 			for (i = 0; i < nent; i++) {
9920Sstevel@tonic-gate 				if (model == DATAMODEL_NATIVE) {
9930Sstevel@tonic-gate 					if ((cbp = *ucbp++) == NULL)
9940Sstevel@tonic-gate 						continue;
9950Sstevel@tonic-gate 					if (run_mode != AIO_LARGEFILE)
9960Sstevel@tonic-gate 						reqp = aio_req_done(
9970Sstevel@tonic-gate 						    &cbp->aio_resultp);
9980Sstevel@tonic-gate 					else {
9990Sstevel@tonic-gate 						cbp64 = (aiocb64_32_t *)cbp;
10000Sstevel@tonic-gate 						reqp = aio_req_done(
10010Sstevel@tonic-gate 						    &cbp64->aio_resultp);
10020Sstevel@tonic-gate 					}
10030Sstevel@tonic-gate 				}
10040Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
10050Sstevel@tonic-gate 				else {
10060Sstevel@tonic-gate 					if (run_mode == AIO_32) {
10070Sstevel@tonic-gate 						if ((cbp32 =
10080Sstevel@tonic-gate 						    (aiocb32_t *)(uintptr_t)
10090Sstevel@tonic-gate 						    *ucbp32++) == NULL)
10100Sstevel@tonic-gate 							continue;
10110Sstevel@tonic-gate 						reqp = aio_req_done(
10120Sstevel@tonic-gate 						    &cbp32->aio_resultp);
10130Sstevel@tonic-gate 					} else if (run_mode == AIO_LARGEFILE) {
10140Sstevel@tonic-gate 						if ((cbp64 =
10150Sstevel@tonic-gate 						    (aiocb64_32_t *)(uintptr_t)
10160Sstevel@tonic-gate 						    *ucbp32++) == NULL)
10170Sstevel@tonic-gate 							continue;
10184502Spraks 						reqp = aio_req_done(
10194502Spraks 						    &cbp64->aio_resultp);
10200Sstevel@tonic-gate 					}
10210Sstevel@tonic-gate 
10220Sstevel@tonic-gate 				}
10230Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
10240Sstevel@tonic-gate 				if (reqp) {
10250Sstevel@tonic-gate 					reqp->aio_req_next = found;
10260Sstevel@tonic-gate 					found = reqp;
10270Sstevel@tonic-gate 				}
10280Sstevel@tonic-gate 				if (aiop->aio_doneq == NULL)
10290Sstevel@tonic-gate 					break;
10300Sstevel@tonic-gate 			}
10310Sstevel@tonic-gate 			if (found)
10320Sstevel@tonic-gate 				break;
10330Sstevel@tonic-gate 		}
10340Sstevel@tonic-gate 		if (aiop->aio_notifycnt > 0) {
10350Sstevel@tonic-gate 			/*
10360Sstevel@tonic-gate 			 * nothing on the kernel's queue. the user
10370Sstevel@tonic-gate 			 * has notified the kernel that it has items
10380Sstevel@tonic-gate 			 * on a user-level queue.
10390Sstevel@tonic-gate 			 */
10400Sstevel@tonic-gate 			aiop->aio_notifycnt--;
10410Sstevel@tonic-gate 			*rval = 1;
10420Sstevel@tonic-gate 			error = 0;
10430Sstevel@tonic-gate 			break;
10440Sstevel@tonic-gate 		}
10450Sstevel@tonic-gate 		/* don't block if nothing is outstanding */
10460Sstevel@tonic-gate 		if (aiop->aio_outstanding == 0) {
10470Sstevel@tonic-gate 			error = EAGAIN;
10480Sstevel@tonic-gate 			break;
10490Sstevel@tonic-gate 		}
10500Sstevel@tonic-gate 		if (blocking) {
10510Sstevel@tonic-gate 			/*
10520Sstevel@tonic-gate 			 * drop the aio_cleanupq_mutex as we are
10530Sstevel@tonic-gate 			 * going to block.
10540Sstevel@tonic-gate 			 */
10550Sstevel@tonic-gate 			mutex_exit(&aiop->aio_cleanupq_mutex);
10560Sstevel@tonic-gate 			rv = cv_waituntil_sig(&aiop->aio_waitcv,
10574502Spraks 			    &aiop->aio_mutex, rqtp, timecheck);
10580Sstevel@tonic-gate 			/*
10590Sstevel@tonic-gate 			 * we have to drop aio_mutex and
10600Sstevel@tonic-gate 			 * grab it in the right order.
10610Sstevel@tonic-gate 			 */
10620Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
10630Sstevel@tonic-gate 			mutex_enter(&aiop->aio_cleanupq_mutex);
10640Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
10650Sstevel@tonic-gate 			if (rv > 0)	/* check done queue again */
10660Sstevel@tonic-gate 				continue;
10670Sstevel@tonic-gate 			if (rv == 0)	/* interrupted by a signal */
10680Sstevel@tonic-gate 				error = EINTR;
10690Sstevel@tonic-gate 			else		/* timer expired */
10700Sstevel@tonic-gate 				error = ETIME;
10710Sstevel@tonic-gate 		} else {
10720Sstevel@tonic-gate 			error = EAGAIN;
10730Sstevel@tonic-gate 		}
10740Sstevel@tonic-gate 		break;
10750Sstevel@tonic-gate 	}
10760Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
10770Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
10780Sstevel@tonic-gate 	for (reqp = found; reqp != NULL; reqp = next) {
10790Sstevel@tonic-gate 		next = reqp->aio_req_next;
10800Sstevel@tonic-gate 		aphysio_unlock(reqp);
10810Sstevel@tonic-gate 		aio_copyout_result(reqp);
10820Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
10830Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
10840Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
10850Sstevel@tonic-gate 	}
10860Sstevel@tonic-gate done:
10870Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
10880Sstevel@tonic-gate 	return (error);
10890Sstevel@tonic-gate }
10900Sstevel@tonic-gate 
10910Sstevel@tonic-gate /*
10920Sstevel@tonic-gate  * initialize aio by allocating an aio_t struct for this
10930Sstevel@tonic-gate  * process.
10940Sstevel@tonic-gate  */
10950Sstevel@tonic-gate static int
10960Sstevel@tonic-gate aioinit(void)
10970Sstevel@tonic-gate {
10980Sstevel@tonic-gate 	proc_t *p = curproc;
10990Sstevel@tonic-gate 	aio_t *aiop;
11000Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11010Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL) {
11020Sstevel@tonic-gate 		aiop = aio_aiop_alloc();
11030Sstevel@tonic-gate 		p->p_aio = aiop;
11040Sstevel@tonic-gate 	}
11050Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11060Sstevel@tonic-gate 	if (aiop == NULL)
11070Sstevel@tonic-gate 		return (ENOMEM);
11080Sstevel@tonic-gate 	return (0);
11090Sstevel@tonic-gate }
11100Sstevel@tonic-gate 
11110Sstevel@tonic-gate /*
11120Sstevel@tonic-gate  * start a special thread that will cleanup after aio requests
11130Sstevel@tonic-gate  * that are preventing a segment from being unmapped. as_unmap()
11140Sstevel@tonic-gate  * blocks until all phsyio to this segment is completed. this
11150Sstevel@tonic-gate  * doesn't happen until all the pages in this segment are not
11160Sstevel@tonic-gate  * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
11170Sstevel@tonic-gate  * requests still outstanding. this special thread will make sure
11180Sstevel@tonic-gate  * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
11190Sstevel@tonic-gate  *
11200Sstevel@tonic-gate  * this function will return an error if the process has only
11210Sstevel@tonic-gate  * one LWP. the assumption is that the caller is a separate LWP
11220Sstevel@tonic-gate  * that remains blocked in the kernel for the life of this process.
11230Sstevel@tonic-gate  */
11240Sstevel@tonic-gate static int
11250Sstevel@tonic-gate aiostart(void)
11260Sstevel@tonic-gate {
11270Sstevel@tonic-gate 	proc_t *p = curproc;
11280Sstevel@tonic-gate 	aio_t *aiop;
11290Sstevel@tonic-gate 	int first, error = 0;
11300Sstevel@tonic-gate 
11310Sstevel@tonic-gate 	if (p->p_lwpcnt == 1)
11320Sstevel@tonic-gate 		return (EDEADLK);
11330Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
11340Sstevel@tonic-gate 	if ((aiop = p->p_aio) == NULL)
11350Sstevel@tonic-gate 		error = EINVAL;
11360Sstevel@tonic-gate 	else {
11370Sstevel@tonic-gate 		first = aiop->aio_ok;
11380Sstevel@tonic-gate 		if (aiop->aio_ok == 0)
11390Sstevel@tonic-gate 			aiop->aio_ok = 1;
11400Sstevel@tonic-gate 	}
11410Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
11420Sstevel@tonic-gate 	if (error == 0 && first == 0) {
11430Sstevel@tonic-gate 		return (aio_cleanup_thread(aiop));
11440Sstevel@tonic-gate 		/* should return only to exit */
11450Sstevel@tonic-gate 	}
11460Sstevel@tonic-gate 	return (error);
11470Sstevel@tonic-gate }
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate /*
11500Sstevel@tonic-gate  * Associate an aiocb with a port.
11510Sstevel@tonic-gate  * This function is used by aiorw() to associate a transaction with a port.
11520Sstevel@tonic-gate  * Allocate an event port structure (port_alloc_event()) and store the
11530Sstevel@tonic-gate  * delivered user pointer (portnfy_user) in the portkev_user field of the
11540Sstevel@tonic-gate  * port_kevent_t structure..
11550Sstevel@tonic-gate  * The aio_req_portkev pointer in the aio_req_t structure was added to identify
11560Sstevel@tonic-gate  * the port association.
11570Sstevel@tonic-gate  */
11580Sstevel@tonic-gate 
11590Sstevel@tonic-gate static int
11601885Sraf aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
11611885Sraf 	aio_req_t *reqp, int event)
11620Sstevel@tonic-gate {
11630Sstevel@tonic-gate 	port_kevent_t	*pkevp = NULL;
11640Sstevel@tonic-gate 	int		error;
11650Sstevel@tonic-gate 
11660Sstevel@tonic-gate 	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
11670Sstevel@tonic-gate 	    PORT_SOURCE_AIO, &pkevp);
11680Sstevel@tonic-gate 	if (error) {
11690Sstevel@tonic-gate 		if ((error == ENOMEM) || (error == EAGAIN))
11700Sstevel@tonic-gate 			error = EAGAIN;
11710Sstevel@tonic-gate 		else
11720Sstevel@tonic-gate 			error = EINVAL;
11730Sstevel@tonic-gate 	} else {
11740Sstevel@tonic-gate 		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
11750Sstevel@tonic-gate 		    aio_port_callback, reqp);
11761885Sraf 		pkevp->portkev_events = event;
11770Sstevel@tonic-gate 		reqp->aio_req_portkev = pkevp;
11780Sstevel@tonic-gate 		reqp->aio_req_port = pntfy->portnfy_port;
11790Sstevel@tonic-gate 	}
11800Sstevel@tonic-gate 	return (error);
11810Sstevel@tonic-gate }
11820Sstevel@tonic-gate 
11830Sstevel@tonic-gate #ifdef _LP64
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate /*
11860Sstevel@tonic-gate  * Asynchronous list IO. A chain of aiocb's are copied in
11870Sstevel@tonic-gate  * one at a time. If the aiocb is invalid, it is skipped.
11880Sstevel@tonic-gate  * For each aiocb, the appropriate driver entry point is
11890Sstevel@tonic-gate  * called. Optimize for the common case where the list
11900Sstevel@tonic-gate  * of requests is to the same file descriptor.
11910Sstevel@tonic-gate  *
11920Sstevel@tonic-gate  * One possible optimization is to define a new driver entry
11930Sstevel@tonic-gate  * point that supports a list of IO requests. Whether this
11940Sstevel@tonic-gate  * improves performance depends somewhat on the driver's
11950Sstevel@tonic-gate  * locking strategy. Processing a list could adversely impact
11960Sstevel@tonic-gate  * the driver's interrupt latency.
11970Sstevel@tonic-gate  */
11980Sstevel@tonic-gate static int
11990Sstevel@tonic-gate alio(
12001885Sraf 	int		mode_arg,
12011885Sraf 	aiocb_t		**aiocb_arg,
12021885Sraf 	int		nent,
12031885Sraf 	struct sigevent	*sigev)
12040Sstevel@tonic-gate {
12050Sstevel@tonic-gate 	file_t		*fp;
12060Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
12070Sstevel@tonic-gate 	int		prev_mode = -1;
12080Sstevel@tonic-gate 	struct vnode	*vp;
12090Sstevel@tonic-gate 	aio_lio_t	*head;
12100Sstevel@tonic-gate 	aio_req_t	*reqp;
12110Sstevel@tonic-gate 	aio_t		*aiop;
12120Sstevel@tonic-gate 	caddr_t		cbplist;
12130Sstevel@tonic-gate 	aiocb_t		cb;
12140Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
12151885Sraf 	aiocb_t		*cbp;
12161885Sraf 	aiocb_t		**ucbp;
12170Sstevel@tonic-gate 	struct sigevent sigevk;
12180Sstevel@tonic-gate 	sigqueue_t	*sqp;
12190Sstevel@tonic-gate 	int		(*aio_func)();
12200Sstevel@tonic-gate 	int		mode;
12210Sstevel@tonic-gate 	int		error = 0;
12220Sstevel@tonic-gate 	int		aio_errors = 0;
12230Sstevel@tonic-gate 	int		i;
12240Sstevel@tonic-gate 	size_t		ssize;
12250Sstevel@tonic-gate 	int		deadhead = 0;
12260Sstevel@tonic-gate 	int		aio_notsupported = 0;
12271885Sraf 	int		lio_head_port;
12281885Sraf 	int		aio_port;
12291885Sraf 	int		aio_thread;
12300Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
12314502Spraks 	int		portused = 0;
12320Sstevel@tonic-gate 	port_notify_t	pnotify;
12331885Sraf 	int		event;
12340Sstevel@tonic-gate 
12350Sstevel@tonic-gate 	aiop = curproc->p_aio;
12360Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
12370Sstevel@tonic-gate 		return (EINVAL);
12380Sstevel@tonic-gate 
12390Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
12400Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
12410Sstevel@tonic-gate 	ucbp = (aiocb_t **)cbplist;
12420Sstevel@tonic-gate 
12431885Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
12441885Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
12450Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
12460Sstevel@tonic-gate 		return (EFAULT);
12470Sstevel@tonic-gate 	}
12480Sstevel@tonic-gate 
12491885Sraf 	/* Event Ports  */
12501885Sraf 	if (sigev &&
12511885Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
12521885Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
12531885Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
12541885Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
12551885Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
12561885Sraf 		} else if (copyin(sigevk.sigev_value.sival_ptr,
12571885Sraf 		    &pnotify, sizeof (pnotify))) {
12580Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
12590Sstevel@tonic-gate 			return (EFAULT);
12600Sstevel@tonic-gate 		}
12611885Sraf 		error = port_alloc_event(pnotify.portnfy_port,
12621885Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
12631885Sraf 		if (error) {
12641885Sraf 			if (error == ENOMEM || error == EAGAIN)
12651885Sraf 				error = EAGAIN;
12661885Sraf 			else
12671885Sraf 				error = EINVAL;
12681885Sraf 			kmem_free(cbplist, ssize);
12691885Sraf 			return (error);
12701885Sraf 		}
12711885Sraf 		lio_head_port = pnotify.portnfy_port;
12724502Spraks 		portused = 1;
12730Sstevel@tonic-gate 	}
12740Sstevel@tonic-gate 
12750Sstevel@tonic-gate 	/*
12760Sstevel@tonic-gate 	 * a list head should be allocated if notification is
12770Sstevel@tonic-gate 	 * enabled for this list.
12780Sstevel@tonic-gate 	 */
12790Sstevel@tonic-gate 	head = NULL;
12800Sstevel@tonic-gate 
12811885Sraf 	if (mode_arg == LIO_WAIT || sigev) {
12820Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
12830Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
12840Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
12850Sstevel@tonic-gate 		if (error)
12860Sstevel@tonic-gate 			goto done;
12870Sstevel@tonic-gate 		deadhead = 1;
12880Sstevel@tonic-gate 		head->lio_nent = nent;
12890Sstevel@tonic-gate 		head->lio_refcnt = nent;
12901885Sraf 		head->lio_port = -1;
12911885Sraf 		head->lio_portkev = NULL;
12921885Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
12931885Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
12940Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
12950Sstevel@tonic-gate 			if (sqp == NULL) {
12960Sstevel@tonic-gate 				error = EAGAIN;
12970Sstevel@tonic-gate 				goto done;
12980Sstevel@tonic-gate 			}
12990Sstevel@tonic-gate 			sqp->sq_func = NULL;
13000Sstevel@tonic-gate 			sqp->sq_next = NULL;
13010Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
13020Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
13030Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
13040Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
13050Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
13060Sstevel@tonic-gate 			sqp->sq_info.si_signo = sigevk.sigev_signo;
13070Sstevel@tonic-gate 			sqp->sq_info.si_value = sigevk.sigev_value;
13080Sstevel@tonic-gate 			head->lio_sigqp = sqp;
13090Sstevel@tonic-gate 		} else {
13100Sstevel@tonic-gate 			head->lio_sigqp = NULL;
13110Sstevel@tonic-gate 		}
13121885Sraf 		if (pkevtp) {
13131885Sraf 			/*
13141885Sraf 			 * Prepare data to send when list of aiocb's
13151885Sraf 			 * has completed.
13161885Sraf 			 */
13171885Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
13181885Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
13191885Sraf 			    NULL, head);
13201885Sraf 			pkevtp->portkev_events = AIOLIO;
13211885Sraf 			head->lio_portkev = pkevtp;
13221885Sraf 			head->lio_port = pnotify.portnfy_port;
13231885Sraf 		}
13240Sstevel@tonic-gate 	}
13250Sstevel@tonic-gate 
13260Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
13270Sstevel@tonic-gate 
13280Sstevel@tonic-gate 		cbp = *ucbp;
13290Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
13301885Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
13310Sstevel@tonic-gate 			if (head) {
13320Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13330Sstevel@tonic-gate 				head->lio_nent--;
13340Sstevel@tonic-gate 				head->lio_refcnt--;
13350Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13360Sstevel@tonic-gate 			}
13370Sstevel@tonic-gate 			continue;
13380Sstevel@tonic-gate 		}
13390Sstevel@tonic-gate 
13400Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
13410Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
13420Sstevel@tonic-gate 		if (mode == LIO_NOP) {
13430Sstevel@tonic-gate 			cbp = NULL;
13440Sstevel@tonic-gate 			if (head) {
13450Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13460Sstevel@tonic-gate 				head->lio_nent--;
13470Sstevel@tonic-gate 				head->lio_refcnt--;
13480Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13490Sstevel@tonic-gate 			}
13500Sstevel@tonic-gate 			continue;
13510Sstevel@tonic-gate 		}
13520Sstevel@tonic-gate 
13530Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
13540Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
13550Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13560Sstevel@tonic-gate 			if (head) {
13570Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13580Sstevel@tonic-gate 				head->lio_nent--;
13590Sstevel@tonic-gate 				head->lio_refcnt--;
13600Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13610Sstevel@tonic-gate 			}
13620Sstevel@tonic-gate 			aio_errors++;
13630Sstevel@tonic-gate 			continue;
13640Sstevel@tonic-gate 		}
13650Sstevel@tonic-gate 
13660Sstevel@tonic-gate 		/*
13670Sstevel@tonic-gate 		 * check the permission of the partition
13680Sstevel@tonic-gate 		 */
13690Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
13700Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
13710Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
13720Sstevel@tonic-gate 			if (head) {
13730Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
13740Sstevel@tonic-gate 				head->lio_nent--;
13750Sstevel@tonic-gate 				head->lio_refcnt--;
13760Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
13770Sstevel@tonic-gate 			}
13780Sstevel@tonic-gate 			aio_errors++;
13790Sstevel@tonic-gate 			continue;
13800Sstevel@tonic-gate 		}
13810Sstevel@tonic-gate 
13820Sstevel@tonic-gate 		/*
13831885Sraf 		 * common case where requests are to the same fd
13841885Sraf 		 * for the same r/w operation.
13850Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
13860Sstevel@tonic-gate 		 */
13871885Sraf 		vp = fp->f_vnode;
13881885Sraf 		if (fp != prev_fp || mode != prev_mode) {
13890Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
13900Sstevel@tonic-gate 			if (aio_func == NULL) {
13910Sstevel@tonic-gate 				prev_fp = NULL;
13920Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
13930Sstevel@tonic-gate 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
13940Sstevel@tonic-gate 				aio_notsupported++;
13950Sstevel@tonic-gate 				if (head) {
13960Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
13970Sstevel@tonic-gate 					head->lio_nent--;
13980Sstevel@tonic-gate 					head->lio_refcnt--;
13990Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
14000Sstevel@tonic-gate 				}
14010Sstevel@tonic-gate 				continue;
14020Sstevel@tonic-gate 			} else {
14030Sstevel@tonic-gate 				prev_fp = fp;
14040Sstevel@tonic-gate 				prev_mode = mode;
14050Sstevel@tonic-gate 			}
14060Sstevel@tonic-gate 		}
14070Sstevel@tonic-gate 
14081885Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
14091885Sraf 		    &cbp->aio_resultp, vp);
14101885Sraf 		if (error) {
14110Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14120Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14130Sstevel@tonic-gate 			if (head) {
14140Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14150Sstevel@tonic-gate 				head->lio_nent--;
14160Sstevel@tonic-gate 				head->lio_refcnt--;
14170Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14180Sstevel@tonic-gate 			}
14190Sstevel@tonic-gate 			aio_errors++;
14200Sstevel@tonic-gate 			continue;
14210Sstevel@tonic-gate 		}
14220Sstevel@tonic-gate 
14230Sstevel@tonic-gate 		reqp->aio_req_lio = head;
14240Sstevel@tonic-gate 		deadhead = 0;
14250Sstevel@tonic-gate 
14260Sstevel@tonic-gate 		/*
14270Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
14280Sstevel@tonic-gate 		 * the driver to avoid a race condition
14290Sstevel@tonic-gate 		 */
14300Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
14310Sstevel@tonic-gate 		    EINPROGRESS);
14320Sstevel@tonic-gate 
14330Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = (caddr_t)cbp;
14340Sstevel@tonic-gate 
14351885Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
14361885Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
14371885Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
14381885Sraf 		if (aio_port | aio_thread) {
14391885Sraf 			port_kevent_t *lpkevp;
14401885Sraf 			/*
14411885Sraf 			 * Prepare data to send with each aiocb completed.
14421885Sraf 			 */
14431885Sraf 			if (aio_port) {
14441885Sraf 				void *paddr =
14451885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14461885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
14471885Sraf 					error = EFAULT;
14481885Sraf 			} else {	/* aio_thread */
14491885Sraf 				pnotify.portnfy_port =
14501885Sraf 				    aiocb->aio_sigevent.sigev_signo;
14511885Sraf 				pnotify.portnfy_user =
14521885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
14531885Sraf 			}
14541885Sraf 			if (error)
14551885Sraf 				/* EMPTY */;
14561885Sraf 			else if (pkevtp != NULL &&
14571885Sraf 			    pnotify.portnfy_port == lio_head_port)
14581885Sraf 				error = port_dup_event(pkevtp, &lpkevp,
14591885Sraf 				    PORT_ALLOC_DEFAULT);
14601885Sraf 			else
14611885Sraf 				error = port_alloc_event(pnotify.portnfy_port,
14621885Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
14631885Sraf 				    &lpkevp);
14641885Sraf 			if (error == 0) {
14651885Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
14661885Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
14671885Sraf 				    aio_port_callback, reqp);
14681885Sraf 				lpkevp->portkev_events = event;
14691885Sraf 				reqp->aio_req_portkev = lpkevp;
14701885Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
14711885Sraf 			}
14720Sstevel@tonic-gate 		}
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate 		/*
14750Sstevel@tonic-gate 		 * send the request to driver.
14760Sstevel@tonic-gate 		 */
14770Sstevel@tonic-gate 		if (error == 0) {
14780Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
14790Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
14800Sstevel@tonic-gate 				aio_zerolen(reqp);
14810Sstevel@tonic-gate 				continue;
14820Sstevel@tonic-gate 			}
14830Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
14840Sstevel@tonic-gate 			    CRED());
14850Sstevel@tonic-gate 		}
14861885Sraf 
14870Sstevel@tonic-gate 		/*
14880Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
14890Sstevel@tonic-gate 		 * completed unless there was an error.
14900Sstevel@tonic-gate 		 */
14910Sstevel@tonic-gate 		if (error) {
14920Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
14930Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
14940Sstevel@tonic-gate 			if (head) {
14950Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
14960Sstevel@tonic-gate 				head->lio_nent--;
14970Sstevel@tonic-gate 				head->lio_refcnt--;
14980Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
14990Sstevel@tonic-gate 			}
15000Sstevel@tonic-gate 			if (error == ENOTSUP)
15010Sstevel@tonic-gate 				aio_notsupported++;
15020Sstevel@tonic-gate 			else
15030Sstevel@tonic-gate 				aio_errors++;
15044502Spraks 			lio_set_error(reqp, portused);
15050Sstevel@tonic-gate 		} else {
15060Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
15070Sstevel@tonic-gate 		}
15080Sstevel@tonic-gate 	}
15090Sstevel@tonic-gate 
15100Sstevel@tonic-gate 	if (aio_notsupported) {
15110Sstevel@tonic-gate 		error = ENOTSUP;
15120Sstevel@tonic-gate 	} else if (aio_errors) {
15130Sstevel@tonic-gate 		/*
15140Sstevel@tonic-gate 		 * return EIO if any request failed
15150Sstevel@tonic-gate 		 */
15160Sstevel@tonic-gate 		error = EIO;
15170Sstevel@tonic-gate 	}
15180Sstevel@tonic-gate 
15190Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
15200Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
15210Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
15220Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
15230Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
15240Sstevel@tonic-gate 				error = EINTR;
15250Sstevel@tonic-gate 				goto done;
15260Sstevel@tonic-gate 			}
15270Sstevel@tonic-gate 		}
15280Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
15290Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
15300Sstevel@tonic-gate 	}
15310Sstevel@tonic-gate 
15320Sstevel@tonic-gate done:
15330Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
15340Sstevel@tonic-gate 	if (deadhead) {
15350Sstevel@tonic-gate 		if (head->lio_sigqp)
15360Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
15371885Sraf 		if (head->lio_portkev)
15381885Sraf 			port_free_event(head->lio_portkev);
15390Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
15400Sstevel@tonic-gate 	}
15410Sstevel@tonic-gate 	return (error);
15420Sstevel@tonic-gate }
15430Sstevel@tonic-gate 
15440Sstevel@tonic-gate #endif /* _LP64 */
15450Sstevel@tonic-gate 
15460Sstevel@tonic-gate /*
15470Sstevel@tonic-gate  * Asynchronous list IO.
15480Sstevel@tonic-gate  * If list I/O is called with LIO_WAIT it can still return
15490Sstevel@tonic-gate  * before all the I/O's are completed if a signal is caught
15500Sstevel@tonic-gate  * or if the list include UFS I/O requests. If this happens,
15510Sstevel@tonic-gate  * libaio will call aliowait() to wait for the I/O's to
15520Sstevel@tonic-gate  * complete
15530Sstevel@tonic-gate  */
15540Sstevel@tonic-gate /*ARGSUSED*/
15550Sstevel@tonic-gate static int
15560Sstevel@tonic-gate aliowait(
15570Sstevel@tonic-gate 	int	mode,
15580Sstevel@tonic-gate 	void	*aiocb,
15590Sstevel@tonic-gate 	int	nent,
15600Sstevel@tonic-gate 	void	*sigev,
15610Sstevel@tonic-gate 	int	run_mode)
15620Sstevel@tonic-gate {
15630Sstevel@tonic-gate 	aio_lio_t	*head;
15640Sstevel@tonic-gate 	aio_t		*aiop;
15650Sstevel@tonic-gate 	caddr_t		cbplist;
15660Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
15670Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15680Sstevel@tonic-gate 	aiocb32_t	*cbp32;
15690Sstevel@tonic-gate 	caddr32_t	*ucbp32;
15700Sstevel@tonic-gate 	aiocb64_32_t	*cbp64;
15710Sstevel@tonic-gate #endif
15720Sstevel@tonic-gate 	int		error = 0;
15730Sstevel@tonic-gate 	int		i;
15740Sstevel@tonic-gate 	size_t		ssize = 0;
15750Sstevel@tonic-gate 	model_t		model = get_udatamodel();
15760Sstevel@tonic-gate 
15770Sstevel@tonic-gate 	aiop = curproc->p_aio;
15780Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
15790Sstevel@tonic-gate 		return (EINVAL);
15800Sstevel@tonic-gate 
15810Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15820Sstevel@tonic-gate 		ssize = (sizeof (aiocb_t *) * nent);
15830Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15840Sstevel@tonic-gate 	else
15850Sstevel@tonic-gate 		ssize = (sizeof (caddr32_t) * nent);
15860Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15870Sstevel@tonic-gate 
15880Sstevel@tonic-gate 	if (ssize == 0)
15890Sstevel@tonic-gate 		return (EINVAL);
15900Sstevel@tonic-gate 
15910Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
15920Sstevel@tonic-gate 
15930Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
15940Sstevel@tonic-gate 		ucbp = (aiocb_t **)cbplist;
15950Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
15960Sstevel@tonic-gate 	else
15970Sstevel@tonic-gate 		ucbp32 = (caddr32_t *)cbplist;
15980Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
15990Sstevel@tonic-gate 
16000Sstevel@tonic-gate 	if (copyin(aiocb, cbplist, ssize)) {
16010Sstevel@tonic-gate 		error = EFAULT;
16020Sstevel@tonic-gate 		goto done;
16030Sstevel@tonic-gate 	}
16040Sstevel@tonic-gate 
16050Sstevel@tonic-gate 	/*
16060Sstevel@tonic-gate 	 * To find the list head, we go through the
16070Sstevel@tonic-gate 	 * list of aiocb structs, find the request
16080Sstevel@tonic-gate 	 * its for, then get the list head that reqp
16090Sstevel@tonic-gate 	 * points to
16100Sstevel@tonic-gate 	 */
16110Sstevel@tonic-gate 	head = NULL;
16120Sstevel@tonic-gate 
16130Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
16140Sstevel@tonic-gate 		if (model == DATAMODEL_NATIVE) {
16150Sstevel@tonic-gate 			/*
16160Sstevel@tonic-gate 			 * Since we are only checking for a NULL pointer
16170Sstevel@tonic-gate 			 * Following should work on both native data sizes
16180Sstevel@tonic-gate 			 * as well as for largefile aiocb.
16190Sstevel@tonic-gate 			 */
16200Sstevel@tonic-gate 			if ((cbp = *ucbp++) == NULL)
16210Sstevel@tonic-gate 				continue;
16220Sstevel@tonic-gate 			if (run_mode != AIO_LARGEFILE)
16230Sstevel@tonic-gate 				if (head = aio_list_get(&cbp->aio_resultp))
16240Sstevel@tonic-gate 					break;
16250Sstevel@tonic-gate 			else {
16260Sstevel@tonic-gate 				/*
16270Sstevel@tonic-gate 				 * This is a case when largefile call is
16280Sstevel@tonic-gate 				 * made on 32 bit kernel.
16290Sstevel@tonic-gate 				 * Treat each pointer as pointer to
16300Sstevel@tonic-gate 				 * aiocb64_32
16310Sstevel@tonic-gate 				 */
16320Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16330Sstevel@tonic-gate 				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
16340Sstevel@tonic-gate 					break;
16350Sstevel@tonic-gate 			}
16360Sstevel@tonic-gate 		}
16370Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
16380Sstevel@tonic-gate 		else {
16390Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
16400Sstevel@tonic-gate 				if ((cbp64 = (aiocb64_32_t *)
16410Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16420Sstevel@tonic-gate 					continue;
16430Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16440Sstevel@tonic-gate 				    &cbp64->aio_resultp))
16450Sstevel@tonic-gate 					break;
16460Sstevel@tonic-gate 			} else if (run_mode == AIO_32) {
16470Sstevel@tonic-gate 				if ((cbp32 = (aiocb32_t *)
16480Sstevel@tonic-gate 				    (uintptr_t)*ucbp32++) == NULL)
16490Sstevel@tonic-gate 					continue;
16500Sstevel@tonic-gate 				if (head = aio_list_get((aio_result_t *)
16510Sstevel@tonic-gate 				    &cbp32->aio_resultp))
16520Sstevel@tonic-gate 					break;
16530Sstevel@tonic-gate 			}
16540Sstevel@tonic-gate 		}
16550Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL */
16560Sstevel@tonic-gate 	}
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 	if (head == NULL) {
16590Sstevel@tonic-gate 		error = EINVAL;
16600Sstevel@tonic-gate 		goto done;
16610Sstevel@tonic-gate 	}
16620Sstevel@tonic-gate 
16630Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
16640Sstevel@tonic-gate 	while (head->lio_refcnt > 0) {
16650Sstevel@tonic-gate 		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
16660Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
16670Sstevel@tonic-gate 			error = EINTR;
16680Sstevel@tonic-gate 			goto done;
16690Sstevel@tonic-gate 		}
16700Sstevel@tonic-gate 	}
16710Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
16720Sstevel@tonic-gate 	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
16730Sstevel@tonic-gate done:
16740Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
16750Sstevel@tonic-gate 	return (error);
16760Sstevel@tonic-gate }
16770Sstevel@tonic-gate 
16780Sstevel@tonic-gate aio_lio_t *
16790Sstevel@tonic-gate aio_list_get(aio_result_t *resultp)
16800Sstevel@tonic-gate {
16810Sstevel@tonic-gate 	aio_lio_t	*head = NULL;
16820Sstevel@tonic-gate 	aio_t		*aiop;
16830Sstevel@tonic-gate 	aio_req_t 	**bucket;
16840Sstevel@tonic-gate 	aio_req_t 	*reqp;
16850Sstevel@tonic-gate 	long		index;
16860Sstevel@tonic-gate 
16870Sstevel@tonic-gate 	aiop = curproc->p_aio;
16880Sstevel@tonic-gate 	if (aiop == NULL)
16890Sstevel@tonic-gate 		return (NULL);
16900Sstevel@tonic-gate 
16910Sstevel@tonic-gate 	if (resultp) {
16920Sstevel@tonic-gate 		index = AIO_HASH(resultp);
16930Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
16940Sstevel@tonic-gate 		for (reqp = *bucket; reqp != NULL;
16950Sstevel@tonic-gate 		    reqp = reqp->aio_hash_next) {
16960Sstevel@tonic-gate 			if (reqp->aio_req_resultp == resultp) {
16970Sstevel@tonic-gate 				head = reqp->aio_req_lio;
16980Sstevel@tonic-gate 				return (head);
16990Sstevel@tonic-gate 			}
17000Sstevel@tonic-gate 		}
17010Sstevel@tonic-gate 	}
17020Sstevel@tonic-gate 	return (NULL);
17030Sstevel@tonic-gate }
17040Sstevel@tonic-gate 
17050Sstevel@tonic-gate 
17060Sstevel@tonic-gate static void
17070Sstevel@tonic-gate lio_set_uerror(void *resultp, int error)
17080Sstevel@tonic-gate {
17090Sstevel@tonic-gate 	/*
17100Sstevel@tonic-gate 	 * the resultp field is a pointer to where the
17110Sstevel@tonic-gate 	 * error should be written out to the user's
17120Sstevel@tonic-gate 	 * aiocb.
17130Sstevel@tonic-gate 	 *
17140Sstevel@tonic-gate 	 */
17150Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
17160Sstevel@tonic-gate 		(void) sulword(&((aio_result_t *)resultp)->aio_return,
17170Sstevel@tonic-gate 		    (ssize_t)-1);
17180Sstevel@tonic-gate 		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
17190Sstevel@tonic-gate 	}
17200Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17210Sstevel@tonic-gate 	else {
17220Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
17230Sstevel@tonic-gate 		    (uint_t)-1);
17240Sstevel@tonic-gate 		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
17250Sstevel@tonic-gate 	}
17260Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17270Sstevel@tonic-gate }
17280Sstevel@tonic-gate 
17290Sstevel@tonic-gate /*
17300Sstevel@tonic-gate  * do cleanup completion for all requests in list. memory for
17310Sstevel@tonic-gate  * each request is also freed.
17320Sstevel@tonic-gate  */
17330Sstevel@tonic-gate static void
17340Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
17350Sstevel@tonic-gate {
17360Sstevel@tonic-gate 	int i;
17370Sstevel@tonic-gate 	aio_req_t *reqp;
17380Sstevel@tonic-gate 	aio_result_t *resultp;
17391885Sraf 	aiocb64_32_t *aiocb_64;
17400Sstevel@tonic-gate 
17410Sstevel@tonic-gate 	for (i = 0; i < nent; i++) {
17420Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
17430Sstevel@tonic-gate 			if (cbp[i] == NULL)
17440Sstevel@tonic-gate 				continue;
17450Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE) {
17460Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)cbp[i];
17471885Sraf 				resultp = (aio_result_t *)
17481885Sraf 				    &aiocb_64->aio_resultp;
17490Sstevel@tonic-gate 			} else
17500Sstevel@tonic-gate 				resultp = &cbp[i]->aio_resultp;
17510Sstevel@tonic-gate 		}
17520Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
17530Sstevel@tonic-gate 		else {
17541885Sraf 			aiocb32_t *aiocb_32;
17551885Sraf 			caddr32_t *cbp32;
17560Sstevel@tonic-gate 
17570Sstevel@tonic-gate 			cbp32 = (caddr32_t *)cbp;
17580Sstevel@tonic-gate 			if (cbp32[i] == NULL)
17590Sstevel@tonic-gate 				continue;
17600Sstevel@tonic-gate 			if (run_mode == AIO_32) {
17610Sstevel@tonic-gate 				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
17620Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_32->
17630Sstevel@tonic-gate 				    aio_resultp;
17640Sstevel@tonic-gate 			} else if (run_mode == AIO_LARGEFILE) {
17650Sstevel@tonic-gate 				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
17660Sstevel@tonic-gate 				resultp = (aio_result_t *)&aiocb_64->
17670Sstevel@tonic-gate 				    aio_resultp;
17680Sstevel@tonic-gate 			}
17690Sstevel@tonic-gate 		}
17700Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
17710Sstevel@tonic-gate 		/*
17720Sstevel@tonic-gate 		 * we need to get the aio_cleanupq_mutex since we call
17730Sstevel@tonic-gate 		 * aio_req_done().
17740Sstevel@tonic-gate 		 */
17750Sstevel@tonic-gate 		mutex_enter(&aiop->aio_cleanupq_mutex);
17760Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
17770Sstevel@tonic-gate 		reqp = aio_req_done(resultp);
17780Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
17790Sstevel@tonic-gate 		mutex_exit(&aiop->aio_cleanupq_mutex);
17800Sstevel@tonic-gate 		if (reqp != NULL) {
17810Sstevel@tonic-gate 			aphysio_unlock(reqp);
17820Sstevel@tonic-gate 			aio_copyout_result(reqp);
17830Sstevel@tonic-gate 			mutex_enter(&aiop->aio_mutex);
17840Sstevel@tonic-gate 			aio_req_free(aiop, reqp);
17850Sstevel@tonic-gate 			mutex_exit(&aiop->aio_mutex);
17860Sstevel@tonic-gate 		}
17870Sstevel@tonic-gate 	}
17880Sstevel@tonic-gate }
17890Sstevel@tonic-gate 
17900Sstevel@tonic-gate /*
17911885Sraf  * Write out the results for an aio request that is done.
17920Sstevel@tonic-gate  */
17930Sstevel@tonic-gate static int
17940Sstevel@tonic-gate aioerror(void *cb, int run_mode)
17950Sstevel@tonic-gate {
17960Sstevel@tonic-gate 	aio_result_t *resultp;
17970Sstevel@tonic-gate 	aio_t *aiop;
17980Sstevel@tonic-gate 	aio_req_t *reqp;
17990Sstevel@tonic-gate 	int retval;
18000Sstevel@tonic-gate 
18010Sstevel@tonic-gate 	aiop = curproc->p_aio;
18020Sstevel@tonic-gate 	if (aiop == NULL || cb == NULL)
18030Sstevel@tonic-gate 		return (EINVAL);
18040Sstevel@tonic-gate 
18050Sstevel@tonic-gate 	if (get_udatamodel() == DATAMODEL_NATIVE) {
18060Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18070Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18080Sstevel@tonic-gate 			    aio_resultp;
18090Sstevel@tonic-gate 		else
18100Sstevel@tonic-gate 			resultp = &((aiocb_t *)cb)->aio_resultp;
18110Sstevel@tonic-gate 	}
18120Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18130Sstevel@tonic-gate 	else {
18140Sstevel@tonic-gate 		if (run_mode == AIO_LARGEFILE)
18150Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18160Sstevel@tonic-gate 			    aio_resultp;
18170Sstevel@tonic-gate 		else if (run_mode == AIO_32)
18180Sstevel@tonic-gate 			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
18190Sstevel@tonic-gate 			    aio_resultp;
18200Sstevel@tonic-gate 	}
18210Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
18220Sstevel@tonic-gate 	/*
18230Sstevel@tonic-gate 	 * we need to get the aio_cleanupq_mutex since we call
18240Sstevel@tonic-gate 	 * aio_req_find().
18250Sstevel@tonic-gate 	 */
18260Sstevel@tonic-gate 	mutex_enter(&aiop->aio_cleanupq_mutex);
18270Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18280Sstevel@tonic-gate 	retval = aio_req_find(resultp, &reqp);
18290Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
18300Sstevel@tonic-gate 	mutex_exit(&aiop->aio_cleanupq_mutex);
18310Sstevel@tonic-gate 	if (retval == 0) {
18320Sstevel@tonic-gate 		aphysio_unlock(reqp);
18330Sstevel@tonic-gate 		aio_copyout_result(reqp);
18340Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
18350Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
18360Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
18370Sstevel@tonic-gate 		return (0);
18380Sstevel@tonic-gate 	} else if (retval == 1)
18390Sstevel@tonic-gate 		return (EINPROGRESS);
18400Sstevel@tonic-gate 	else if (retval == 2)
18410Sstevel@tonic-gate 		return (EINVAL);
18420Sstevel@tonic-gate 	return (0);
18430Sstevel@tonic-gate }
18440Sstevel@tonic-gate 
18450Sstevel@tonic-gate /*
18460Sstevel@tonic-gate  * 	aio_cancel - if no requests outstanding,
18470Sstevel@tonic-gate  *			return AIO_ALLDONE
18480Sstevel@tonic-gate  *			else
18490Sstevel@tonic-gate  *			return AIO_NOTCANCELED
18500Sstevel@tonic-gate  */
18510Sstevel@tonic-gate static int
18520Sstevel@tonic-gate aio_cancel(
18530Sstevel@tonic-gate 	int	fildes,
18540Sstevel@tonic-gate 	void 	*cb,
18550Sstevel@tonic-gate 	long	*rval,
18560Sstevel@tonic-gate 	int	run_mode)
18570Sstevel@tonic-gate {
18580Sstevel@tonic-gate 	aio_t *aiop;
18590Sstevel@tonic-gate 	void *resultp;
18600Sstevel@tonic-gate 	int index;
18610Sstevel@tonic-gate 	aio_req_t **bucket;
18620Sstevel@tonic-gate 	aio_req_t *ent;
18630Sstevel@tonic-gate 
18640Sstevel@tonic-gate 
18650Sstevel@tonic-gate 	/*
18660Sstevel@tonic-gate 	 * Verify valid file descriptor
18670Sstevel@tonic-gate 	 */
18680Sstevel@tonic-gate 	if ((getf(fildes)) == NULL) {
18690Sstevel@tonic-gate 		return (EBADF);
18700Sstevel@tonic-gate 	}
18710Sstevel@tonic-gate 	releasef(fildes);
18720Sstevel@tonic-gate 
18730Sstevel@tonic-gate 	aiop = curproc->p_aio;
18740Sstevel@tonic-gate 	if (aiop == NULL)
18750Sstevel@tonic-gate 		return (EINVAL);
18760Sstevel@tonic-gate 
18770Sstevel@tonic-gate 	if (aiop->aio_outstanding == 0) {
18780Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
18790Sstevel@tonic-gate 		return (0);
18800Sstevel@tonic-gate 	}
18810Sstevel@tonic-gate 
18820Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
18830Sstevel@tonic-gate 	if (cb != NULL) {
18840Sstevel@tonic-gate 		if (get_udatamodel() == DATAMODEL_NATIVE) {
18850Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
18860Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18870Sstevel@tonic-gate 				    ->aio_resultp;
18880Sstevel@tonic-gate 			else
18890Sstevel@tonic-gate 				resultp = &((aiocb_t *)cb)->aio_resultp;
18900Sstevel@tonic-gate 		}
18910Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
18920Sstevel@tonic-gate 		else {
18930Sstevel@tonic-gate 			if (run_mode == AIO_LARGEFILE)
18940Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18950Sstevel@tonic-gate 				    ->aio_resultp;
18960Sstevel@tonic-gate 			else if (run_mode == AIO_32)
18970Sstevel@tonic-gate 				resultp = (aio_result_t *)&((aiocb32_t *)cb)
18980Sstevel@tonic-gate 				    ->aio_resultp;
18990Sstevel@tonic-gate 		}
19000Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
19010Sstevel@tonic-gate 		index = AIO_HASH(resultp);
19020Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19030Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19040Sstevel@tonic-gate 			if (ent->aio_req_resultp == resultp) {
19050Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
19060Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19070Sstevel@tonic-gate 					*rval = AIO_ALLDONE;
19080Sstevel@tonic-gate 					return (0);
19090Sstevel@tonic-gate 				}
19100Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
19110Sstevel@tonic-gate 				*rval = AIO_NOTCANCELED;
19120Sstevel@tonic-gate 				return (0);
19130Sstevel@tonic-gate 			}
19140Sstevel@tonic-gate 		}
19150Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
19160Sstevel@tonic-gate 		*rval = AIO_ALLDONE;
19170Sstevel@tonic-gate 		return (0);
19180Sstevel@tonic-gate 	}
19190Sstevel@tonic-gate 
19200Sstevel@tonic-gate 	for (index = 0; index < AIO_HASHSZ; index++) {
19210Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
19220Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19230Sstevel@tonic-gate 			if (ent->aio_req_fd == fildes) {
19240Sstevel@tonic-gate 				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
19250Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
19260Sstevel@tonic-gate 					*rval = AIO_NOTCANCELED;
19270Sstevel@tonic-gate 					return (0);
19280Sstevel@tonic-gate 				}
19290Sstevel@tonic-gate 			}
19300Sstevel@tonic-gate 		}
19310Sstevel@tonic-gate 	}
19320Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
19330Sstevel@tonic-gate 	*rval = AIO_ALLDONE;
19340Sstevel@tonic-gate 	return (0);
19350Sstevel@tonic-gate }
19360Sstevel@tonic-gate 
/*
 * solaris version of asynchronous read and write
 *
 * Queue a single async read or write (per `mode`: FREAD/FWRITE) of
 * `bufsize` bytes at `offset` on fd `fdes`, reporting completion
 * through the user-supplied `resultp`.  Returns 0 on successful
 * enqueue; the I/O itself completes later.  AIO_POLL_BIT in `opcode`
 * requests pollable completion.
 */
static int
arw(
	int	opcode,
	int	fdes,
	char	*bufp,
	int	bufsize,
	offset_t	offset,
	aio_result_t	*resultp,
	int		mode)
{
	file_t		*fp;
	int		error;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
#ifdef _LP64
	aiocb_t		aiocb;
#else
	aiocb64_32_t	aiocb64;
#endif

	/* process must have async I/O state set up already */
	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/* take a hold on the fd for the duration of the request */
	if ((fp = getf(fdes)) == NULL) {
		return (EBADF);
	}

	/*
	 * check the permission of the partition
	 */
	if ((fp->f_flag & mode) == 0) {
		releasef(fdes);
		return (EBADF);
	}

	/* EBADFD if the vnode's driver does not support async I/O */
	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fdes);
		return (EBADFD);
	}
	/*
	 * Build a kernel-local aiocb from the raw arguments so the
	 * common setup path can be reused; no sigevent notification
	 * is requested for this legacy interface.  On 32-bit kernels
	 * the largefile (aiocb64_32) form carries the 64-bit offset.
	 */
#ifdef _LP64
	aiocb.aio_fildes = fdes;
	aiocb.aio_buf = bufp;
	aiocb.aio_nbytes = bufsize;
	aiocb.aio_offset = offset;
	aiocb.aio_sigevent.sigev_notify = 0;
	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
#else
	aiocb64.aio_fildes = fdes;
	aiocb64.aio_buf = (caddr32_t)bufp;
	aiocb64.aio_nbytes = bufsize;
	aiocb64.aio_offset = offset;
	aiocb64.aio_sigevent.sigev_notify = 0;
	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
#endif
	if (error) {
		releasef(fdes);
		return (error);
	}

	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	/* zero-length I/O completes immediately without hitting the driver */
	if (bufsize == 0) {
		clear_active_fd(fdes);
		aio_zerolen(reqp);
		return (0);
	}
	/*
	 * send the request to driver.
	 */
	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		/* undo the enqueue: drop the fd hold and free the request */
		releasef(fdes);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		/* wake a cleanup thread blocked waiting for pending to drain */
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fdes);
	return (0);
}
20380Sstevel@tonic-gate 
20390Sstevel@tonic-gate /*
20400Sstevel@tonic-gate  * posix version of asynchronous read and write
20410Sstevel@tonic-gate  */
20421885Sraf static int
20430Sstevel@tonic-gate aiorw(
20440Sstevel@tonic-gate 	int		opcode,
20450Sstevel@tonic-gate 	void		*aiocb_arg,
20460Sstevel@tonic-gate 	int		mode,
20470Sstevel@tonic-gate 	int		run_mode)
20480Sstevel@tonic-gate {
20490Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
20500Sstevel@tonic-gate 	aiocb32_t	aiocb32;
20510Sstevel@tonic-gate 	struct	sigevent32 *sigev32;
20520Sstevel@tonic-gate 	port_notify32_t	pntfy32;
20530Sstevel@tonic-gate #endif
20540Sstevel@tonic-gate 	aiocb64_32_t	aiocb64;
20550Sstevel@tonic-gate 	aiocb_t		aiocb;
20560Sstevel@tonic-gate 	file_t		*fp;
20570Sstevel@tonic-gate 	int		error, fd;
20580Sstevel@tonic-gate 	size_t		bufsize;
20590Sstevel@tonic-gate 	struct vnode	*vp;
20600Sstevel@tonic-gate 	aio_req_t	*reqp;
20610Sstevel@tonic-gate 	aio_t		*aiop;
20620Sstevel@tonic-gate 	int		(*aio_func)();
20630Sstevel@tonic-gate 	aio_result_t	*resultp;
20640Sstevel@tonic-gate 	struct	sigevent *sigev;
20650Sstevel@tonic-gate 	model_t		model;
20660Sstevel@tonic-gate 	int		aio_use_port = 0;
20670Sstevel@tonic-gate 	port_notify_t	pntfy;
20680Sstevel@tonic-gate 
20690Sstevel@tonic-gate 	model = get_udatamodel();
20700Sstevel@tonic-gate 	aiop = curproc->p_aio;
20710Sstevel@tonic-gate 	if (aiop == NULL)
20720Sstevel@tonic-gate 		return (EINVAL);
20730Sstevel@tonic-gate 
20740Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
20750Sstevel@tonic-gate 		if (run_mode != AIO_LARGEFILE) {
20760Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
20770Sstevel@tonic-gate 				return (EFAULT);
20780Sstevel@tonic-gate 			bufsize = aiocb.aio_nbytes;
20790Sstevel@tonic-gate 			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
20800Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
20810Sstevel@tonic-gate 				return (EBADF);
20820Sstevel@tonic-gate 			}
20830Sstevel@tonic-gate 			sigev = &aiocb.aio_sigevent;
20840Sstevel@tonic-gate 		} else {
20850Sstevel@tonic-gate 			/*
20860Sstevel@tonic-gate 			 * We come here only when we make largefile
20870Sstevel@tonic-gate 			 * call on 32 bit kernel using 32 bit library.
20880Sstevel@tonic-gate 			 */
20890Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
20900Sstevel@tonic-gate 				return (EFAULT);
20910Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
20920Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
20930Sstevel@tonic-gate 			    ->aio_resultp);
20941885Sraf 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
20950Sstevel@tonic-gate 				return (EBADF);
20960Sstevel@tonic-gate 			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
20970Sstevel@tonic-gate 		}
20980Sstevel@tonic-gate 
20990Sstevel@tonic-gate 		if (sigev->sigev_notify == SIGEV_PORT) {
21000Sstevel@tonic-gate 			if (copyin((void *)sigev->sigev_value.sival_ptr,
21010Sstevel@tonic-gate 			    &pntfy, sizeof (port_notify_t))) {
21020Sstevel@tonic-gate 				releasef(fd);
21030Sstevel@tonic-gate 				return (EFAULT);
21040Sstevel@tonic-gate 			}
21050Sstevel@tonic-gate 			aio_use_port = 1;
21061885Sraf 		} else if (sigev->sigev_notify == SIGEV_THREAD) {
21071885Sraf 			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
21081885Sraf 			pntfy.portnfy_user =
21091885Sraf 			    aiocb.aio_sigevent.sigev_value.sival_ptr;
21101885Sraf 			aio_use_port = 1;
21110Sstevel@tonic-gate 		}
21120Sstevel@tonic-gate 	}
21130Sstevel@tonic-gate #ifdef	_SYSCALL32_IMPL
21140Sstevel@tonic-gate 	else {
21150Sstevel@tonic-gate 		if (run_mode == AIO_32) {
21160Sstevel@tonic-gate 			/* 32 bit system call is being made on 64 bit kernel */
21170Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
21180Sstevel@tonic-gate 				return (EFAULT);
21190Sstevel@tonic-gate 
21200Sstevel@tonic-gate 			bufsize = aiocb32.aio_nbytes;
21210Sstevel@tonic-gate 			aiocb_32ton(&aiocb32, &aiocb);
21220Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
21230Sstevel@tonic-gate 			    aio_resultp);
21240Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
21250Sstevel@tonic-gate 				return (EBADF);
21260Sstevel@tonic-gate 			}
21270Sstevel@tonic-gate 			sigev32 = &aiocb32.aio_sigevent;
21280Sstevel@tonic-gate 		} else if (run_mode == AIO_LARGEFILE) {
21290Sstevel@tonic-gate 			/*
21300Sstevel@tonic-gate 			 * We come here only when we make largefile
21310Sstevel@tonic-gate 			 * call on 64 bit kernel using 32 bit library.
21320Sstevel@tonic-gate 			 */
21330Sstevel@tonic-gate 			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
21340Sstevel@tonic-gate 				return (EFAULT);
21350Sstevel@tonic-gate 			bufsize = aiocb64.aio_nbytes;
21360Sstevel@tonic-gate 			aiocb_LFton(&aiocb64, &aiocb);
21370Sstevel@tonic-gate 			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
21380Sstevel@tonic-gate 			    ->aio_resultp);
21390Sstevel@tonic-gate 			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
21400Sstevel@tonic-gate 				return (EBADF);
21410Sstevel@tonic-gate 			sigev32 = &aiocb64.aio_sigevent;
21420Sstevel@tonic-gate 		}
21430Sstevel@tonic-gate 
21440Sstevel@tonic-gate 		if (sigev32->sigev_notify == SIGEV_PORT) {
21450Sstevel@tonic-gate 			if (copyin(
21460Sstevel@tonic-gate 			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
21470Sstevel@tonic-gate 			    &pntfy32, sizeof (port_notify32_t))) {
21480Sstevel@tonic-gate 				releasef(fd);
21490Sstevel@tonic-gate 				return (EFAULT);
21500Sstevel@tonic-gate 			}
21510Sstevel@tonic-gate 			pntfy.portnfy_port = pntfy32.portnfy_port;
21521885Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
21531885Sraf 			    pntfy32.portnfy_user;
21541885Sraf 			aio_use_port = 1;
21551885Sraf 		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
21561885Sraf 			pntfy.portnfy_port = sigev32->sigev_signo;
21571885Sraf 			pntfy.portnfy_user = (void *)(uintptr_t)
21581885Sraf 			    sigev32->sigev_value.sival_ptr;
21590Sstevel@tonic-gate 			aio_use_port = 1;
21600Sstevel@tonic-gate 		}
21610Sstevel@tonic-gate 	}
21620Sstevel@tonic-gate #endif  /* _SYSCALL32_IMPL */
21630Sstevel@tonic-gate 
21640Sstevel@tonic-gate 	/*
21650Sstevel@tonic-gate 	 * check the permission of the partition
21660Sstevel@tonic-gate 	 */
21670Sstevel@tonic-gate 
21680Sstevel@tonic-gate 	if ((fp->f_flag & mode) == 0) {
21690Sstevel@tonic-gate 		releasef(fd);
21700Sstevel@tonic-gate 		return (EBADF);
21710Sstevel@tonic-gate 	}
21720Sstevel@tonic-gate 
21730Sstevel@tonic-gate 	vp = fp->f_vnode;
21740Sstevel@tonic-gate 	aio_func = check_vp(vp, mode);
21750Sstevel@tonic-gate 	if (aio_func == NULL) {
21760Sstevel@tonic-gate 		releasef(fd);
21770Sstevel@tonic-gate 		return (EBADFD);
21780Sstevel@tonic-gate 	}
21791885Sraf 	if (run_mode == AIO_LARGEFILE)
21801885Sraf 		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp);
21810Sstevel@tonic-gate 	else
21821885Sraf 		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp);
21830Sstevel@tonic-gate 
21840Sstevel@tonic-gate 	if (error) {
21850Sstevel@tonic-gate 		releasef(fd);
21860Sstevel@tonic-gate 		return (error);
21870Sstevel@tonic-gate 	}
21880Sstevel@tonic-gate 	/*
21890Sstevel@tonic-gate 	 * enable polling on this request if the opcode has
21900Sstevel@tonic-gate 	 * the AIO poll bit set
21910Sstevel@tonic-gate 	 */
21920Sstevel@tonic-gate 	if (opcode & AIO_POLL_BIT)
21930Sstevel@tonic-gate 		reqp->aio_req_flags |= AIO_POLL;
21940Sstevel@tonic-gate 
21950Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE)
21960Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb = aiocb_arg;
21970Sstevel@tonic-gate #ifdef  _SYSCALL32_IMPL
21980Sstevel@tonic-gate 	else
21990Sstevel@tonic-gate 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
22000Sstevel@tonic-gate #endif
22010Sstevel@tonic-gate 
22021885Sraf 	if (aio_use_port) {
22031885Sraf 		int event = (run_mode == AIO_LARGEFILE)?
22041885Sraf 		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
22051885Sraf 		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
22061885Sraf 		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
22071885Sraf 	}
22080Sstevel@tonic-gate 
22090Sstevel@tonic-gate 	/*
22100Sstevel@tonic-gate 	 * send the request to driver.
22110Sstevel@tonic-gate 	 */
22120Sstevel@tonic-gate 	if (error == 0) {
22130Sstevel@tonic-gate 		if (bufsize == 0) {
22140Sstevel@tonic-gate 			clear_active_fd(fd);
22150Sstevel@tonic-gate 			aio_zerolen(reqp);
22160Sstevel@tonic-gate 			return (0);
22170Sstevel@tonic-gate 		}
22180Sstevel@tonic-gate 		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
22190Sstevel@tonic-gate 	}
22200Sstevel@tonic-gate 
22210Sstevel@tonic-gate 	/*
22220Sstevel@tonic-gate 	 * the fd is stored in the aio_req_t by aio_req_setup(), and
22230Sstevel@tonic-gate 	 * is released by the aio_cleanup_thread() when the IO has
22240Sstevel@tonic-gate 	 * completed.
22250Sstevel@tonic-gate 	 */
22260Sstevel@tonic-gate 	if (error) {
22270Sstevel@tonic-gate 		releasef(fd);
22280Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
22294502Spraks 		if (aio_use_port)
22304502Spraks 			aio_deq(&aiop->aio_portpending, reqp);
22310Sstevel@tonic-gate 		aio_req_free(aiop, reqp);
22320Sstevel@tonic-gate 		aiop->aio_pending--;
22330Sstevel@tonic-gate 		if (aiop->aio_flags & AIO_REQ_BLOCK)
22340Sstevel@tonic-gate 			cv_signal(&aiop->aio_cleanupcv);
22350Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
22360Sstevel@tonic-gate 		return (error);
22370Sstevel@tonic-gate 	}
22380Sstevel@tonic-gate 	clear_active_fd(fd);
22390Sstevel@tonic-gate 	return (0);
22400Sstevel@tonic-gate }
22410Sstevel@tonic-gate 
22420Sstevel@tonic-gate 
/*
 * set error for a list IO entry that failed.
 *
 * Called when an individual request of a listio batch could not be
 * submitted.  Undoes the pending-request accounting done at setup time
 * and frees the request.  "portused" is nonzero when the request was
 * queued on aio_portpending (SIGEV_PORT/SIGEV_THREAD notification) and
 * must be dequeued before it is freed.
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
	aio_t *aiop = curproc->p_aio;

	/* no aio state for this process; nothing to account for */
	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	if (portused)
		aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as its never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 *	 aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	/* wake anyone draining pending requests (e.g. process exit path) */
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}
22720Sstevel@tonic-gate 
22730Sstevel@tonic-gate /*
22740Sstevel@tonic-gate  * check if a specified request is done, and remove it from
22750Sstevel@tonic-gate  * the done queue. otherwise remove anybody from the done queue
22760Sstevel@tonic-gate  * if NULL is specified.
22770Sstevel@tonic-gate  */
22780Sstevel@tonic-gate static aio_req_t *
22790Sstevel@tonic-gate aio_req_done(void *resultp)
22800Sstevel@tonic-gate {
22810Sstevel@tonic-gate 	aio_req_t **bucket;
22820Sstevel@tonic-gate 	aio_req_t *ent;
22830Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
22840Sstevel@tonic-gate 	long index;
22850Sstevel@tonic-gate 
22860Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
22870Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
22880Sstevel@tonic-gate 
22890Sstevel@tonic-gate 	if (resultp) {
22900Sstevel@tonic-gate 		index = AIO_HASH(resultp);
22910Sstevel@tonic-gate 		bucket = &aiop->aio_hash[index];
22920Sstevel@tonic-gate 		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
22930Sstevel@tonic-gate 			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
22940Sstevel@tonic-gate 				if (ent->aio_req_flags & AIO_DONEQ) {
22950Sstevel@tonic-gate 					return (aio_req_remove(ent));
22960Sstevel@tonic-gate 				}
22970Sstevel@tonic-gate 				return (NULL);
22980Sstevel@tonic-gate 			}
22990Sstevel@tonic-gate 		}
23000Sstevel@tonic-gate 		/* no match, resultp is invalid */
23010Sstevel@tonic-gate 		return (NULL);
23020Sstevel@tonic-gate 	}
23030Sstevel@tonic-gate 	return (aio_req_remove(NULL));
23040Sstevel@tonic-gate }
23050Sstevel@tonic-gate 
23060Sstevel@tonic-gate /*
23070Sstevel@tonic-gate  * determine if a user-level resultp pointer is associated with an
23080Sstevel@tonic-gate  * active IO request. Zero is returned when the request is done,
23090Sstevel@tonic-gate  * and the request is removed from the done queue. Only when the
23100Sstevel@tonic-gate  * return value is zero, is the "reqp" pointer valid. One is returned
23110Sstevel@tonic-gate  * when the request is inprogress. Two is returned when the request
23120Sstevel@tonic-gate  * is invalid.
23130Sstevel@tonic-gate  */
23140Sstevel@tonic-gate static int
23150Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
23160Sstevel@tonic-gate {
23170Sstevel@tonic-gate 	aio_req_t **bucket;
23180Sstevel@tonic-gate 	aio_req_t *ent;
23190Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
23200Sstevel@tonic-gate 	long index;
23210Sstevel@tonic-gate 
23220Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23230Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23240Sstevel@tonic-gate 
23250Sstevel@tonic-gate 	index = AIO_HASH(resultp);
23260Sstevel@tonic-gate 	bucket = &aiop->aio_hash[index];
23270Sstevel@tonic-gate 	for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23280Sstevel@tonic-gate 		if (ent->aio_req_resultp == resultp) {
23290Sstevel@tonic-gate 			if (ent->aio_req_flags & AIO_DONEQ) {
23300Sstevel@tonic-gate 				*reqp = aio_req_remove(ent);
23310Sstevel@tonic-gate 				return (0);
23320Sstevel@tonic-gate 			}
23330Sstevel@tonic-gate 			return (1);
23340Sstevel@tonic-gate 		}
23350Sstevel@tonic-gate 	}
23360Sstevel@tonic-gate 	/* no match, resultp is invalid */
23370Sstevel@tonic-gate 	return (2);
23380Sstevel@tonic-gate }
23390Sstevel@tonic-gate 
/*
 * remove a request from the done queue.
 *
 * If "reqp" is non-NULL it is unlinked from whichever circular queue it
 * is on (aio_doneq or aio_cleanupq) and returned.  If "reqp" is NULL,
 * the head of aio_doneq is removed and returned, or NULL if the done
 * queue is empty.  The queues are circular doubly-linked lists; a
 * single-entry queue is recognized by an element pointing to itself.
 * Waiters in aio_waitn() are woken once the done queue drains.
 */
static aio_req_t *
aio_req_remove(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp != NULL) {
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp->aio_req_next == reqp) {
			/* only one request on queue */
			if (reqp ==  aiop->aio_doneq) {
				aiop->aio_doneq = NULL;
			} else {
				ASSERT(reqp == aiop->aio_cleanupq);
				aiop->aio_cleanupq = NULL;
			}
		} else {
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			/*
			 * The request can be either on the aio_doneq or the
			 * aio_cleanupq
			 */
			if (reqp == aiop->aio_doneq)
				aiop->aio_doneq = reqp->aio_req_next;

			if (reqp == aiop->aio_cleanupq)
				aiop->aio_cleanupq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	} else if ((reqp = aiop->aio_doneq) != NULL) {
		/* caller asked for any completed request; take the head */
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp == reqp->aio_req_next) {
			/* only one request on queue */
			aiop->aio_doneq = NULL;
		} else {
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			aiop->aio_doneq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	}
	/* an emptied done queue unblocks aio_waitn() waiters */
	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
		cv_broadcast(&aiop->aio_waitcv);
	return (reqp);
}
23940Sstevel@tonic-gate 
/*
 * Allocate and initialize an aio_req_t for one asynchronous I/O.
 *
 *	reqpp	 - out: the initialized request
 *	aiop	 - per-process aio state
 *	arg	 - copied-in user aiocb (native format)
 *	resultp	 - user-space address where the result will be stored
 *	vp	 - vnode the I/O is directed at
 *
 * Returns 0 on success; EAGAIN if memory for the signal queue entry or
 * the request could not be allocated; EIO if the process is draining
 * outstanding aio (AIO_REQ_BLOCK); EINVAL for a duplicate resultp.
 */
static int
aio_req_setup(
	aio_req_t	**reqpp,
	aio_t 		*aiop,
	aiocb_t 	*arg,
	aio_result_t 	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t 	*reqp;
	struct uio 	*uio;
	struct sigevent *sigev;
	int		error;

	/*
	 * Preallocate the completion-signal queue entry before taking
	 * aio_mutex, so the lock is never held across allocation.
	 */
	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value = sigev->sigev_value;
	}

	mutex_enter(&aiop->aio_mutex);

	/* process is waiting for pending aio to drain; refuse new work */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	/* port/thread notification requests are tracked on aio_portpending */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
24670Sstevel@tonic-gate 
24680Sstevel@tonic-gate /*
24690Sstevel@tonic-gate  * Allocate p_aio struct.
24700Sstevel@tonic-gate  */
24710Sstevel@tonic-gate static aio_t *
24720Sstevel@tonic-gate aio_aiop_alloc(void)
24730Sstevel@tonic-gate {
24740Sstevel@tonic-gate 	aio_t	*aiop;
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&curproc->p_lock));
24770Sstevel@tonic-gate 
24780Sstevel@tonic-gate 	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
24790Sstevel@tonic-gate 	if (aiop) {
24800Sstevel@tonic-gate 		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
24810Sstevel@tonic-gate 		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
24824502Spraks 		    NULL);
24830Sstevel@tonic-gate 		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
24840Sstevel@tonic-gate 	}
24850Sstevel@tonic-gate 	return (aiop);
24860Sstevel@tonic-gate }
24870Sstevel@tonic-gate 
/*
 * Allocate an aio_req struct.
 *
 * Prefers the per-process free list (recycled entries are bzero'ed);
 * otherwise allocates fresh memory.  The request is registered in the
 * resultp hash so completions can be matched back to user space.
 * Returns 0 on success, EAGAIN when memory is tight or allocation
 * fails, EINVAL when resultp is already in use by another request.
 */
static int
aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
{
	aio_req_t *reqp;
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if ((reqp = aiop->aio_free) != NULL) {
		aiop->aio_free = reqp->aio_req_next;
		/* recycled entry may hold stale state; clear it all */
		bzero(reqp, sizeof (*reqp));
	} else {
		/*
		 * Check whether memory is getting tight.
		 * This is a temporary mechanism to avoid memory
		 * exhaustion by a single process until we come up
		 * with a per process solution such as setrlimit().
		 */
		if (freemem < desfree)
			return (EAGAIN);
		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
		if (reqp == NULL)
			return (EAGAIN);
	}
	/* wire the embedded uio/iov so aphysio() can use them directly */
	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
	reqp->aio_req.aio_private = reqp;
	reqp->aio_req_buf.b_offset = -1;
	reqp->aio_req_resultp = resultp;
	if (aio_hash_insert(reqp, aiop)) {
		/* duplicate resultp: return the entry to the free list */
		reqp->aio_req_next = aiop->aio_free;
		aiop->aio_free = reqp;
		return (EINVAL);
	}
	*nreqp = reqp;
	return (0);
}
25280Sstevel@tonic-gate 
25290Sstevel@tonic-gate /*
25300Sstevel@tonic-gate  * Allocate an aio_lio_t struct.
25310Sstevel@tonic-gate  */
25320Sstevel@tonic-gate static int
25330Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head)
25340Sstevel@tonic-gate {
25350Sstevel@tonic-gate 	aio_lio_t *liop;
25360Sstevel@tonic-gate 	aio_t *aiop = curproc->p_aio;
25370Sstevel@tonic-gate 
25380Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&aiop->aio_mutex));
25390Sstevel@tonic-gate 
25400Sstevel@tonic-gate 	if ((liop = aiop->aio_lio_free) != NULL) {
25410Sstevel@tonic-gate 		aiop->aio_lio_free = liop->lio_next;
25420Sstevel@tonic-gate 	} else {
25430Sstevel@tonic-gate 		/*
25440Sstevel@tonic-gate 		 * Check whether memory is getting tight.
25450Sstevel@tonic-gate 		 * This is a temporary mechanism to avoid memory
25460Sstevel@tonic-gate 		 * exhaustion by a single process until we come up
25470Sstevel@tonic-gate 		 * with a per process solution such as setrlimit().
25480Sstevel@tonic-gate 		 */
25490Sstevel@tonic-gate 		if (freemem < desfree)
25500Sstevel@tonic-gate 			return (EAGAIN);
25510Sstevel@tonic-gate 
25520Sstevel@tonic-gate 		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
25530Sstevel@tonic-gate 		if (liop == NULL)
25540Sstevel@tonic-gate 			return (EAGAIN);
25550Sstevel@tonic-gate 	}
25560Sstevel@tonic-gate 	*head = liop;
25570Sstevel@tonic-gate 	return (0);
25580Sstevel@tonic-gate }
25590Sstevel@tonic-gate 
/*
 * this is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio. normally,
 * the process will have completed the aio before unmapping the
 * segment. If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped. In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 *
 * Lock ordering in this function: aiop->aio_mutex is taken before
 * as->a_contents; p->p_lock is only taken with neither held.
 */

static int
aio_cleanup_thread(aio_t *aiop)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	int poked = 0;		/* set when awakened by pokelwps() */
	kcondvar_t *cvp;	/* condvar to sleep on this iteration */
	int exit_flag = 0;	/* safe to exit: completed I/O unlocked */
	int rqclnup = 0;	/* cleanup explicitly requested (DR) */

	/* block all maskable signals; this thread is not user-visible */
	sigfillset(&curthread->t_hold);
	sigdiffset(&curthread->t_hold, &cantmask);
	for (;;) {
		/*
		 * if a segment is being unmapped, and the current
		 * process's done queue is not empty, then every request
		 * on the doneq with locked resources should be forced
		 * to release their locks. By moving the doneq request
		 * to the cleanupq, aio_cleanup() will process the cleanupq,
		 * and place requests back onto the doneq. All requests
		 * processed by aio_cleanup() will have their physical
		 * resources unlocked.
		 */
		mutex_enter(&aiop->aio_mutex);
		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
			aiop->aio_flags |= AIO_CLEANUP;
			mutex_enter(&as->a_contents);
			/* latch and clear any DR cleanup request */
			if (aiop->aio_rqclnup) {
				aiop->aio_rqclnup = 0;
				rqclnup = 1;
			}

			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
			    aiop->aio_doneq) {
				aio_req_t *doneqhead = aiop->aio_doneq;
				mutex_exit(&as->a_contents);
				aiop->aio_doneq = NULL;
				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
			} else {
				mutex_exit(&as->a_contents);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		aio_cleanup(AIO_CLEANUP_THREAD);
		/*
		 * thread should block on the cleanupcv while
		 * AIO_CLEANUP is set.
		 */
		cvp = &aiop->aio_cleanupcv;
		mutex_enter(&aiop->aio_mutex);

		/* more work queued while we were cleaning; go around again */
		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
		    aiop->aio_notifyq != NULL ||
		    aiop->aio_portcleanupq != NULL) {
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&as->a_contents);

		/*
		 * AIO_CLEANUP determines when the cleanup thread
		 * should be active. This flag is set when
		 * the cleanup thread is awakened by as_unmap() or
		 * due to DR operations.
		 * The flag is cleared when the blocking as_unmap()
		 * that originally awakened us is allowed to
		 * complete. as_unmap() blocks when trying to
		 * unmap a segment that has SOFTLOCKed pages. when
		 * the segment's pages are all SOFTUNLOCKed,
		 * as->a_flags & AS_UNMAPWAIT should be zero.
		 *
		 * In case of cleanup request by DR, the flag is cleared
		 * once all the pending aio requests have been processed.
		 *
		 * The flag shouldn't be cleared right away if the
		 * cleanup thread was interrupted because the process
		 * is doing forkall(). This happens when cv_wait_sig()
		 * returns zero, because it was awakened by a pokelwps().
		 * If the process is not exiting, it must be doing forkall().
		 */
		if ((poked == 0) &&
		    ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
		    (aiop->aio_pending == 0))) {
			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
			cvp = &as->a_cv;
			rqclnup = 0;
		}
		mutex_exit(&aiop->aio_mutex);
		if (poked) {
			/*
			 * If the process is exiting/killed, don't return
			 * immediately without waiting for pending I/O's
			 * and releasing the page locks.
			 */
			if (p->p_flag & (SEXITLWPS|SKILLED)) {
				/*
				 * If exit_flag is set, then it is
				 * safe to exit because we have released
				 * page locks of completed I/O's.
				 * (break leaves the loop with a_contents
				 * held; it is dropped at the exit label.)
				 */
				if (exit_flag)
					break;

				mutex_exit(&as->a_contents);

				/*
				 * Wait for all the pending aio to complete.
				 */
				mutex_enter(&aiop->aio_mutex);
				aiop->aio_flags |= AIO_REQ_BLOCK;
				while (aiop->aio_pending != 0)
					cv_wait(&aiop->aio_cleanupcv,
					    &aiop->aio_mutex);
				mutex_exit(&aiop->aio_mutex);
				exit_flag = 1;
				continue;
			} else if (p->p_flag &
			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
				/*
				 * hold LWP until it
				 * is continued.
				 */
				mutex_exit(&as->a_contents);
				mutex_enter(&p->p_lock);
				stop(PR_SUSPENDED, SUSPEND_NORMAL);
				mutex_exit(&p->p_lock);
				poked = 0;
				continue;
			}
		} else {
			/*
			 * When started this thread will sleep on as->a_cv.
			 * as_unmap will awake this thread if the
			 * segment has SOFTLOCKed pages (poked = 0).
			 * 1. pokelwps() awakes this thread =>
			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
			 * 2. as_unmap awakes this thread =>
			 *    to break the loop it is necessary that
			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
			 *	memory to be unlocked)
			 *    - AIO_CLEANUP is not set
			 *	(if AIO_CLEANUP is set we have to wait for
			 *	pending requests. aio_done will send a signal
			 *	for every request which completes to continue
			 *	unmapping the corresponding address range)
			 * 3. A cleanup request will wake this thread up, ex.
			 *    by the DR operations. The aio_rqclnup flag will
			 *    be set.
			 */
			while (poked == 0) {
				/*
				 * The clean up requests that came in
				 * after we had just cleaned up, couldn't
				 * be causing the unmap thread to block - as
				 * unmap event happened first.
				 * Let aio_done() wake us up if it sees a need.
				 */
				if (aiop->aio_rqclnup &&
				    (aiop->aio_flags & AIO_CLEANUP) == 0)
					break;
				poked = !cv_wait_sig(cvp, &as->a_contents);
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_signal(cvp);
				if (aiop->aio_outstanding != 0)
					break;
			}
		}
		mutex_exit(&as->a_contents);
	}
exit:
	/* reached only via break above, with as->a_contents still held */
	mutex_exit(&as->a_contents);
	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
	aston(curthread);	/* make thread do post_syscall */
	return (0);
}
27460Sstevel@tonic-gate 
27470Sstevel@tonic-gate /*
27480Sstevel@tonic-gate  * save a reference to a user's outstanding aio in a hash list.
27490Sstevel@tonic-gate  */
27500Sstevel@tonic-gate static int
27510Sstevel@tonic-gate aio_hash_insert(
27520Sstevel@tonic-gate 	aio_req_t *aio_reqp,
27530Sstevel@tonic-gate 	aio_t *aiop)
27540Sstevel@tonic-gate {
27550Sstevel@tonic-gate 	long index;
27560Sstevel@tonic-gate 	aio_result_t *resultp = aio_reqp->aio_req_resultp;
27570Sstevel@tonic-gate 	aio_req_t *current;
27580Sstevel@tonic-gate 	aio_req_t **nextp;
27590Sstevel@tonic-gate 
27600Sstevel@tonic-gate 	index = AIO_HASH(resultp);
27610Sstevel@tonic-gate 	nextp = &aiop->aio_hash[index];
27620Sstevel@tonic-gate 	while ((current = *nextp) != NULL) {
27630Sstevel@tonic-gate 		if (current->aio_req_resultp == resultp)
27640Sstevel@tonic-gate 			return (DUPLICATE);
27650Sstevel@tonic-gate 		nextp = &current->aio_hash_next;
27660Sstevel@tonic-gate 	}
27670Sstevel@tonic-gate 	*nextp = aio_reqp;
27680Sstevel@tonic-gate 	aio_reqp->aio_hash_next = NULL;
27690Sstevel@tonic-gate 	return (0);
27700Sstevel@tonic-gate }
27710Sstevel@tonic-gate 
27720Sstevel@tonic-gate static int
27730Sstevel@tonic-gate (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
27740Sstevel@tonic-gate     cred_t *)
27750Sstevel@tonic-gate {
27760Sstevel@tonic-gate 	struct snode *sp;
27770Sstevel@tonic-gate 	dev_t		dev;
27780Sstevel@tonic-gate 	struct cb_ops  	*cb;
27790Sstevel@tonic-gate 	major_t		major;
27800Sstevel@tonic-gate 	int		(*aio_func)();
27810Sstevel@tonic-gate 
27820Sstevel@tonic-gate 	dev = vp->v_rdev;
27830Sstevel@tonic-gate 	major = getmajor(dev);
27840Sstevel@tonic-gate 
27850Sstevel@tonic-gate 	/*
27860Sstevel@tonic-gate 	 * return NULL for requests to files and STREAMs so
27870Sstevel@tonic-gate 	 * that libaio takes care of them.
27880Sstevel@tonic-gate 	 */
27890Sstevel@tonic-gate 	if (vp->v_type == VCHR) {
27900Sstevel@tonic-gate 		/* no stream device for kaio */
27910Sstevel@tonic-gate 		if (STREAMSTAB(major)) {
27920Sstevel@tonic-gate 			return (NULL);
27930Sstevel@tonic-gate 		}
27940Sstevel@tonic-gate 	} else {
27950Sstevel@tonic-gate 		return (NULL);
27960Sstevel@tonic-gate 	}
27970Sstevel@tonic-gate 
27980Sstevel@tonic-gate 	/*
27990Sstevel@tonic-gate 	 * Check old drivers which do not have async I/O entry points.
28000Sstevel@tonic-gate 	 */
28010Sstevel@tonic-gate 	if (devopsp[major]->devo_rev < 3)
28020Sstevel@tonic-gate 		return (NULL);
28030Sstevel@tonic-gate 
28040Sstevel@tonic-gate 	cb = devopsp[major]->devo_cb_ops;
28050Sstevel@tonic-gate 
28060Sstevel@tonic-gate 	if (cb->cb_rev < 1)
28070Sstevel@tonic-gate 		return (NULL);
28080Sstevel@tonic-gate 
28090Sstevel@tonic-gate 	/*
28100Sstevel@tonic-gate 	 * Check whether this device is a block device.
28110Sstevel@tonic-gate 	 * Kaio is not supported for devices like tty.
28120Sstevel@tonic-gate 	 */
28130Sstevel@tonic-gate 	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
28140Sstevel@tonic-gate 		return (NULL);
28150Sstevel@tonic-gate 
28160Sstevel@tonic-gate 	/*
28170Sstevel@tonic-gate 	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
28180Sstevel@tonic-gate 	 * We cannot call the driver directly. Instead return the
28190Sstevel@tonic-gate 	 * PXFS functions.
28200Sstevel@tonic-gate 	 */
28210Sstevel@tonic-gate 
28220Sstevel@tonic-gate 	if (IS_PXFSVP(vp)) {
28230Sstevel@tonic-gate 		if (mode & FREAD)
28240Sstevel@tonic-gate 			return (clpxfs_aio_read);
28250Sstevel@tonic-gate 		else
28260Sstevel@tonic-gate 			return (clpxfs_aio_write);
28270Sstevel@tonic-gate 	}
28280Sstevel@tonic-gate 	if (mode & FREAD)
28290Sstevel@tonic-gate 		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
28300Sstevel@tonic-gate 	else
28310Sstevel@tonic-gate 		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
28320Sstevel@tonic-gate 
28330Sstevel@tonic-gate 	/*
28340Sstevel@tonic-gate 	 * Do we need this ?
28350Sstevel@tonic-gate 	 * nodev returns ENXIO anyway.
28360Sstevel@tonic-gate 	 */
28370Sstevel@tonic-gate 	if (aio_func == nodev)
28380Sstevel@tonic-gate 		return (NULL);
28390Sstevel@tonic-gate 
28400Sstevel@tonic-gate 	sp = VTOS(vp);
28410Sstevel@tonic-gate 	smark(sp, SACC);
28420Sstevel@tonic-gate 	return (aio_func);
28430Sstevel@tonic-gate }
28440Sstevel@tonic-gate 
28450Sstevel@tonic-gate /*
28460Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28470Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28480Sstevel@tonic-gate  * We define this intermediate function that will do the right
28490Sstevel@tonic-gate  * thing for driver cases.
28500Sstevel@tonic-gate  */
28510Sstevel@tonic-gate 
28520Sstevel@tonic-gate static int
28530Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28540Sstevel@tonic-gate {
28550Sstevel@tonic-gate 	dev_t dev;
28560Sstevel@tonic-gate 	struct cb_ops  	*cb;
28570Sstevel@tonic-gate 
28580Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28590Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28600Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28610Sstevel@tonic-gate 	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
28620Sstevel@tonic-gate 
28630Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28640Sstevel@tonic-gate 
28650Sstevel@tonic-gate 	ASSERT(cb->cb_awrite != nodev);
28660Sstevel@tonic-gate 	return ((*cb->cb_awrite)(dev, aio, cred_p));
28670Sstevel@tonic-gate }
28680Sstevel@tonic-gate 
28690Sstevel@tonic-gate /*
28700Sstevel@tonic-gate  * Clustering: We want check_vp to return a function prototyped
28710Sstevel@tonic-gate  * correctly that will be common to both PXFS and regular case.
28720Sstevel@tonic-gate  * We define this intermediate function that will do the right
28730Sstevel@tonic-gate  * thing for driver cases.
28740Sstevel@tonic-gate  */
28750Sstevel@tonic-gate 
28760Sstevel@tonic-gate static int
28770Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28780Sstevel@tonic-gate {
28790Sstevel@tonic-gate 	dev_t dev;
28800Sstevel@tonic-gate 	struct cb_ops  	*cb;
28810Sstevel@tonic-gate 
28820Sstevel@tonic-gate 	ASSERT(vp->v_type == VCHR);
28830Sstevel@tonic-gate 	ASSERT(!IS_PXFSVP(vp));
28840Sstevel@tonic-gate 	dev = VTOS(vp)->s_dev;
28850Sstevel@tonic-gate 	ASSERT(!STREAMSTAB(getmajor(dev)));
28860Sstevel@tonic-gate 
28870Sstevel@tonic-gate 	cb = devopsp[getmajor(dev)]->devo_cb_ops;
28880Sstevel@tonic-gate 
28890Sstevel@tonic-gate 	ASSERT(cb->cb_aread != nodev);
28900Sstevel@tonic-gate 	return ((*cb->cb_aread)(dev, aio, cred_p));
28910Sstevel@tonic-gate }
28920Sstevel@tonic-gate 
/*
 * This routine is called when a largefile call is made by a 32bit
 * process on a ILP32 or LP64 kernel. All 64bit processes are large
 * file by definition and will call alio() instead.
 *
 * mode_arg:  LIO_WAIT (block until the whole list completes) or not.
 * aiocb_arg: user address of an array of nent 32-bit pointers to
 *            aiocb64_32_t control blocks.
 * nent:      number of list entries, 1 .. _AIO_LISTIO_MAX.
 * sigev:     optional user sigevent32 describing list-completion
 *            notification (signal, event port, or thread).
 *
 * Returns 0 or an errno.  Per-request failures are reported through
 * each aiocb's aio_resultp and folded into an overall EIO (any
 * failure) or ENOTSUP (any unsupported request).
 */
static int
alioLF(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;	/* cache of last fd looked up */
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;		/* kernel copy of user pointer array */
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
#ifdef _LP64
	aiocb_t		aiocb_n;
#endif
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;	/* 1 while head has no queued request */
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify32_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/* only 32-bit callers reach this largefile entry point */
	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			/*
			 * SIGEV_THREAD encodes the port id in sigev_signo
			 * and the user cookie in sigev_value directly.
			 */
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			/* SIGEV_PORT: sival_ptr points at a port_notify32_t */
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		/* pre-allocate the list-completion event for the port */
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			/* pre-build the siginfo sent when the list completes */
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	/*
	 * Submit each control block in turn.  A per-entry failure
	 * drops the list head's nent/refcnt accounting and moves on;
	 * it does not abort the rest of the list.
	 */
	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		/*
		 * On LP64 the largefile aiocb is widened to the native
		 * aiocb; on ILP32 the largefile-specific setup is used.
		 */
#ifdef	_LP64
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		/* remember the user aiocb address for aio_waitn et al. */
		reqp->aio_req_iocb.iocb32 = *ucbp;

		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				/* same port as the list head: share it */
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length I/O completes immediately */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* no request was ever queued against the list head */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
32560Sstevel@tonic-gate 
#ifdef  _SYSCALL32_IMPL
/*
 * Convert a 32-bit largefile control block (aiocb64_32_t) into the
 * native aiocb_t so the common aio_req_setup() path can be used on an
 * LP64 kernel.  Straight field-for-field copy, widening user pointers
 * through uintptr_t and sizes/offsets to their native types.
 */
static void
aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif
32850Sstevel@tonic-gate 
/*
 * This function is used only for largefile calls made by
 * 32 bit applications.
 *
 * Allocates and initializes an aio_req_t for one aiocb64_32_t:
 * optionally pre-builds the completion sigqueue, bumps the process's
 * pending/outstanding counts under aio_mutex, enqueues port/thread
 * notifications on aio_portpending, and fills in the request's uio
 * from the user's buffer/length/offset.  On success the new request
 * is returned through *reqpp.
 *
 * Returns 0, EAGAIN (sigqueue allocation failed), EIO (aio is in
 * AIO_REQ_BLOCK state), or an error from aio_req_alloc().
 */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int 		error;

	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		/* build the per-request completion signal up front */
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	/*
	 * NOTE(review): AIO_REQ_BLOCK appears to mean new requests are
	 * being refused (e.g. during cleanup/teardown) — confirm against
	 * the flag's definition before relying on this.
	 */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	/* counters and port enqueue must happen under aio_mutex */
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;		/* caller fills in iocb32 */
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
33620Sstevel@tonic-gate 
33630Sstevel@tonic-gate /*
33640Sstevel@tonic-gate  * This routine is called when a non largefile call is made by a 32bit
33650Sstevel@tonic-gate  * process on a ILP32 or LP64 kernel.
33660Sstevel@tonic-gate  */
33670Sstevel@tonic-gate static int
33680Sstevel@tonic-gate alio32(
33690Sstevel@tonic-gate 	int		mode_arg,
33700Sstevel@tonic-gate 	void		*aiocb_arg,
33710Sstevel@tonic-gate 	int		nent,
33721885Sraf 	void		*sigev)
33730Sstevel@tonic-gate {
33740Sstevel@tonic-gate 	file_t		*fp;
33750Sstevel@tonic-gate 	file_t		*prev_fp = NULL;
33760Sstevel@tonic-gate 	int		prev_mode = -1;
33770Sstevel@tonic-gate 	struct vnode	*vp;
33780Sstevel@tonic-gate 	aio_lio_t	*head;
33790Sstevel@tonic-gate 	aio_req_t	*reqp;
33800Sstevel@tonic-gate 	aio_t		*aiop;
33811885Sraf 	caddr_t		cbplist;
33820Sstevel@tonic-gate 	aiocb_t		cb;
33830Sstevel@tonic-gate 	aiocb_t		*aiocb = &cb;
33840Sstevel@tonic-gate #ifdef	_LP64
33850Sstevel@tonic-gate 	aiocb32_t	*cbp;
33860Sstevel@tonic-gate 	caddr32_t	*ucbp;
33870Sstevel@tonic-gate 	aiocb32_t	cb32;
33880Sstevel@tonic-gate 	aiocb32_t	*aiocb32 = &cb32;
33891885Sraf 	struct sigevent32	sigevk;
33900Sstevel@tonic-gate #else
33910Sstevel@tonic-gate 	aiocb_t		*cbp, **ucbp;
33921885Sraf 	struct sigevent	sigevk;
33930Sstevel@tonic-gate #endif
33940Sstevel@tonic-gate 	sigqueue_t	*sqp;
33950Sstevel@tonic-gate 	int		(*aio_func)();
33960Sstevel@tonic-gate 	int		mode;
33971885Sraf 	int		error = 0;
33981885Sraf 	int		aio_errors = 0;
33990Sstevel@tonic-gate 	int		i;
34000Sstevel@tonic-gate 	size_t		ssize;
34010Sstevel@tonic-gate 	int		deadhead = 0;
34020Sstevel@tonic-gate 	int		aio_notsupported = 0;
34031885Sraf 	int		lio_head_port;
34041885Sraf 	int		aio_port;
34051885Sraf 	int		aio_thread;
34060Sstevel@tonic-gate 	port_kevent_t	*pkevtp = NULL;
34074502Spraks 	int		portused = 0;
34080Sstevel@tonic-gate #ifdef	_LP64
34090Sstevel@tonic-gate 	port_notify32_t	pnotify;
34100Sstevel@tonic-gate #else
34110Sstevel@tonic-gate 	port_notify_t	pnotify;
34120Sstevel@tonic-gate #endif
34131885Sraf 	int		event;
34141885Sraf 
34150Sstevel@tonic-gate 	aiop = curproc->p_aio;
34160Sstevel@tonic-gate 	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
34170Sstevel@tonic-gate 		return (EINVAL);
34180Sstevel@tonic-gate 
34190Sstevel@tonic-gate #ifdef	_LP64
34200Sstevel@tonic-gate 	ssize = (sizeof (caddr32_t) * nent);
34210Sstevel@tonic-gate #else
34220Sstevel@tonic-gate 	ssize = (sizeof (aiocb_t *) * nent);
34230Sstevel@tonic-gate #endif
34240Sstevel@tonic-gate 	cbplist = kmem_alloc(ssize, KM_SLEEP);
34250Sstevel@tonic-gate 	ucbp = (void *)cbplist;
34260Sstevel@tonic-gate 
34271885Sraf 	if (copyin(aiocb_arg, cbplist, ssize) ||
34281885Sraf 	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
34290Sstevel@tonic-gate 		kmem_free(cbplist, ssize);
34300Sstevel@tonic-gate 		return (EFAULT);
34310Sstevel@tonic-gate 	}
34320Sstevel@tonic-gate 
34331885Sraf 	/* Event Ports  */
34341885Sraf 	if (sigev &&
34351885Sraf 	    (sigevk.sigev_notify == SIGEV_THREAD ||
34361885Sraf 	    sigevk.sigev_notify == SIGEV_PORT)) {
34371885Sraf 		if (sigevk.sigev_notify == SIGEV_THREAD) {
34381885Sraf 			pnotify.portnfy_port = sigevk.sigev_signo;
34391885Sraf 			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
34401885Sraf 		} else if (copyin(
34411885Sraf 		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
34421885Sraf 		    &pnotify, sizeof (pnotify))) {
34430Sstevel@tonic-gate 			kmem_free(cbplist, ssize);
34440Sstevel@tonic-gate 			return (EFAULT);
34450Sstevel@tonic-gate 		}
34461885Sraf 		error = port_alloc_event(pnotify.portnfy_port,
34471885Sraf 		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
34481885Sraf 		if (error) {
34491885Sraf 			if (error == ENOMEM || error == EAGAIN)
34501885Sraf 				error = EAGAIN;
34511885Sraf 			else
34521885Sraf 				error = EINVAL;
34531885Sraf 			kmem_free(cbplist, ssize);
34541885Sraf 			return (error);
34551885Sraf 		}
34561885Sraf 		lio_head_port = pnotify.portnfy_port;
34574502Spraks 		portused = 1;
34580Sstevel@tonic-gate 	}
34590Sstevel@tonic-gate 
34600Sstevel@tonic-gate 	/*
34610Sstevel@tonic-gate 	 * a list head should be allocated if notification is
34620Sstevel@tonic-gate 	 * enabled for this list.
34630Sstevel@tonic-gate 	 */
34640Sstevel@tonic-gate 	head = NULL;
34650Sstevel@tonic-gate 
34661885Sraf 	if (mode_arg == LIO_WAIT || sigev) {
34670Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
34680Sstevel@tonic-gate 		error = aio_lio_alloc(&head);
34690Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
34700Sstevel@tonic-gate 		if (error)
34710Sstevel@tonic-gate 			goto done;
34720Sstevel@tonic-gate 		deadhead = 1;
34730Sstevel@tonic-gate 		head->lio_nent = nent;
34740Sstevel@tonic-gate 		head->lio_refcnt = nent;
34751885Sraf 		head->lio_port = -1;
34761885Sraf 		head->lio_portkev = NULL;
34771885Sraf 		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
34781885Sraf 		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
34790Sstevel@tonic-gate 			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
34800Sstevel@tonic-gate 			if (sqp == NULL) {
34810Sstevel@tonic-gate 				error = EAGAIN;
34820Sstevel@tonic-gate 				goto done;
34830Sstevel@tonic-gate 			}
34840Sstevel@tonic-gate 			sqp->sq_func = NULL;
34850Sstevel@tonic-gate 			sqp->sq_next = NULL;
34860Sstevel@tonic-gate 			sqp->sq_info.si_code = SI_ASYNCIO;
34870Sstevel@tonic-gate 			sqp->sq_info.si_pid = curproc->p_pid;
34880Sstevel@tonic-gate 			sqp->sq_info.si_ctid = PRCTID(curproc);
34890Sstevel@tonic-gate 			sqp->sq_info.si_zoneid = getzoneid();
34900Sstevel@tonic-gate 			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
34911885Sraf 			sqp->sq_info.si_signo = sigevk.sigev_signo;
34920Sstevel@tonic-gate 			sqp->sq_info.si_value.sival_int =
34931885Sraf 			    sigevk.sigev_value.sival_int;
34940Sstevel@tonic-gate 			head->lio_sigqp = sqp;
34950Sstevel@tonic-gate 		} else {
34960Sstevel@tonic-gate 			head->lio_sigqp = NULL;
34970Sstevel@tonic-gate 		}
34981885Sraf 		if (pkevtp) {
34991885Sraf 			/*
35001885Sraf 			 * Prepare data to send when list of aiocb's has
35011885Sraf 			 * completed.
35021885Sraf 			 */
35031885Sraf 			port_init_event(pkevtp, (uintptr_t)sigev,
35041885Sraf 			    (void *)(uintptr_t)pnotify.portnfy_user,
35051885Sraf 			    NULL, head);
35061885Sraf 			pkevtp->portkev_events = AIOLIO;
35071885Sraf 			head->lio_portkev = pkevtp;
35081885Sraf 			head->lio_port = pnotify.portnfy_port;
35091885Sraf 		}
35100Sstevel@tonic-gate 	}
35110Sstevel@tonic-gate 
35120Sstevel@tonic-gate 	for (i = 0; i < nent; i++, ucbp++) {
35130Sstevel@tonic-gate 
35140Sstevel@tonic-gate 		/* skip entry if it can't be copied. */
35150Sstevel@tonic-gate #ifdef	_LP64
35160Sstevel@tonic-gate 		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
35171885Sraf 		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
35180Sstevel@tonic-gate #else
35190Sstevel@tonic-gate 		cbp = (aiocb_t *)*ucbp;
35201885Sraf 		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
35210Sstevel@tonic-gate #endif
35221885Sraf 		{
35230Sstevel@tonic-gate 			if (head) {
35240Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35250Sstevel@tonic-gate 				head->lio_nent--;
35260Sstevel@tonic-gate 				head->lio_refcnt--;
35270Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35280Sstevel@tonic-gate 			}
35290Sstevel@tonic-gate 			continue;
35300Sstevel@tonic-gate 		}
35310Sstevel@tonic-gate #ifdef	_LP64
35320Sstevel@tonic-gate 		/*
35330Sstevel@tonic-gate 		 * copy 32 bit structure into 64 bit structure
35340Sstevel@tonic-gate 		 */
35350Sstevel@tonic-gate 		aiocb_32ton(aiocb32, aiocb);
35360Sstevel@tonic-gate #endif /* _LP64 */
35370Sstevel@tonic-gate 
35380Sstevel@tonic-gate 		/* skip if opcode for aiocb is LIO_NOP */
35390Sstevel@tonic-gate 		mode = aiocb->aio_lio_opcode;
35400Sstevel@tonic-gate 		if (mode == LIO_NOP) {
35410Sstevel@tonic-gate 			cbp = NULL;
35420Sstevel@tonic-gate 			if (head) {
35430Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35440Sstevel@tonic-gate 				head->lio_nent--;
35450Sstevel@tonic-gate 				head->lio_refcnt--;
35460Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35470Sstevel@tonic-gate 			}
35480Sstevel@tonic-gate 			continue;
35490Sstevel@tonic-gate 		}
35500Sstevel@tonic-gate 
35510Sstevel@tonic-gate 		/* increment file descriptor's ref count. */
35520Sstevel@tonic-gate 		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
35530Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35540Sstevel@tonic-gate 			if (head) {
35550Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35560Sstevel@tonic-gate 				head->lio_nent--;
35570Sstevel@tonic-gate 				head->lio_refcnt--;
35580Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35590Sstevel@tonic-gate 			}
35600Sstevel@tonic-gate 			aio_errors++;
35610Sstevel@tonic-gate 			continue;
35620Sstevel@tonic-gate 		}
35630Sstevel@tonic-gate 
35640Sstevel@tonic-gate 		/*
35650Sstevel@tonic-gate 		 * check the permission of the partition
35660Sstevel@tonic-gate 		 */
35670Sstevel@tonic-gate 		if ((fp->f_flag & mode) == 0) {
35680Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
35690Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, EBADF);
35700Sstevel@tonic-gate 			if (head) {
35710Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
35720Sstevel@tonic-gate 				head->lio_nent--;
35730Sstevel@tonic-gate 				head->lio_refcnt--;
35740Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
35750Sstevel@tonic-gate 			}
35760Sstevel@tonic-gate 			aio_errors++;
35770Sstevel@tonic-gate 			continue;
35780Sstevel@tonic-gate 		}
35790Sstevel@tonic-gate 
35800Sstevel@tonic-gate 		/*
35810Sstevel@tonic-gate 		 * common case where requests are to the same fd
35820Sstevel@tonic-gate 		 * for the same r/w operation
35830Sstevel@tonic-gate 		 * for UFS, need to set EBADFD
35840Sstevel@tonic-gate 		 */
35851885Sraf 		vp = fp->f_vnode;
35861885Sraf 		if (fp != prev_fp || mode != prev_mode) {
35870Sstevel@tonic-gate 			aio_func = check_vp(vp, mode);
35880Sstevel@tonic-gate 			if (aio_func == NULL) {
35890Sstevel@tonic-gate 				prev_fp = NULL;
35900Sstevel@tonic-gate 				releasef(aiocb->aio_fildes);
35911885Sraf 				lio_set_uerror(&cbp->aio_resultp, EBADFD);
35920Sstevel@tonic-gate 				aio_notsupported++;
35930Sstevel@tonic-gate 				if (head) {
35940Sstevel@tonic-gate 					mutex_enter(&aiop->aio_mutex);
35950Sstevel@tonic-gate 					head->lio_nent--;
35960Sstevel@tonic-gate 					head->lio_refcnt--;
35970Sstevel@tonic-gate 					mutex_exit(&aiop->aio_mutex);
35980Sstevel@tonic-gate 				}
35990Sstevel@tonic-gate 				continue;
36000Sstevel@tonic-gate 			} else {
36010Sstevel@tonic-gate 				prev_fp = fp;
36020Sstevel@tonic-gate 				prev_mode = mode;
36030Sstevel@tonic-gate 			}
36040Sstevel@tonic-gate 		}
36051885Sraf 
36061885Sraf 		error = aio_req_setup(&reqp, aiop, aiocb,
36071885Sraf 		    (aio_result_t *)&cbp->aio_resultp, vp);
36081885Sraf 		if (error) {
36090Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
36100Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
36110Sstevel@tonic-gate 			if (head) {
36120Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
36130Sstevel@tonic-gate 				head->lio_nent--;
36140Sstevel@tonic-gate 				head->lio_refcnt--;
36150Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
36160Sstevel@tonic-gate 			}
36170Sstevel@tonic-gate 			aio_errors++;
36180Sstevel@tonic-gate 			continue;
36190Sstevel@tonic-gate 		}
36200Sstevel@tonic-gate 
36210Sstevel@tonic-gate 		reqp->aio_req_lio = head;
36220Sstevel@tonic-gate 		deadhead = 0;
36230Sstevel@tonic-gate 
36240Sstevel@tonic-gate 		/*
36250Sstevel@tonic-gate 		 * Set the errno field now before sending the request to
36260Sstevel@tonic-gate 		 * the driver to avoid a race condition
36270Sstevel@tonic-gate 		 */
36280Sstevel@tonic-gate 		(void) suword32(&cbp->aio_resultp.aio_errno,
36290Sstevel@tonic-gate 		    EINPROGRESS);
36300Sstevel@tonic-gate 
36311885Sraf 		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
36321885Sraf 
36331885Sraf 		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
36341885Sraf 		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
36351885Sraf 		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
36361885Sraf 		if (aio_port | aio_thread) {
36371885Sraf 			port_kevent_t *lpkevp;
36381885Sraf 			/*
36391885Sraf 			 * Prepare data to send with each aiocb completed.
36401885Sraf 			 */
36410Sstevel@tonic-gate #ifdef _LP64
36421885Sraf 			if (aio_port) {
36431885Sraf 				void *paddr = (void  *)(uintptr_t)
36441885Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
36451885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
36461885Sraf 					error = EFAULT;
36471885Sraf 			} else {	/* aio_thread */
36481885Sraf 				pnotify.portnfy_port =
36491885Sraf 				    aiocb32->aio_sigevent.sigev_signo;
36501885Sraf 				pnotify.portnfy_user =
36511885Sraf 				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
36521885Sraf 			}
36530Sstevel@tonic-gate #else
36541885Sraf 			if (aio_port) {
36551885Sraf 				void *paddr =
36561885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
36571885Sraf 				if (copyin(paddr, &pnotify, sizeof (pnotify)))
36581885Sraf 					error = EFAULT;
36591885Sraf 			} else {	/* aio_thread */
36601885Sraf 				pnotify.portnfy_port =
36611885Sraf 				    aiocb->aio_sigevent.sigev_signo;
36621885Sraf 				pnotify.portnfy_user =
36631885Sraf 				    aiocb->aio_sigevent.sigev_value.sival_ptr;
36641885Sraf 			}
36650Sstevel@tonic-gate #endif
36661885Sraf 			if (error)
36671885Sraf 				/* EMPTY */;
36681885Sraf 			else if (pkevtp != NULL &&
36691885Sraf 			    pnotify.portnfy_port == lio_head_port)
36701885Sraf 				error = port_dup_event(pkevtp, &lpkevp,
36711885Sraf 				    PORT_ALLOC_DEFAULT);
36721885Sraf 			else
36731885Sraf 				error = port_alloc_event(pnotify.portnfy_port,
36741885Sraf 				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
36751885Sraf 				    &lpkevp);
36761885Sraf 			if (error == 0) {
36771885Sraf 				port_init_event(lpkevp, (uintptr_t)cbp,
36781885Sraf 				    (void *)(uintptr_t)pnotify.portnfy_user,
36791885Sraf 				    aio_port_callback, reqp);
36801885Sraf 				lpkevp->portkev_events = event;
36811885Sraf 				reqp->aio_req_portkev = lpkevp;
36821885Sraf 				reqp->aio_req_port = pnotify.portnfy_port;
36831885Sraf 			}
36840Sstevel@tonic-gate 		}
36850Sstevel@tonic-gate 
36860Sstevel@tonic-gate 		/*
36870Sstevel@tonic-gate 		 * send the request to driver.
36880Sstevel@tonic-gate 		 */
36890Sstevel@tonic-gate 		if (error == 0) {
36900Sstevel@tonic-gate 			if (aiocb->aio_nbytes == 0) {
36910Sstevel@tonic-gate 				clear_active_fd(aiocb->aio_fildes);
36920Sstevel@tonic-gate 				aio_zerolen(reqp);
36930Sstevel@tonic-gate 				continue;
36940Sstevel@tonic-gate 			}
36950Sstevel@tonic-gate 			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
36960Sstevel@tonic-gate 			    CRED());
36970Sstevel@tonic-gate 		}
36980Sstevel@tonic-gate 
36990Sstevel@tonic-gate 		/*
37000Sstevel@tonic-gate 		 * the fd's ref count is not decremented until the IO has
37010Sstevel@tonic-gate 		 * completed unless there was an error.
37020Sstevel@tonic-gate 		 */
37030Sstevel@tonic-gate 		if (error) {
37040Sstevel@tonic-gate 			releasef(aiocb->aio_fildes);
37050Sstevel@tonic-gate 			lio_set_uerror(&cbp->aio_resultp, error);
37060Sstevel@tonic-gate 			if (head) {
37070Sstevel@tonic-gate 				mutex_enter(&aiop->aio_mutex);
37080Sstevel@tonic-gate 				head->lio_nent--;
37090Sstevel@tonic-gate 				head->lio_refcnt--;
37100Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37110Sstevel@tonic-gate 			}
37120Sstevel@tonic-gate 			if (error == ENOTSUP)
37130Sstevel@tonic-gate 				aio_notsupported++;
37140Sstevel@tonic-gate 			else
37150Sstevel@tonic-gate 				aio_errors++;
37164502Spraks 			lio_set_error(reqp, portused);
37170Sstevel@tonic-gate 		} else {
37180Sstevel@tonic-gate 			clear_active_fd(aiocb->aio_fildes);
37190Sstevel@tonic-gate 		}
37200Sstevel@tonic-gate 	}
37210Sstevel@tonic-gate 
37220Sstevel@tonic-gate 	if (aio_notsupported) {
37230Sstevel@tonic-gate 		error = ENOTSUP;
37240Sstevel@tonic-gate 	} else if (aio_errors) {
37250Sstevel@tonic-gate 		/*
37260Sstevel@tonic-gate 		 * return EIO if any request failed
37270Sstevel@tonic-gate 		 */
37280Sstevel@tonic-gate 		error = EIO;
37290Sstevel@tonic-gate 	}
37300Sstevel@tonic-gate 
37310Sstevel@tonic-gate 	if (mode_arg == LIO_WAIT) {
37320Sstevel@tonic-gate 		mutex_enter(&aiop->aio_mutex);
37330Sstevel@tonic-gate 		while (head->lio_refcnt > 0) {
37340Sstevel@tonic-gate 			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
37350Sstevel@tonic-gate 				mutex_exit(&aiop->aio_mutex);
37360Sstevel@tonic-gate 				error = EINTR;
37370Sstevel@tonic-gate 				goto done;
37380Sstevel@tonic-gate 			}
37390Sstevel@tonic-gate 		}
37400Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
37410Sstevel@tonic-gate 		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
37420Sstevel@tonic-gate 	}
37430Sstevel@tonic-gate 
37440Sstevel@tonic-gate done:
37450Sstevel@tonic-gate 	kmem_free(cbplist, ssize);
37460Sstevel@tonic-gate 	if (deadhead) {
37470Sstevel@tonic-gate 		if (head->lio_sigqp)
37480Sstevel@tonic-gate 			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
37491885Sraf 		if (head->lio_portkev)
37501885Sraf 			port_free_event(head->lio_portkev);
37510Sstevel@tonic-gate 		kmem_free(head, sizeof (aio_lio_t));
37520Sstevel@tonic-gate 	}
37530Sstevel@tonic-gate 	return (error);
37540Sstevel@tonic-gate }
37550Sstevel@tonic-gate 
37560Sstevel@tonic-gate 
#ifdef  _SYSCALL32_IMPL
/*
 * Widen a 32-bit application aiocb (aiocb32_t) into the kernel's native
 * 64-bit form.  Pointer-valued fields travel through uintptr_t so that
 * 32-bit user addresses are zero-extended rather than sign-extended.
 */
void
aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
{
	/* Simple scalar fields: copy/widen directly. */
	dest->aio_fildes = src->aio_fildes;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];

	/* I/O description: user buffer, transfer length, file offset. */
	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;

	/* Notification sigevent, widened member by member. */
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
	/*
	 * See the comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
}
#endif /* _SYSCALL32_IMPL */
37850Sstevel@tonic-gate 
/*
 * aio_port_callback() is called just before the event is retrieved from the
 * port.  The task of this callback function is to finish the work of the
 * transaction for the application, which means:
 * - copy out transaction data to the application
 *	(this thread is running in the right process context)
 * - keep track of the transaction (update counters)
 * - free allocated buffers
 * The aiocb pointer is the object element of the port_kevent_t structure.
 *
 * flag :
 *	PORT_CALLBACK_DEFAULT : do the copyout and free resources
 *	PORT_CALLBACK_CLOSE   : don't do the copyout, free resources
 */
38000Sstevel@tonic-gate 
38010Sstevel@tonic-gate /*ARGSUSED*/
38020Sstevel@tonic-gate int
38030Sstevel@tonic-gate aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
38040Sstevel@tonic-gate {
38050Sstevel@tonic-gate 	aio_t		*aiop = curproc->p_aio;
38060Sstevel@tonic-gate 	aio_req_t	*reqp = arg;
38070Sstevel@tonic-gate 	struct	iovec	*iov;
38080Sstevel@tonic-gate 	struct	buf	*bp;
38090Sstevel@tonic-gate 	void		*resultp;
38100Sstevel@tonic-gate 
38110Sstevel@tonic-gate 	if (pid != curproc->p_pid) {
38120Sstevel@tonic-gate 		/* wrong proc !!, can not deliver data here ... */
38130Sstevel@tonic-gate 		return (EACCES);
38140Sstevel@tonic-gate 	}
38150Sstevel@tonic-gate 
38160Sstevel@tonic-gate 	mutex_enter(&aiop->aio_portq_mutex);
38170Sstevel@tonic-gate 	reqp->aio_req_portkev = NULL;
38180Sstevel@tonic-gate 	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
38190Sstevel@tonic-gate 	mutex_exit(&aiop->aio_portq_mutex);
38200Sstevel@tonic-gate 	aphysio_unlock(reqp);		/* unlock used pages */
38210Sstevel@tonic-gate 	mutex_enter(&aiop->aio_mutex);
38220Sstevel@tonic-gate 	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
38230Sstevel@tonic-gate 		aio_req_free_port(aiop, reqp);	/* back to free list */
38240Sstevel@tonic-gate 		mutex_exit(&aiop->aio_mutex);
38250Sstevel@tonic-gate 		return (0);
38260Sstevel@tonic-gate 	}
38270Sstevel@tonic-gate 
38280Sstevel@tonic-gate 	iov = reqp->aio_req_uio.uio_iov;
38290Sstevel@tonic-gate 	bp = &reqp->aio_req_buf;
38300Sstevel@tonic-gate 	resultp = (void *)reqp->aio_req_resultp;
38310Sstevel@tonic-gate 	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
38320Sstevel@tonic-gate 	mutex_exit(&aiop->aio_mutex);
38330Sstevel@tonic-gate 	if (flag == PORT_CALLBACK_DEFAULT)
38340Sstevel@tonic-gate 		aio_copyout_result_port(iov, bp, resultp);
38350Sstevel@tonic-gate 	return (0);
38360Sstevel@tonic-gate }
3837