/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry point.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


#define AIO_64          0
#define AIO_32          1
#define AIO_LARGEFILE   2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *, int);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *, int);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
    6,
    SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
    (int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
    7,
    SE_NOUNLOAD | SE_64RVAL,
    kaio
};
#endif /* _SYSCALL32_IMPL */

#else /* _LP64 */

static struct sysent kaio_sysent = {
    7,
    SE_NOUNLOAD | SE_32RVAL1,
    kaio
};

#endif /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
    &mod_syscallops,
    "kernel Async I/O",
    &kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
    &mod_syscallops32,
    "kernel Async I/O for 32 bit compatibility",
    &kaio_sysent32
};
#endif /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
    MODREV_1,
    &modlsys,
#ifdef _SYSCALL32_IMPL
    &modlsys32,
#endif
    NULL
};

int
_init(void)
{
    int retval;

    if ((retval = mod_install(&modlinkage)) != 0)
        return (retval);

    return (0);
}

int
_fini(void)
{
    int retval;

    retval = mod_remove(&modlinkage);

    return (retval);
}

int
_info(struct modinfo *modinfop)
{
    return (mod_info(&modlinkage, modinfop));
}

#ifdef _LP64
static int64_t
kaioc(
    long a0,
    long a1,
    long a2,
    long a3,
    long a4,
    long a5)
{
    int error;
    long rval = 0;

    switch ((int)a0 & ~AIO_POLL_BIT) {
    case AIOREAD:
        error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
            (offset_t)a4, (aio_result_t *)a5, FREAD);
        break;
    case AIOWRITE:
        error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
            (offset_t)a4, (aio_result_t *)a5, FWRITE);
        break;
    case AIOWAIT:
        error = aiowait((struct timeval *)a1, (int)a2, &rval);
        break;
    case AIOWAITN:
        error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
            (timespec_t *)a4);
        break;
    case AIONOTIFY:
        error = aionotify();
        break;
    case AIOINIT:
        error = aioinit();
        break;
    case AIOSTART:
        error = aiostart();
        break;
    case AIOLIO:
        error = alio((int)a1, (aiocb_t **)a2, (int)a3,
            (struct sigevent *)a4);
        break;
    case AIOLIOWAIT:
        error = aliowait((int)a1, (void *)a2, (int)a3,
            (struct sigevent *)a4, AIO_64);
        break;
    case AIOSUSPEND:
        error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
            (int)a4, &rval, AIO_64);
        break;
    case AIOERROR:
        error = aioerror((void *)a1, AIO_64);
        break;
    case AIOAREAD:
        error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
        break;
    case AIOAWRITE:
        error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
        break;
    case AIOCANCEL:
        error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
        break;

    /*
     * The large file related calls are valid only on a
     * 32 bit kernel and not on a 64 bit kernel; on a
     * 64 bit kernel we convert large file calls to
     * regular 64 bit calls.
     */

    default:
        error = EINVAL;
    }
    if (error)
        return ((int64_t)set_errno(error));
    return (rval);
}
#endif

static int
kaio(
    ulong_t *uap,
    rval_t *rvp)
{
    long rval = 0;
    int error = 0;
    offset_t off;


    rvp->r_vals = 0;
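    /*
     * In the 32-bit syscall ABI the 64-bit file offset arrives in two
     * consecutive 32-bit argument slots whose order depends on the
     * machine's endianness; reassemble it before dispatching.
     */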
#if defined(_LITTLE_ENDIAN)
    off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
    off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

    switch (uap[0] & ~AIO_POLL_BIT) {
    /*
     * This must be the 32 bit system call on a 64 bit kernel.
     */
    case AIOREAD:
        return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
            (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
    case AIOWRITE:
        return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
            (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
    case AIOWAIT:
        error = aiowait((struct timeval *)uap[1], (int)uap[2],
            &rval);
        break;
    case AIOWAITN:
        error = aiowaitn((void *)uap[1], (uint_t)uap[2],
            (uint_t *)uap[3], (timespec_t *)uap[4]);
        break;
    case AIONOTIFY:
        return (aionotify());
    case AIOINIT:
        return (aioinit());
    case AIOSTART:
        return (aiostart());
    case AIOLIO:
        return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
            (void *)uap[4]));
    case AIOLIOWAIT:
        return (aliowait((int)uap[1], (void *)uap[2],
            (int)uap[3], (struct sigevent *)uap[4], AIO_32));
    case AIOSUSPEND:
        error = aiosuspend((void *)uap[1], (int)uap[2],
            (timespec_t *)uap[3], (int)uap[4],
            &rval, AIO_32);
        break;
    case AIOERROR:
        return (aioerror((void *)uap[1], AIO_32));
    case AIOAREAD:
        return (aiorw((int)uap[0], (void *)uap[1],
            FREAD, AIO_32));
    case AIOAWRITE:
        return (aiorw((int)uap[0], (void *)uap[1],
            FWRITE, AIO_32));
    case AIOCANCEL:
        error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
            AIO_32));
        break;
    case AIOLIO64:
        return (alioLF((int)uap[1], (void *)uap[2],
            (int)uap[3], (void *)uap[4]));
    case AIOLIOWAIT64:
        return (aliowait(uap[1], (void *)uap[2],
            (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
    case AIOSUSPEND64:
        error = aiosuspend((void *)uap[1], (int)uap[2],
            (timespec_t *)uap[3], (int)uap[4], &rval,
            AIO_LARGEFILE);
        break;
    case AIOERROR64:
        return (aioerror((void *)uap[1], AIO_LARGEFILE));
    case AIOAREAD64:
        return (aiorw((int)uap[0], (void *)uap[1], FREAD,
            AIO_LARGEFILE));
    case AIOAWRITE64:
        return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
            AIO_LARGEFILE));
    case AIOCANCEL64:
        error = (aio_cancel((int)uap[1], (void *)uap[2],
            &rval, AIO_LARGEFILE));
        break;
    default:
        return (EINVAL);
    }

    rvp->r_val1 = rval;
    return (error);
}

/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
    aio_t *aiop;

    aiop = curproc->p_aio;
    if (aiop == NULL)
        return (0);

    mutex_enter(&aiop->aio_mutex);
    aiop->aio_notifycnt++;
    cv_broadcast(&aiop->aio_waitcv);
    mutex_exit(&aiop->aio_mutex);

    return (0);
}
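
/*
 * Note: aio_notifycnt ticks up here and is consumed by aiowait(),
 * aiowaitn() and aiosuspend() below, which treat a nonzero count as
 * "the user-level library has completed requests on its own done
 * queue", letting those calls return without reaping a kernel request.
 */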

static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
    timestruc_t **rqtp, int *blocking)
{
#ifdef _SYSCALL32_IMPL
    struct timeval32 wait_time_32;
#endif
    struct timeval wait_time;
    model_t model = get_udatamodel();

    *rqtp = NULL;
    if (timout == NULL) {       /* wait indefinitely */
        *blocking = 1;
        return (0);
    }

    /*
     * Need to correctly compare with the -1 passed in for a user
     * address pointer, with both 32 bit and 64 bit apps.
     */
    if (model == DATAMODEL_NATIVE) {
        if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */
            *blocking = 0;
            return (0);
        }

        if (copyin(timout, &wait_time, sizeof (wait_time)))
            return (EFAULT);
    }
#ifdef _SYSCALL32_IMPL
    else {
        /*
         * -1 from a 32bit app. It will not get sign extended.
         * don't wait if -1.
         */
        if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
            *blocking = 0;
            return (0);
        }

        if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
            return (EFAULT);
        TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
    }
#endif /* _SYSCALL32_IMPL */

    if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) { /* don't wait */
        *blocking = 0;
        return (0);
    }

    if (wait_time.tv_sec < 0 ||
        wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
        return (EINVAL);

    rqtime->tv_sec = wait_time.tv_sec;
    rqtime->tv_nsec = wait_time.tv_usec * 1000;
    *rqtp = rqtime;
    *blocking = 1;

    return (0);
}
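
/*
 * Summary of the timeout contract implemented by timeval2reltime()
 * above and timespec2reltime() below:
 *
 *  timout == NULL          block indefinitely (*rqtp stays NULL)
 *  timout == (void *)-1    poll, don't block (timeval variant only)
 *  zero-valued time        poll, don't block
 *  negative/out-of-range   EINVAL
 *  otherwise               *rqtp is a validated relative time that the
 *                          callers convert to an absolute deadline for
 *                          cv_waituntil_sig().
 */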

static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
    timestruc_t **rqtp, int *blocking)
{
#ifdef _SYSCALL32_IMPL
    timespec32_t wait_time_32;
#endif
    model_t model = get_udatamodel();

    *rqtp = NULL;
    if (timout == NULL) {
        *blocking = 1;
        return (0);
    }

    if (model == DATAMODEL_NATIVE) {
        if (copyin(timout, rqtime, sizeof (*rqtime)))
            return (EFAULT);
    }
#ifdef _SYSCALL32_IMPL
    else {
        if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
            return (EFAULT);
        TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
    }
#endif /* _SYSCALL32_IMPL */

    if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
        *blocking = 0;
        return (0);
    }

    if (rqtime->tv_sec < 0 ||
        rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
        return (EINVAL);

    *rqtp = rqtime;
    *blocking = 1;

    return (0);
}

/*ARGSUSED*/
static int
aiowait(
    struct timeval *timout,
    int dontblockflg,
    long *rval)
{
    int error;
    aio_t *aiop;
    aio_req_t *reqp;
    clock_t status;
    int blocking;
    int timecheck;
    timestruc_t rqtime;
    timestruc_t *rqtp;

    aiop = curproc->p_aio;
    if (aiop == NULL)
        return (EINVAL);

    /*
     * Establish the absolute future time for the timeout.
     */
    error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
    if (error)
        return (error);
    if (rqtp) {
        timestruc_t now;
        timecheck = timechanged;
        gethrestime(&now);
        timespecadd(rqtp, &now);
    }

    mutex_enter(&aiop->aio_mutex);
    for (;;) {
        /* process requests on poll queue */
        if (aiop->aio_pollq) {
            mutex_exit(&aiop->aio_mutex);
            aio_cleanup(0);
            mutex_enter(&aiop->aio_mutex);
        }
        if ((reqp = aio_req_remove(NULL)) != NULL) {
            *rval = (long)reqp->aio_req_resultp;
            break;
        }
        /* user-level done queue might not be empty */
        if (aiop->aio_notifycnt > 0) {
            aiop->aio_notifycnt--;
            *rval = 1;
            break;
        }
        /* don't block if no outstanding aio */
        if (aiop->aio_outstanding == 0 && dontblockflg) {
            error = EINVAL;
            break;
        }
        if (blocking) {
            status = cv_waituntil_sig(&aiop->aio_waitcv,
                &aiop->aio_mutex, rqtp, timecheck);

            if (status > 0)     /* check done queue again */
                continue;
            if (status == 0) {  /* interrupted by a signal */
                error = EINTR;
                *rval = -1;
            } else {            /* timer expired */
                error = ETIME;
            }
        }
        break;
    }
    mutex_exit(&aiop->aio_mutex);
    if (reqp) {
        aphysio_unlock(reqp);
        aio_copyout_result(reqp);
        mutex_enter(&aiop->aio_mutex);
        aio_req_free(aiop, reqp);
        mutex_exit(&aiop->aio_mutex);
    }
    return (error);
}
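
/*
 * For reference, a minimal userland sketch of the old aiowait(3)
 * wrapper this call backs (assumed signature; variable names are
 * illustrative only):
 *
 *  struct timeval tv = { 1, 0 };
 *  aio_result_t *res = aiowait(&tv);   // &tv, NULL, or (void *)-1
 *
 *  if (res != NULL && res != (aio_result_t *)-1) {
 *      // res->aio_return / res->aio_errno describe one completed
 *      // aioread/aiowrite request
 *  }
 */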

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted
 * with lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */
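
/*
 * For reference, a minimal userland sketch of the aio_waitn(3RT)
 * interface this call backs (assumed wrapper; variable names are
 * illustrative only):
 *
 *  aiocb_t *done[8];
 *  uint_t nwait = 4;               // reap at least 4 requests ...
 *  timespec_t ts = { 5, 0 };       // ... or give up after 5 seconds
 *
 *  if (aio_waitn(done, 8, &nwait, &ts) == 0) {
 *      // done[0 .. nwait-1] now point at completed aiocbs
 *  }
 */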

/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
    int error = 0;
    aio_t *aiop;
    aio_req_t *reqlist = NULL;
    caddr_t iocblist = NULL;    /* array of iocb ptr's */
    uint_t waitcnt, cnt = 0;    /* iocb cnt */
    size_t iocbsz;              /* user's iocb size */
    size_t riocbsz;             /* returned iocb size */
    int iocb_index = 0;
    model_t model = get_udatamodel();
    int blocking = 1;
    int timecheck;
    timestruc_t rqtime;
    timestruc_t *rqtp;

    aiop = curproc->p_aio;
    if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
        return (EINVAL);

    if (aiop->aio_outstanding == 0)
        return (EAGAIN);

    if (copyin(nwait, &waitcnt, sizeof (uint_t)))
        return (EFAULT);

    /* set *nwait to zero, if we must return prematurely */
    if (copyout(&cnt, nwait, sizeof (uint_t)))
        return (EFAULT);

    if (waitcnt == 0) {
        blocking = 0;
        rqtp = NULL;
        waitcnt = nent;
    } else {
        error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
        if (error)
            return (error);
    }

    if (model == DATAMODEL_NATIVE)
        iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef _SYSCALL32_IMPL
    else
        iocbsz = (sizeof (caddr32_t) * nent);
#endif /* _SYSCALL32_IMPL */

    /*
     * Only one aio_waitn call is allowed at a time.
     * The active aio_waitn will collect all requests
     * out of the "done" list and if necessary it will wait
     * for some/all pending requests to fulfill the nwait
     * parameter.
     * A second or further aio_waitn call will sleep here
     * until the active aio_waitn finishes and leaves the kernel.
     * If the second call does not block (poll), then return
     * immediately with the error code EAGAIN.
     * If the second call should block, then sleep here, but
     * do not touch the timeout.  The timeout starts when this
     * aio_waitn call becomes active.
     */

    mutex_enter(&aiop->aio_mutex);

    while (aiop->aio_flags & AIO_WAITN) {
        if (blocking == 0) {
            mutex_exit(&aiop->aio_mutex);
            return (EAGAIN);
        }

        /* block, no timeout */
        aiop->aio_flags |= AIO_WAITN_PENDING;
        if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
            mutex_exit(&aiop->aio_mutex);
            return (EINTR);
        }
    }

    /*
     * Establish the absolute future time for the timeout.
     */
    if (rqtp) {
        timestruc_t now;
        timecheck = timechanged;
        gethrestime(&now);
        timespecadd(rqtp, &now);
    }

    if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
        kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
        aiop->aio_iocb = NULL;
    }

    if (aiop->aio_iocb == NULL) {
        iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
        if (iocblist == NULL) {
            mutex_exit(&aiop->aio_mutex);
            return (ENOMEM);
        }
        aiop->aio_iocb = (aiocb_t **)iocblist;
        aiop->aio_iocbsz = iocbsz;
    } else {
        iocblist = (char *)aiop->aio_iocb;
    }

    aiop->aio_waitncnt = waitcnt;
    aiop->aio_flags |= AIO_WAITN;

    for (;;) {
        /* push requests on poll queue to done queue */
        if (aiop->aio_pollq) {
            mutex_exit(&aiop->aio_mutex);
            aio_cleanup(0);
            mutex_enter(&aiop->aio_mutex);
        }

        /* check for requests on done queue */
        if (aiop->aio_doneq) {
            cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
            aiop->aio_waitncnt = waitcnt - cnt;
        }

        /* user-level done queue might not be empty */
        if (aiop->aio_notifycnt > 0) {
            aiop->aio_notifycnt--;
            error = 0;
            break;
        }

        /*
         * If we are here a second time as a result of timer
         * expiration, we reset error if there are enough
         * aiocb's to satisfy the request.
         * We return also if all requests are already done
         * and we picked up the whole done queue.
         */

        if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
            aiop->aio_doneq == NULL)) {
            error = 0;
            break;
        }

        if ((cnt < waitcnt) && blocking) {
            int rval = cv_waituntil_sig(&aiop->aio_waitcv,
                &aiop->aio_mutex, rqtp, timecheck);
            if (rval > 0)
                continue;
            if (rval < 0) {
                error = ETIME;
                blocking = 0;
                continue;
            }
            error = EINTR;
        }
        break;
    }

    mutex_exit(&aiop->aio_mutex);

    if (cnt > 0) {

        iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
            aiop, model);

        if (model == DATAMODEL_NATIVE)
            riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef _SYSCALL32_IMPL
        else
            riocbsz = (sizeof (caddr32_t) * cnt);
#endif /* _SYSCALL32_IMPL */

        if (copyout(iocblist, uiocb, riocbsz) ||
            copyout(&cnt, nwait, sizeof (uint_t)))
            error = EFAULT;
    }

    /* check if there is another thread waiting for execution */
    mutex_enter(&aiop->aio_mutex);
    aiop->aio_flags &= ~AIO_WAITN;
    if (aiop->aio_flags & AIO_WAITN_PENDING) {
        aiop->aio_flags &= ~AIO_WAITN_PENDING;
        cv_signal(&aiop->aio_waitncv);
    }
    mutex_exit(&aiop->aio_mutex);

    return (error);
}

/*
 * aio_unlock_requests
 * copies out the result of each request as well as its return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
 */

static int
aio_unlock_requests(
    caddr_t iocblist,
    int iocb_index,
    aio_req_t *reqlist,
    aio_t *aiop,
    model_t model)
{
    aio_req_t *reqp, *nreqp;

    if (model == DATAMODEL_NATIVE) {
        for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
            (((caddr_t *)iocblist)[iocb_index++]) =
                reqp->aio_req_iocb.iocb;
            nreqp = reqp->aio_req_next;
            aphysio_unlock(reqp);
            aio_copyout_result(reqp);
            mutex_enter(&aiop->aio_mutex);
            aio_req_free(aiop, reqp);
            mutex_exit(&aiop->aio_mutex);
        }
    }
#ifdef _SYSCALL32_IMPL
    else {
        for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
            ((caddr32_t *)iocblist)[iocb_index++] =
                reqp->aio_req_iocb.iocb32;
            nreqp = reqp->aio_req_next;
            aphysio_unlock(reqp);
            aio_copyout_result(reqp);
            mutex_enter(&aiop->aio_mutex);
            aio_req_free(aiop, reqp);
            mutex_exit(&aiop->aio_mutex);
        }
    }
#endif /* _SYSCALL32_IMPL */
    return (iocb_index);
}

/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a simple linked list
 * - done queue is a double linked list
 */
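
/*
 * A small worked example of the detach below (illustrative only):
 * with a circular doneq A <-> B <-> C (aiop->aio_doneq == A) and
 * max == 2, the loop clears AIO_DONEQ on A and B and stops with
 * q2work == C.  The partial branch then points B's next at the old
 * *reqlist, prepends A (so *reqlist becomes A -> B -> old list),
 * and stitches the doneq back together as the single element C.
 */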

static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
    aio_req_t *q2, *q2work, *list;
    int count = 0;

    list = *reqlist;
    q2 = aiop->aio_doneq;
    q2work = q2;
    while (max-- > 0) {
        q2work->aio_req_flags &= ~AIO_DONEQ;
        q2work = q2work->aio_req_next;
        count++;
        if (q2work == q2)
            break;
    }

    if (q2work == q2) {
        /* all elements visited */
        q2->aio_req_prev->aio_req_next = list;
        list = q2;
        aiop->aio_doneq = NULL;
    } else {
        /*
         * max < number of elements in the doneq;
         * detach only the required amount of elements
         * out of the doneq
         */
        q2work->aio_req_prev->aio_req_next = list;
        list = q2;

        aiop->aio_doneq = q2work;
        q2work->aio_req_prev = q2->aio_req_prev;
        q2->aio_req_prev->aio_req_next = q2work;
    }
    *reqlist = list;
    return (count);
}

/*ARGSUSED*/
static int
aiosuspend(
    void *aiocb,
    int nent,
    struct timespec *timout,
    int flag,
    long *rval,
    int run_mode)
{
    int error;
    aio_t *aiop;
    aio_req_t *reqp, *found, *next;
    caddr_t cbplist = NULL;
    aiocb_t *cbp, **ucbp;
#ifdef _SYSCALL32_IMPL
    aiocb32_t *cbp32;
    caddr32_t *ucbp32;
#endif /* _SYSCALL32_IMPL */
    aiocb64_32_t *cbp64;
    int rv;
    int i;
    size_t ssize;
    model_t model = get_udatamodel();
    int blocking;
    int timecheck;
    timestruc_t rqtime;
    timestruc_t *rqtp;

    aiop = curproc->p_aio;
    if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
        return (EINVAL);

    /*
     * Establish the absolute future time for the timeout.
     */
    error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
    if (error)
        return (error);
    if (rqtp) {
        timestruc_t now;
        timecheck = timechanged;
        gethrestime(&now);
        timespecadd(rqtp, &now);
    }

    /*
     * If we are not blocking and there's no IO complete,
     * skip the aiocb copyin.
     */
    if (!blocking && (aiop->aio_pollq == NULL) &&
        (aiop->aio_doneq == NULL)) {
        return (EAGAIN);
    }

    if (model == DATAMODEL_NATIVE)
        ssize = (sizeof (aiocb_t *) * nent);
#ifdef _SYSCALL32_IMPL
    else
        ssize = (sizeof (caddr32_t) * nent);
#endif /* _SYSCALL32_IMPL */

    cbplist = kmem_alloc(ssize, KM_NOSLEEP);
    if (cbplist == NULL)
        return (ENOMEM);

    if (copyin(aiocb, cbplist, ssize)) {
        error = EFAULT;
        goto done;
    }

    found = NULL;
    /*
     * we need to get the aio_cleanupq_mutex since we call
     * aio_req_done().
     */
    mutex_enter(&aiop->aio_cleanupq_mutex);
    mutex_enter(&aiop->aio_mutex);
    for (;;) {
        /* push requests on poll queue to done queue */
        if (aiop->aio_pollq) {
            mutex_exit(&aiop->aio_mutex);
            mutex_exit(&aiop->aio_cleanupq_mutex);
            aio_cleanup(0);
            mutex_enter(&aiop->aio_cleanupq_mutex);
            mutex_enter(&aiop->aio_mutex);
        }
        /* check for requests on done queue */
        if (aiop->aio_doneq) {
            if (model == DATAMODEL_NATIVE)
                ucbp = (aiocb_t **)cbplist;
#ifdef _SYSCALL32_IMPL
            else
                ucbp32 = (caddr32_t *)cbplist;
#endif /* _SYSCALL32_IMPL */
            for (i = 0; i < nent; i++) {
                if (model == DATAMODEL_NATIVE) {
                    if ((cbp = *ucbp++) == NULL)
                        continue;
                    if (run_mode != AIO_LARGEFILE)
                        reqp = aio_req_done(
                            &cbp->aio_resultp);
                    else {
                        cbp64 = (aiocb64_32_t *)cbp;
                        reqp = aio_req_done(
                            &cbp64->aio_resultp);
                    }
                }
#ifdef _SYSCALL32_IMPL
                else {
                    if (run_mode == AIO_32) {
                        if ((cbp32 =
                            (aiocb32_t *)(uintptr_t)
                            *ucbp32++) == NULL)
                            continue;
                        reqp = aio_req_done(
                            &cbp32->aio_resultp);
                    } else if (run_mode == AIO_LARGEFILE) {
                        if ((cbp64 =
                            (aiocb64_32_t *)(uintptr_t)
                            *ucbp32++) == NULL)
                            continue;
                        reqp = aio_req_done(
                            &cbp64->aio_resultp);
                    }

                }
#endif /* _SYSCALL32_IMPL */
                if (reqp) {
                    reqp->aio_req_next = found;
                    found = reqp;
                }
                if (aiop->aio_doneq == NULL)
                    break;
            }
            if (found)
                break;
        }
        if (aiop->aio_notifycnt > 0) {
            /*
             * nothing on the kernel's queue. the user
             * has notified the kernel that it has items
             * on a user-level queue.
             */
            aiop->aio_notifycnt--;
            *rval = 1;
            error = 0;
            break;
        }
        /* don't block if nothing is outstanding */
        if (aiop->aio_outstanding == 0) {
            error = EAGAIN;
            break;
        }
        if (blocking) {
            /*
             * drop the aio_cleanupq_mutex as we are
             * going to block.
             */
            mutex_exit(&aiop->aio_cleanupq_mutex);
            rv = cv_waituntil_sig(&aiop->aio_waitcv,
                &aiop->aio_mutex, rqtp, timecheck);
            /*
             * we have to drop aio_mutex and
             * grab it in the right order.
             */
            mutex_exit(&aiop->aio_mutex);
            mutex_enter(&aiop->aio_cleanupq_mutex);
            mutex_enter(&aiop->aio_mutex);
            if (rv > 0)     /* check done queue again */
                continue;
            if (rv == 0)    /* interrupted by a signal */
                error = EINTR;
            else            /* timer expired */
                error = ETIME;
        } else {
            error = EAGAIN;
        }
        break;
    }
    mutex_exit(&aiop->aio_mutex);
    mutex_exit(&aiop->aio_cleanupq_mutex);
    for (reqp = found; reqp != NULL; reqp = next) {
        next = reqp->aio_req_next;
        aphysio_unlock(reqp);
        aio_copyout_result(reqp);
        mutex_enter(&aiop->aio_mutex);
        aio_req_free(aiop, reqp);
        mutex_exit(&aiop->aio_mutex);
    }
done:
    kmem_free(cbplist, ssize);
    return (error);
}
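
/*
 * For reference, a minimal userland sketch of the POSIX aio_suspend(3RT)
 * interface this call backs (variable names are illustrative only):
 *
 *  const aiocb_t *list[2] = { &cb0, &cb1 };
 *  timespec_t ts = { 1, 0 };
 *
 *  // returns 0 once at least one listed request has completed, or -1
 *  // with errno == EAGAIN on timeout / EINTR on a signal
 *  if (aio_suspend(list, 2, &ts) == 0)
 *      (void) aio_error(&cb0);
 */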

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
    proc_t *p = curproc;
    aio_t *aiop;
    mutex_enter(&p->p_lock);
    if ((aiop = p->p_aio) == NULL) {
        aiop = aio_aiop_alloc();
        p->p_aio = aiop;
    }
    mutex_exit(&p->p_lock);
    if (aiop == NULL)
        return (ENOMEM);
    return (0);
}

/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
    proc_t *p = curproc;
    aio_t *aiop;
    int first, error = 0;

    if (p->p_lwpcnt == 1)
        return (EDEADLK);
    mutex_enter(&p->p_lock);
    if ((aiop = p->p_aio) == NULL)
        error = EINVAL;
    else {
        first = aiop->aio_ok;
        if (aiop->aio_ok == 0)
            aiop->aio_ok = 1;
    }
    mutex_exit(&p->p_lock);
    if (error == 0 && first == 0) {
        return (aio_cleanup_thread(aiop));
        /* should return only to exit */
    }
    return (error);
}
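
/*
 * A sketch of the expected usage (hypothetical, grounded only in the
 * contract described above): the userland aio library dedicates one
 * LWP that issues the AIOSTART kaio call once and then lives inside
 * aio_cleanup_thread() until process exit, guaranteeing that
 * SOFTLOCKed aio pages are eventually released.
 */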

/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 */
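
/*
 * For reference, a minimal userland sketch of the event-port
 * notification this sets up (assuming port_create(3C) and the
 * SIGEV_PORT convention; variable names are illustrative only):
 *
 *  int port = port_create();
 *  port_notify_t pn = { port, my_cookie };     // port + user cookie
 *  aiocb_t cb;
 *  ...
 *  cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *  cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *  (void) aio_read(&cb);   // completion is later retrieved via port_get()
 */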

static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
    aio_req_t *reqp, int event)
{
    port_kevent_t *pkevp = NULL;
    int error;

    error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
        PORT_SOURCE_AIO, &pkevp);
    if (error) {
        if ((error == ENOMEM) || (error == EAGAIN))
            error = EAGAIN;
        else
            error = EINVAL;
    } else {
        port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
            aio_port_callback, reqp);
        pkevp->portkev_events = event;
        reqp->aio_req_portkev = pkevp;
        reqp->aio_req_port = pntfy->portnfy_port;
    }
    return (error);
}

#ifdef _LP64

/*
 * Asynchronous list IO. A chain of aiocb's are copied in
 * one at a time. If the aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called. Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests. Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy. Processing a list could adversely impact
 * the driver's interrupt latency.
 */
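
/*
 * For reference, a minimal userland sketch of the lio_listio(3RT)
 * interface this call backs (variable names are illustrative only):
 *
 *  aiocb_t rd = { 0 }, wr = { 0 };
 *  aiocb_t *list[2] = { &rd, &wr };
 *
 *  rd.aio_lio_opcode = LIO_READ;   // plus fd/buf/nbytes/offset
 *  wr.aio_lio_opcode = LIO_WRITE;
 *
 *  // LIO_WAIT blocks until both complete; LIO_NOWAIT returns at once
 *  // and delivers the sigevent when the whole list is done
 *  if (lio_listio(LIO_WAIT, list, 2, NULL) != 0)
 *      perror("lio_listio");
 */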
11930Sstevel@tonic-gate static int
alio(int mode_arg,aiocb_t ** aiocb_arg,int nent,struct sigevent * sigev)11940Sstevel@tonic-gate alio(
11951885Sraf int mode_arg,
11961885Sraf aiocb_t **aiocb_arg,
11971885Sraf int nent,
11981885Sraf struct sigevent *sigev)
11990Sstevel@tonic-gate {
12000Sstevel@tonic-gate file_t *fp;
12010Sstevel@tonic-gate file_t *prev_fp = NULL;
12020Sstevel@tonic-gate int prev_mode = -1;
12030Sstevel@tonic-gate struct vnode *vp;
12040Sstevel@tonic-gate aio_lio_t *head;
12050Sstevel@tonic-gate aio_req_t *reqp;
12060Sstevel@tonic-gate aio_t *aiop;
12070Sstevel@tonic-gate caddr_t cbplist;
12080Sstevel@tonic-gate aiocb_t cb;
12090Sstevel@tonic-gate aiocb_t *aiocb = &cb;
12101885Sraf aiocb_t *cbp;
12111885Sraf aiocb_t **ucbp;
12120Sstevel@tonic-gate struct sigevent sigevk;
12130Sstevel@tonic-gate sigqueue_t *sqp;
12140Sstevel@tonic-gate int (*aio_func)();
12150Sstevel@tonic-gate int mode;
12160Sstevel@tonic-gate int error = 0;
12170Sstevel@tonic-gate int aio_errors = 0;
12180Sstevel@tonic-gate int i;
12190Sstevel@tonic-gate size_t ssize;
12200Sstevel@tonic-gate int deadhead = 0;
12210Sstevel@tonic-gate int aio_notsupported = 0;
12221885Sraf int lio_head_port;
12231885Sraf int aio_port;
12241885Sraf int aio_thread;
12250Sstevel@tonic-gate port_kevent_t *pkevtp = NULL;
12264502Spraks int portused = 0;
12270Sstevel@tonic-gate port_notify_t pnotify;
12281885Sraf int event;
12290Sstevel@tonic-gate
12300Sstevel@tonic-gate aiop = curproc->p_aio;
12310Sstevel@tonic-gate if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
12320Sstevel@tonic-gate return (EINVAL);
12330Sstevel@tonic-gate
12340Sstevel@tonic-gate ssize = (sizeof (aiocb_t *) * nent);
12350Sstevel@tonic-gate cbplist = kmem_alloc(ssize, KM_SLEEP);
12360Sstevel@tonic-gate ucbp = (aiocb_t **)cbplist;
12370Sstevel@tonic-gate
12381885Sraf if (copyin(aiocb_arg, cbplist, ssize) ||
12391885Sraf (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
12400Sstevel@tonic-gate kmem_free(cbplist, ssize);
12410Sstevel@tonic-gate return (EFAULT);
12420Sstevel@tonic-gate }
12430Sstevel@tonic-gate
12441885Sraf /* Event Ports */
12451885Sraf if (sigev &&
12461885Sraf (sigevk.sigev_notify == SIGEV_THREAD ||
12471885Sraf sigevk.sigev_notify == SIGEV_PORT)) {
12481885Sraf if (sigevk.sigev_notify == SIGEV_THREAD) {
12491885Sraf pnotify.portnfy_port = sigevk.sigev_signo;
12501885Sraf pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
12511885Sraf } else if (copyin(sigevk.sigev_value.sival_ptr,
12521885Sraf &pnotify, sizeof (pnotify))) {
12530Sstevel@tonic-gate kmem_free(cbplist, ssize);
12540Sstevel@tonic-gate return (EFAULT);
12550Sstevel@tonic-gate }
12561885Sraf error = port_alloc_event(pnotify.portnfy_port,
12571885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
12581885Sraf if (error) {
12591885Sraf if (error == ENOMEM || error == EAGAIN)
12601885Sraf error = EAGAIN;
12611885Sraf else
12621885Sraf error = EINVAL;
12631885Sraf kmem_free(cbplist, ssize);
12641885Sraf return (error);
12651885Sraf }
12661885Sraf lio_head_port = pnotify.portnfy_port;
12674502Spraks portused = 1;
12680Sstevel@tonic-gate }
12690Sstevel@tonic-gate
12700Sstevel@tonic-gate /*
12710Sstevel@tonic-gate * a list head should be allocated if notification is
12720Sstevel@tonic-gate * enabled for this list.
12730Sstevel@tonic-gate */
12740Sstevel@tonic-gate head = NULL;
12750Sstevel@tonic-gate
12761885Sraf if (mode_arg == LIO_WAIT || sigev) {
12770Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
12780Sstevel@tonic-gate error = aio_lio_alloc(&head);
12790Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
12800Sstevel@tonic-gate if (error)
12810Sstevel@tonic-gate goto done;
12820Sstevel@tonic-gate deadhead = 1;
12830Sstevel@tonic-gate head->lio_nent = nent;
12840Sstevel@tonic-gate head->lio_refcnt = nent;
12851885Sraf head->lio_port = -1;
12861885Sraf head->lio_portkev = NULL;
12871885Sraf if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
12881885Sraf sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
12890Sstevel@tonic-gate sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
12900Sstevel@tonic-gate if (sqp == NULL) {
12910Sstevel@tonic-gate error = EAGAIN;
12920Sstevel@tonic-gate goto done;
12930Sstevel@tonic-gate }
12940Sstevel@tonic-gate sqp->sq_func = NULL;
12950Sstevel@tonic-gate sqp->sq_next = NULL;
12960Sstevel@tonic-gate sqp->sq_info.si_code = SI_ASYNCIO;
12970Sstevel@tonic-gate sqp->sq_info.si_pid = curproc->p_pid;
12980Sstevel@tonic-gate sqp->sq_info.si_ctid = PRCTID(curproc);
12990Sstevel@tonic-gate sqp->sq_info.si_zoneid = getzoneid();
13000Sstevel@tonic-gate sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
13010Sstevel@tonic-gate sqp->sq_info.si_signo = sigevk.sigev_signo;
13020Sstevel@tonic-gate sqp->sq_info.si_value = sigevk.sigev_value;
13030Sstevel@tonic-gate head->lio_sigqp = sqp;
13040Sstevel@tonic-gate } else {
13050Sstevel@tonic-gate head->lio_sigqp = NULL;
13060Sstevel@tonic-gate }
13071885Sraf if (pkevtp) {
13081885Sraf /*
13091885Sraf * Prepare data to send when the list of aiocb's
13101885Sraf * has completed.
13111885Sraf */
13121885Sraf port_init_event(pkevtp, (uintptr_t)sigev,
13131885Sraf (void *)(uintptr_t)pnotify.portnfy_user,
13141885Sraf NULL, head);
13151885Sraf pkevtp->portkev_events = AIOLIO;
13161885Sraf head->lio_portkev = pkevtp;
13171885Sraf head->lio_port = pnotify.portnfy_port;
13181885Sraf }
13190Sstevel@tonic-gate }
13200Sstevel@tonic-gate
13210Sstevel@tonic-gate for (i = 0; i < nent; i++, ucbp++) {
13220Sstevel@tonic-gate
13230Sstevel@tonic-gate cbp = *ucbp;
13240Sstevel@tonic-gate /* skip entry if it can't be copied. */
13251885Sraf if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
13260Sstevel@tonic-gate if (head) {
13270Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
13280Sstevel@tonic-gate head->lio_nent--;
13290Sstevel@tonic-gate head->lio_refcnt--;
13300Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
13310Sstevel@tonic-gate }
13320Sstevel@tonic-gate continue;
13330Sstevel@tonic-gate }
13340Sstevel@tonic-gate
13350Sstevel@tonic-gate /* skip if opcode for aiocb is LIO_NOP */
13360Sstevel@tonic-gate mode = aiocb->aio_lio_opcode;
13370Sstevel@tonic-gate if (mode == LIO_NOP) {
13380Sstevel@tonic-gate cbp = NULL;
13390Sstevel@tonic-gate if (head) {
13400Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
13410Sstevel@tonic-gate head->lio_nent--;
13420Sstevel@tonic-gate head->lio_refcnt--;
13430Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
13440Sstevel@tonic-gate }
13450Sstevel@tonic-gate continue;
13460Sstevel@tonic-gate }
13470Sstevel@tonic-gate
13480Sstevel@tonic-gate /* increment file descriptor's ref count. */
13490Sstevel@tonic-gate if ((fp = getf(aiocb->aio_fildes)) == NULL) {
13500Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF);
13510Sstevel@tonic-gate if (head) {
13520Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
13530Sstevel@tonic-gate head->lio_nent--;
13540Sstevel@tonic-gate head->lio_refcnt--;
13550Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
13560Sstevel@tonic-gate }
13570Sstevel@tonic-gate aio_errors++;
13580Sstevel@tonic-gate continue;
13590Sstevel@tonic-gate }
13600Sstevel@tonic-gate
13610Sstevel@tonic-gate /*
13620Sstevel@tonic-gate * check that the partition was opened for the requested access mode
13630Sstevel@tonic-gate */
13640Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) {
13650Sstevel@tonic-gate releasef(aiocb->aio_fildes);
13660Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF);
13670Sstevel@tonic-gate if (head) {
13680Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
13690Sstevel@tonic-gate head->lio_nent--;
13700Sstevel@tonic-gate head->lio_refcnt--;
13710Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
13720Sstevel@tonic-gate }
13730Sstevel@tonic-gate aio_errors++;
13740Sstevel@tonic-gate continue;
13750Sstevel@tonic-gate }
13760Sstevel@tonic-gate
13770Sstevel@tonic-gate /*
13781885Sraf * common case where successive requests are to the same fd
13791885Sraf * for the same r/w operation, so check_vp() is redone only
13800Sstevel@tonic-gate * when the fd or the operation changes. for UFS, EBADFD is set
13810Sstevel@tonic-gate */
13821885Sraf vp = fp->f_vnode;
13831885Sraf if (fp != prev_fp || mode != prev_mode) {
13840Sstevel@tonic-gate aio_func = check_vp(vp, mode);
13850Sstevel@tonic-gate if (aio_func == NULL) {
13860Sstevel@tonic-gate prev_fp = NULL;
13870Sstevel@tonic-gate releasef(aiocb->aio_fildes);
13880Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADFD);
13890Sstevel@tonic-gate aio_notsupported++;
13900Sstevel@tonic-gate if (head) {
13910Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
13920Sstevel@tonic-gate head->lio_nent--;
13930Sstevel@tonic-gate head->lio_refcnt--;
13940Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
13950Sstevel@tonic-gate }
13960Sstevel@tonic-gate continue;
13970Sstevel@tonic-gate } else {
13980Sstevel@tonic-gate prev_fp = fp;
13990Sstevel@tonic-gate prev_mode = mode;
14000Sstevel@tonic-gate }
14010Sstevel@tonic-gate }
14020Sstevel@tonic-gate
14031885Sraf error = aio_req_setup(&reqp, aiop, aiocb,
1404*10719SRoger.Faulkner@Sun.COM &cbp->aio_resultp, vp, 0);
14051885Sraf if (error) {
14060Sstevel@tonic-gate releasef(aiocb->aio_fildes);
14070Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, error);
14080Sstevel@tonic-gate if (head) {
14090Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
14100Sstevel@tonic-gate head->lio_nent--;
14110Sstevel@tonic-gate head->lio_refcnt--;
14120Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
14130Sstevel@tonic-gate }
14140Sstevel@tonic-gate aio_errors++;
14150Sstevel@tonic-gate continue;
14160Sstevel@tonic-gate }
14170Sstevel@tonic-gate
14180Sstevel@tonic-gate reqp->aio_req_lio = head;
14190Sstevel@tonic-gate deadhead = 0;
14200Sstevel@tonic-gate
14210Sstevel@tonic-gate /*
14220Sstevel@tonic-gate * Set the errno field now before sending the request to
14230Sstevel@tonic-gate * the driver to avoid a race condition
14240Sstevel@tonic-gate */
14250Sstevel@tonic-gate (void) suword32(&cbp->aio_resultp.aio_errno,
14260Sstevel@tonic-gate EINPROGRESS);
14270Sstevel@tonic-gate
14280Sstevel@tonic-gate reqp->aio_req_iocb.iocb = (caddr_t)cbp;
14290Sstevel@tonic-gate
14301885Sraf event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
14311885Sraf aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
14321885Sraf aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
14331885Sraf if (aio_port | aio_thread) {
14341885Sraf port_kevent_t *lpkevp;
14351885Sraf /*
14361885Sraf * Prepare data to send with each aiocb completed.
14371885Sraf */
14381885Sraf if (aio_port) {
14391885Sraf void *paddr =
14401885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr;
14411885Sraf if (copyin(paddr, &pnotify, sizeof (pnotify)))
14421885Sraf error = EFAULT;
14431885Sraf } else { /* aio_thread */
14441885Sraf pnotify.portnfy_port =
14451885Sraf aiocb->aio_sigevent.sigev_signo;
14461885Sraf pnotify.portnfy_user =
14471885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr;
14481885Sraf }
14491885Sraf if (error)
14501885Sraf /* EMPTY */;
14511885Sraf else if (pkevtp != NULL &&
14521885Sraf pnotify.portnfy_port == lio_head_port)
14531885Sraf error = port_dup_event(pkevtp, &lpkevp,
14541885Sraf PORT_ALLOC_DEFAULT);
14551885Sraf else
14561885Sraf error = port_alloc_event(pnotify.portnfy_port,
14571885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
14581885Sraf &lpkevp);
14591885Sraf if (error == 0) {
14601885Sraf port_init_event(lpkevp, (uintptr_t)cbp,
14611885Sraf (void *)(uintptr_t)pnotify.portnfy_user,
14621885Sraf aio_port_callback, reqp);
14631885Sraf lpkevp->portkev_events = event;
14641885Sraf reqp->aio_req_portkev = lpkevp;
14651885Sraf reqp->aio_req_port = pnotify.portnfy_port;
14661885Sraf }
14670Sstevel@tonic-gate }
14680Sstevel@tonic-gate
14690Sstevel@tonic-gate /*
14700Sstevel@tonic-gate * send the request to the driver.
14710Sstevel@tonic-gate */
14720Sstevel@tonic-gate if (error == 0) {
14730Sstevel@tonic-gate if (aiocb->aio_nbytes == 0) {
14740Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes);
14750Sstevel@tonic-gate aio_zerolen(reqp);
14760Sstevel@tonic-gate continue;
14770Sstevel@tonic-gate }
14780Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
14790Sstevel@tonic-gate CRED());
14800Sstevel@tonic-gate }
14811885Sraf
14820Sstevel@tonic-gate /*
14830Sstevel@tonic-gate * the fd's ref count is not decremented until the IO has
14840Sstevel@tonic-gate * completed unless there was an error.
14850Sstevel@tonic-gate */
14860Sstevel@tonic-gate if (error) {
14870Sstevel@tonic-gate releasef(aiocb->aio_fildes);
14880Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, error);
14890Sstevel@tonic-gate if (head) {
14900Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
14910Sstevel@tonic-gate head->lio_nent--;
14920Sstevel@tonic-gate head->lio_refcnt--;
14930Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
14940Sstevel@tonic-gate }
14950Sstevel@tonic-gate if (error == ENOTSUP)
14960Sstevel@tonic-gate aio_notsupported++;
14970Sstevel@tonic-gate else
14980Sstevel@tonic-gate aio_errors++;
14994502Spraks lio_set_error(reqp, portused);
15000Sstevel@tonic-gate } else {
15010Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes);
15020Sstevel@tonic-gate }
15030Sstevel@tonic-gate }
15040Sstevel@tonic-gate
15050Sstevel@tonic-gate if (aio_notsupported) {
15060Sstevel@tonic-gate error = ENOTSUP;
15070Sstevel@tonic-gate } else if (aio_errors) {
15080Sstevel@tonic-gate /*
15090Sstevel@tonic-gate * return EIO if any request failed
15100Sstevel@tonic-gate */
15110Sstevel@tonic-gate error = EIO;
15120Sstevel@tonic-gate }
15130Sstevel@tonic-gate
15140Sstevel@tonic-gate if (mode_arg == LIO_WAIT) {
15150Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
15160Sstevel@tonic-gate while (head->lio_refcnt > 0) {
15170Sstevel@tonic-gate if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
15180Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
15190Sstevel@tonic-gate error = EINTR;
15200Sstevel@tonic-gate goto done;
15210Sstevel@tonic-gate }
15220Sstevel@tonic-gate }
15230Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
15240Sstevel@tonic-gate alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
15250Sstevel@tonic-gate }
15260Sstevel@tonic-gate
15270Sstevel@tonic-gate done:
15280Sstevel@tonic-gate kmem_free(cbplist, ssize);
15290Sstevel@tonic-gate if (deadhead) {
15300Sstevel@tonic-gate if (head->lio_sigqp)
15310Sstevel@tonic-gate kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
15321885Sraf if (head->lio_portkev)
15331885Sraf port_free_event(head->lio_portkev);
15340Sstevel@tonic-gate kmem_free(head, sizeof (aio_lio_t));
15350Sstevel@tonic-gate }
15360Sstevel@tonic-gate return (error);
15370Sstevel@tonic-gate }
15380Sstevel@tonic-gate
15390Sstevel@tonic-gate #endif /* _LP64 */
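
/*
 * Illustrative userland sketch (not compiled here) of how this path is
 * exercised: lio_listio(3C) hands the kernel an array of aiocb
 * pointers, which alio() then copies in one entry at a time.  The
 * names fd, buf0 and buf1 are hypothetical; error handling is elided.
 *
 *	#include <aio.h>
 *
 *	struct aiocb cb0 = { 0 }, cb1 = { 0 };
 *	struct aiocb *list[2] = { &cb0, &cb1 };
 *
 *	cb0.aio_fildes = fd;
 *	cb0.aio_buf = buf0;
 *	cb0.aio_nbytes = sizeof (buf0);
 *	cb0.aio_offset = 0;
 *	cb0.aio_lio_opcode = LIO_READ;
 *
 *	cb1.aio_fildes = fd;
 *	cb1.aio_buf = buf1;
 *	cb1.aio_nbytes = sizeof (buf1);
 *	cb1.aio_offset = (off_t)sizeof (buf0);
 *	cb1.aio_lio_opcode = LIO_READ;
 *
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) != 0)
 *		perror("lio_listio");
 */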
15400Sstevel@tonic-gate
15410Sstevel@tonic-gate /*
15420Sstevel@tonic-gate * Asynchronous list IO.
15430Sstevel@tonic-gate * If list I/O is called with LIO_WAIT it can still return
15440Sstevel@tonic-gate * before all the I/O's are completed if a signal is caught
15450Sstevel@tonic-gate * or if the list includes UFS I/O requests. If this happens,
15460Sstevel@tonic-gate * libaio will call aliowait() to wait for the I/O's to
15470Sstevel@tonic-gate * complete.
15480Sstevel@tonic-gate */
15490Sstevel@tonic-gate /*ARGSUSED*/
15500Sstevel@tonic-gate static int
15510Sstevel@tonic-gate aliowait(
15520Sstevel@tonic-gate int mode,
15530Sstevel@tonic-gate void *aiocb,
15540Sstevel@tonic-gate int nent,
15550Sstevel@tonic-gate void *sigev,
15560Sstevel@tonic-gate int run_mode)
15570Sstevel@tonic-gate {
15580Sstevel@tonic-gate aio_lio_t *head;
15590Sstevel@tonic-gate aio_t *aiop;
15600Sstevel@tonic-gate caddr_t cbplist;
15610Sstevel@tonic-gate aiocb_t *cbp, **ucbp;
15620Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
15630Sstevel@tonic-gate aiocb32_t *cbp32;
15640Sstevel@tonic-gate caddr32_t *ucbp32;
15650Sstevel@tonic-gate aiocb64_32_t *cbp64;
15660Sstevel@tonic-gate #endif
15670Sstevel@tonic-gate int error = 0;
15680Sstevel@tonic-gate int i;
15690Sstevel@tonic-gate size_t ssize = 0;
15700Sstevel@tonic-gate model_t model = get_udatamodel();
15710Sstevel@tonic-gate
15720Sstevel@tonic-gate aiop = curproc->p_aio;
15730Sstevel@tonic-gate if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
15740Sstevel@tonic-gate return (EINVAL);
15750Sstevel@tonic-gate
15760Sstevel@tonic-gate if (model == DATAMODEL_NATIVE)
15770Sstevel@tonic-gate ssize = (sizeof (aiocb_t *) * nent);
15780Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
15790Sstevel@tonic-gate else
15800Sstevel@tonic-gate ssize = (sizeof (caddr32_t) * nent);
15810Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
15820Sstevel@tonic-gate
15830Sstevel@tonic-gate if (ssize == 0)
15840Sstevel@tonic-gate return (EINVAL);
15850Sstevel@tonic-gate
15860Sstevel@tonic-gate cbplist = kmem_alloc(ssize, KM_SLEEP);
15870Sstevel@tonic-gate
15880Sstevel@tonic-gate if (model == DATAMODEL_NATIVE)
15890Sstevel@tonic-gate ucbp = (aiocb_t **)cbplist;
15900Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
15910Sstevel@tonic-gate else
15920Sstevel@tonic-gate ucbp32 = (caddr32_t *)cbplist;
15930Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
15940Sstevel@tonic-gate
15950Sstevel@tonic-gate if (copyin(aiocb, cbplist, ssize)) {
15960Sstevel@tonic-gate error = EFAULT;
15970Sstevel@tonic-gate goto done;
15980Sstevel@tonic-gate }
15990Sstevel@tonic-gate
16000Sstevel@tonic-gate /*
16010Sstevel@tonic-gate * To find the list head, we go through the
16020Sstevel@tonic-gate * list of aiocb structs, find the request
16030Sstevel@tonic-gate * it's for, then get the list head that reqp
16040Sstevel@tonic-gate * points to.
16050Sstevel@tonic-gate */
16060Sstevel@tonic-gate head = NULL;
16070Sstevel@tonic-gate
16080Sstevel@tonic-gate for (i = 0; i < nent; i++) {
16090Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) {
16100Sstevel@tonic-gate /*
16110Sstevel@tonic-gate * Since we are only checking for a NULL pointer,
16120Sstevel@tonic-gate * the following works on native-size aiocbs as
16130Sstevel@tonic-gate * well as on largefile aiocbs.
16140Sstevel@tonic-gate */
16150Sstevel@tonic-gate if ((cbp = *ucbp++) == NULL)
16160Sstevel@tonic-gate continue;
16170Sstevel@tonic-gate if (run_mode != AIO_LARGEFILE)
16180Sstevel@tonic-gate if (head = aio_list_get(&cbp->aio_resultp))
16190Sstevel@tonic-gate break;
16200Sstevel@tonic-gate else {
16210Sstevel@tonic-gate /*
16220Sstevel@tonic-gate * This is the case where a largefile call is
16230Sstevel@tonic-gate * made on a 32 bit kernel.
16240Sstevel@tonic-gate * Treat each pointer as a pointer to
16250Sstevel@tonic-gate * aiocb64_32.
16260Sstevel@tonic-gate */
16270Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *)
16280Sstevel@tonic-gate &(((aiocb64_32_t *)cbp)->aio_resultp)))
16290Sstevel@tonic-gate break;
16300Sstevel@tonic-gate }
16310Sstevel@tonic-gate }
16320Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
16330Sstevel@tonic-gate else {
16340Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) {
16350Sstevel@tonic-gate if ((cbp64 = (aiocb64_32_t *)
16360Sstevel@tonic-gate (uintptr_t)*ucbp32++) == NULL)
16370Sstevel@tonic-gate continue;
16380Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *)
16390Sstevel@tonic-gate &cbp64->aio_resultp))
16400Sstevel@tonic-gate break;
16410Sstevel@tonic-gate } else if (run_mode == AIO_32) {
16420Sstevel@tonic-gate if ((cbp32 = (aiocb32_t *)
16430Sstevel@tonic-gate (uintptr_t)*ucbp32++) == NULL)
16440Sstevel@tonic-gate continue;
16450Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *)
16460Sstevel@tonic-gate &cbp32->aio_resultp))
16470Sstevel@tonic-gate break;
16480Sstevel@tonic-gate }
16490Sstevel@tonic-gate }
16500Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
16510Sstevel@tonic-gate }
16520Sstevel@tonic-gate
16530Sstevel@tonic-gate if (head == NULL) {
16540Sstevel@tonic-gate error = EINVAL;
16550Sstevel@tonic-gate goto done;
16560Sstevel@tonic-gate }
16570Sstevel@tonic-gate
16580Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
16590Sstevel@tonic-gate while (head->lio_refcnt > 0) {
16600Sstevel@tonic-gate if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
16610Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
16620Sstevel@tonic-gate error = EINTR;
16630Sstevel@tonic-gate goto done;
16640Sstevel@tonic-gate }
16650Sstevel@tonic-gate }
16660Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
16670Sstevel@tonic-gate alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
16680Sstevel@tonic-gate done:
16690Sstevel@tonic-gate kmem_free(cbplist, ssize);
16700Sstevel@tonic-gate return (error);
16710Sstevel@tonic-gate }
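
/*
 * A minimal userland sketch of the recovery described above (an
 * assumed pattern, not a quote of libaio): when a LIO_WAIT submission
 * returns early, each aiocb can be polled until none of them reports
 * EINPROGRESS.
 *
 *	for (i = 0; i < nent; i++) {
 *		while (aio_error(list[i]) == EINPROGRESS)
 *			(void) aio_suspend(
 *			    (const struct aiocb * const *)&list[i],
 *			    1, NULL);
 *		(void) aio_return(list[i]);
 *	}
 */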
16720Sstevel@tonic-gate
16730Sstevel@tonic-gate aio_lio_t *
16740Sstevel@tonic-gate aio_list_get(aio_result_t *resultp)
16750Sstevel@tonic-gate {
16760Sstevel@tonic-gate aio_lio_t *head = NULL;
16770Sstevel@tonic-gate aio_t *aiop;
16780Sstevel@tonic-gate aio_req_t **bucket;
16790Sstevel@tonic-gate aio_req_t *reqp;
16800Sstevel@tonic-gate long index;
16810Sstevel@tonic-gate
16820Sstevel@tonic-gate aiop = curproc->p_aio;
16830Sstevel@tonic-gate if (aiop == NULL)
16840Sstevel@tonic-gate return (NULL);
16850Sstevel@tonic-gate
16860Sstevel@tonic-gate if (resultp) {
16870Sstevel@tonic-gate index = AIO_HASH(resultp);
16880Sstevel@tonic-gate bucket = &aiop->aio_hash[index];
16890Sstevel@tonic-gate for (reqp = *bucket; reqp != NULL;
16900Sstevel@tonic-gate reqp = reqp->aio_hash_next) {
16910Sstevel@tonic-gate if (reqp->aio_req_resultp == resultp) {
16920Sstevel@tonic-gate head = reqp->aio_req_lio;
16930Sstevel@tonic-gate return (head);
16940Sstevel@tonic-gate }
16950Sstevel@tonic-gate }
16960Sstevel@tonic-gate }
16970Sstevel@tonic-gate return (NULL);
16980Sstevel@tonic-gate }
16990Sstevel@tonic-gate
17000Sstevel@tonic-gate
17010Sstevel@tonic-gate static void
17020Sstevel@tonic-gate lio_set_uerror(void *resultp, int error)
17030Sstevel@tonic-gate {
17040Sstevel@tonic-gate /*
17050Sstevel@tonic-gate * the resultp field is a pointer to where the
17060Sstevel@tonic-gate * error and failed return status should be
17070Sstevel@tonic-gate * written in the user's aiocb.
17090Sstevel@tonic-gate */
17100Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) {
17110Sstevel@tonic-gate (void) sulword(&((aio_result_t *)resultp)->aio_return,
17120Sstevel@tonic-gate (ssize_t)-1);
17130Sstevel@tonic-gate (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
17140Sstevel@tonic-gate }
17150Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
17160Sstevel@tonic-gate else {
17170Sstevel@tonic-gate (void) suword32(&((aio_result32_t *)resultp)->aio_return,
17180Sstevel@tonic-gate (uint_t)-1);
17190Sstevel@tonic-gate (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
17200Sstevel@tonic-gate }
17210Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
17220Sstevel@tonic-gate }
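
/*
 * User-visible effect (sketch; field names per the Solaris aiocb): a
 * list entry that fails submission reads back in the application as
 *
 *	cb.aio_resultp.aio_return == -1
 *	cb.aio_resultp.aio_errno  == <the error set above>
 */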
17230Sstevel@tonic-gate
17240Sstevel@tonic-gate /*
17250Sstevel@tonic-gate * do cleanup completion for all requests in the list. memory
17260Sstevel@tonic-gate * for each request is also freed.
17270Sstevel@tonic-gate */
17280Sstevel@tonic-gate static void
17290Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
17300Sstevel@tonic-gate {
17310Sstevel@tonic-gate int i;
17320Sstevel@tonic-gate aio_req_t *reqp;
17330Sstevel@tonic-gate aio_result_t *resultp;
17341885Sraf aiocb64_32_t *aiocb_64;
17350Sstevel@tonic-gate
17360Sstevel@tonic-gate for (i = 0; i < nent; i++) {
17370Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) {
17380Sstevel@tonic-gate if (cbp[i] == NULL)
17390Sstevel@tonic-gate continue;
17400Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) {
17410Sstevel@tonic-gate aiocb_64 = (aiocb64_32_t *)cbp[i];
17421885Sraf resultp = (aio_result_t *)
17431885Sraf &aiocb_64->aio_resultp;
17440Sstevel@tonic-gate } else
17450Sstevel@tonic-gate resultp = &cbp[i]->aio_resultp;
17460Sstevel@tonic-gate }
17470Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
17480Sstevel@tonic-gate else {
17491885Sraf aiocb32_t *aiocb_32;
17501885Sraf caddr32_t *cbp32;
17510Sstevel@tonic-gate
17520Sstevel@tonic-gate cbp32 = (caddr32_t *)cbp;
17530Sstevel@tonic-gate if (cbp32[i] == NULL)
17540Sstevel@tonic-gate continue;
17550Sstevel@tonic-gate if (run_mode == AIO_32) {
17560Sstevel@tonic-gate aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
17570Sstevel@tonic-gate resultp = (aio_result_t *)&aiocb_32->
17580Sstevel@tonic-gate aio_resultp;
17590Sstevel@tonic-gate } else if (run_mode == AIO_LARGEFILE) {
17600Sstevel@tonic-gate aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
17610Sstevel@tonic-gate resultp = (aio_result_t *)&aiocb_64->
17620Sstevel@tonic-gate aio_resultp;
17630Sstevel@tonic-gate }
17640Sstevel@tonic-gate }
17650Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
17660Sstevel@tonic-gate /*
17670Sstevel@tonic-gate * we need to get the aio_cleanupq_mutex since we call
17680Sstevel@tonic-gate * aio_req_done().
17690Sstevel@tonic-gate */
17700Sstevel@tonic-gate mutex_enter(&aiop->aio_cleanupq_mutex);
17710Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
17720Sstevel@tonic-gate reqp = aio_req_done(resultp);
17730Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
17740Sstevel@tonic-gate mutex_exit(&aiop->aio_cleanupq_mutex);
17750Sstevel@tonic-gate if (reqp != NULL) {
17760Sstevel@tonic-gate aphysio_unlock(reqp);
17770Sstevel@tonic-gate aio_copyout_result(reqp);
17780Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
17790Sstevel@tonic-gate aio_req_free(aiop, reqp);
17800Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
17810Sstevel@tonic-gate }
17820Sstevel@tonic-gate }
17830Sstevel@tonic-gate }
17840Sstevel@tonic-gate
17850Sstevel@tonic-gate /*
17861885Sraf * Write out the results for an aio request that is done.
17870Sstevel@tonic-gate */
17880Sstevel@tonic-gate static int
17890Sstevel@tonic-gate aioerror(void *cb, int run_mode)
17900Sstevel@tonic-gate {
17910Sstevel@tonic-gate aio_result_t *resultp;
17920Sstevel@tonic-gate aio_t *aiop;
17930Sstevel@tonic-gate aio_req_t *reqp;
17940Sstevel@tonic-gate int retval;
17950Sstevel@tonic-gate
17960Sstevel@tonic-gate aiop = curproc->p_aio;
17970Sstevel@tonic-gate if (aiop == NULL || cb == NULL)
17980Sstevel@tonic-gate return (EINVAL);
17990Sstevel@tonic-gate
18000Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) {
18010Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE)
18020Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18030Sstevel@tonic-gate aio_resultp;
18040Sstevel@tonic-gate else
18050Sstevel@tonic-gate resultp = &((aiocb_t *)cb)->aio_resultp;
18060Sstevel@tonic-gate }
18070Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
18080Sstevel@tonic-gate else {
18090Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE)
18100Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
18110Sstevel@tonic-gate aio_resultp;
18120Sstevel@tonic-gate else if (run_mode == AIO_32)
18130Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb32_t *)cb)->
18140Sstevel@tonic-gate aio_resultp;
18150Sstevel@tonic-gate }
18160Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
18170Sstevel@tonic-gate /*
18180Sstevel@tonic-gate * we need to get the aio_cleanupq_mutex since we call
18190Sstevel@tonic-gate * aio_req_find().
18200Sstevel@tonic-gate */
18210Sstevel@tonic-gate mutex_enter(&aiop->aio_cleanupq_mutex);
18220Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
18230Sstevel@tonic-gate retval = aio_req_find(resultp, &reqp);
18240Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
18250Sstevel@tonic-gate mutex_exit(&aiop->aio_cleanupq_mutex);
18260Sstevel@tonic-gate if (retval == 0) {
18270Sstevel@tonic-gate aphysio_unlock(reqp);
18280Sstevel@tonic-gate aio_copyout_result(reqp);
18290Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
18300Sstevel@tonic-gate aio_req_free(aiop, reqp);
18310Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
18320Sstevel@tonic-gate return (0);
18330Sstevel@tonic-gate } else if (retval == 1)
18340Sstevel@tonic-gate return (EINPROGRESS);
18350Sstevel@tonic-gate else if (retval == 2)
18360Sstevel@tonic-gate return (EINVAL);
18370Sstevel@tonic-gate return (0);
18380Sstevel@tonic-gate }
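
/*
 * Userland view of aioerror() (illustrative sketch): aio_error(3RT)
 * surfaces the same three states -- EINPROGRESS while the request is
 * pending, EINVAL for an unknown aiocb, and otherwise the completion
 * status, after which aio_return(3RT) yields the transfer count.  A
 * real program would block in aio_suspend() instead of spinning.
 *
 *	int err;
 *
 *	while ((err = aio_error(&cb)) == EINPROGRESS)
 *		;
 *	if (err == 0)
 *		nbytes = aio_return(&cb);
 */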
18390Sstevel@tonic-gate
18400Sstevel@tonic-gate /*
18410Sstevel@tonic-gate * aio_cancel - return AIO_ALLDONE if no matching requests are
18420Sstevel@tonic-gate *		outstanding, else return AIO_NOTCANCELED;
18430Sstevel@tonic-gate *		pending kaio requests are never actually
18440Sstevel@tonic-gate *		canceled here.
18450Sstevel@tonic-gate */
18460Sstevel@tonic-gate static int
18470Sstevel@tonic-gate aio_cancel(
18480Sstevel@tonic-gate int fildes,
18490Sstevel@tonic-gate void *cb,
18500Sstevel@tonic-gate long *rval,
18510Sstevel@tonic-gate int run_mode)
18520Sstevel@tonic-gate {
18530Sstevel@tonic-gate aio_t *aiop;
18540Sstevel@tonic-gate void *resultp;
18550Sstevel@tonic-gate int index;
18560Sstevel@tonic-gate aio_req_t **bucket;
18570Sstevel@tonic-gate aio_req_t *ent;
18580Sstevel@tonic-gate
18590Sstevel@tonic-gate
18600Sstevel@tonic-gate /*
18610Sstevel@tonic-gate * Verify valid file descriptor
18620Sstevel@tonic-gate */
18630Sstevel@tonic-gate if ((getf(fildes)) == NULL) {
18640Sstevel@tonic-gate return (EBADF);
18650Sstevel@tonic-gate }
18660Sstevel@tonic-gate releasef(fildes);
18670Sstevel@tonic-gate
18680Sstevel@tonic-gate aiop = curproc->p_aio;
18690Sstevel@tonic-gate if (aiop == NULL)
18700Sstevel@tonic-gate return (EINVAL);
18710Sstevel@tonic-gate
18720Sstevel@tonic-gate if (aiop->aio_outstanding == 0) {
18730Sstevel@tonic-gate *rval = AIO_ALLDONE;
18740Sstevel@tonic-gate return (0);
18750Sstevel@tonic-gate }
18760Sstevel@tonic-gate
18770Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
18780Sstevel@tonic-gate if (cb != NULL) {
18790Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) {
18800Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE)
18810Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18820Sstevel@tonic-gate ->aio_resultp;
18830Sstevel@tonic-gate else
18840Sstevel@tonic-gate resultp = &((aiocb_t *)cb)->aio_resultp;
18850Sstevel@tonic-gate }
18860Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
18870Sstevel@tonic-gate else {
18880Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE)
18890Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
18900Sstevel@tonic-gate ->aio_resultp;
18910Sstevel@tonic-gate else if (run_mode == AIO_32)
18920Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb32_t *)cb)
18930Sstevel@tonic-gate ->aio_resultp;
18940Sstevel@tonic-gate }
18950Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
18960Sstevel@tonic-gate index = AIO_HASH(resultp);
18970Sstevel@tonic-gate bucket = &aiop->aio_hash[index];
18980Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
18990Sstevel@tonic-gate if (ent->aio_req_resultp == resultp) {
19000Sstevel@tonic-gate if ((ent->aio_req_flags & AIO_PENDING) == 0) {
19010Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
19020Sstevel@tonic-gate *rval = AIO_ALLDONE;
19030Sstevel@tonic-gate return (0);
19040Sstevel@tonic-gate }
19050Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
19060Sstevel@tonic-gate *rval = AIO_NOTCANCELED;
19070Sstevel@tonic-gate return (0);
19080Sstevel@tonic-gate }
19090Sstevel@tonic-gate }
19100Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
19110Sstevel@tonic-gate *rval = AIO_ALLDONE;
19120Sstevel@tonic-gate return (0);
19130Sstevel@tonic-gate }
19140Sstevel@tonic-gate
19150Sstevel@tonic-gate for (index = 0; index < AIO_HASHSZ; index++) {
19160Sstevel@tonic-gate bucket = &aiop->aio_hash[index];
19170Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
19180Sstevel@tonic-gate if (ent->aio_req_fd == fildes) {
19190Sstevel@tonic-gate if ((ent->aio_req_flags & AIO_PENDING) != 0) {
19200Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
19210Sstevel@tonic-gate *rval = AIO_NOTCANCELED;
19220Sstevel@tonic-gate return (0);
19230Sstevel@tonic-gate }
19240Sstevel@tonic-gate }
19250Sstevel@tonic-gate }
19260Sstevel@tonic-gate }
19270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
19280Sstevel@tonic-gate *rval = AIO_ALLDONE;
19290Sstevel@tonic-gate return (0);
19300Sstevel@tonic-gate }
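
/*
 * Hedged userland sketch: because this kaio path never cancels an
 * in-flight raw-device request, callers of aio_cancel(3RT) should be
 * prepared for AIO_NOTCANCELED and simply wait for completion.
 *
 *	const struct aiocb *wl[1] = { &cb };
 *
 *	switch (aio_cancel(fd, &cb)) {
 *	case AIO_ALLDONE:
 *	case AIO_CANCELED:
 *		break;
 *	case AIO_NOTCANCELED:
 *		(void) aio_suspend(wl, 1, NULL);
 *		break;
 *	}
 */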
19310Sstevel@tonic-gate
19320Sstevel@tonic-gate /*
19330Sstevel@tonic-gate * solaris version of asynchronous read and write
19340Sstevel@tonic-gate */
19350Sstevel@tonic-gate static int
19360Sstevel@tonic-gate arw(
19370Sstevel@tonic-gate int opcode,
19380Sstevel@tonic-gate int fdes,
19390Sstevel@tonic-gate char *bufp,
19400Sstevel@tonic-gate int bufsize,
19410Sstevel@tonic-gate offset_t offset,
19420Sstevel@tonic-gate aio_result_t *resultp,
19430Sstevel@tonic-gate int mode)
19440Sstevel@tonic-gate {
19450Sstevel@tonic-gate file_t *fp;
19460Sstevel@tonic-gate int error;
19470Sstevel@tonic-gate struct vnode *vp;
19480Sstevel@tonic-gate aio_req_t *reqp;
19490Sstevel@tonic-gate aio_t *aiop;
19500Sstevel@tonic-gate int (*aio_func)();
19510Sstevel@tonic-gate #ifdef _LP64
19520Sstevel@tonic-gate aiocb_t aiocb;
19530Sstevel@tonic-gate #else
19540Sstevel@tonic-gate aiocb64_32_t aiocb64;
19550Sstevel@tonic-gate #endif
19560Sstevel@tonic-gate
19570Sstevel@tonic-gate aiop = curproc->p_aio;
19580Sstevel@tonic-gate if (aiop == NULL)
19590Sstevel@tonic-gate return (EINVAL);
19600Sstevel@tonic-gate
19610Sstevel@tonic-gate if ((fp = getf(fdes)) == NULL) {
19620Sstevel@tonic-gate return (EBADF);
19630Sstevel@tonic-gate }
19640Sstevel@tonic-gate
19650Sstevel@tonic-gate /*
19660Sstevel@tonic-gate * check that the partition was opened for the requested access mode
19670Sstevel@tonic-gate */
19680Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) {
19690Sstevel@tonic-gate releasef(fdes);
19700Sstevel@tonic-gate return (EBADF);
19710Sstevel@tonic-gate }
19720Sstevel@tonic-gate
19730Sstevel@tonic-gate vp = fp->f_vnode;
19740Sstevel@tonic-gate aio_func = check_vp(vp, mode);
19750Sstevel@tonic-gate if (aio_func == NULL) {
19760Sstevel@tonic-gate releasef(fdes);
19770Sstevel@tonic-gate return (EBADFD);
19780Sstevel@tonic-gate }
19790Sstevel@tonic-gate #ifdef _LP64
19800Sstevel@tonic-gate aiocb.aio_fildes = fdes;
19810Sstevel@tonic-gate aiocb.aio_buf = bufp;
19820Sstevel@tonic-gate aiocb.aio_nbytes = bufsize;
19830Sstevel@tonic-gate aiocb.aio_offset = offset;
19840Sstevel@tonic-gate aiocb.aio_sigevent.sigev_notify = 0;
1985*10719SRoger.Faulkner@Sun.COM error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);
19860Sstevel@tonic-gate #else
19870Sstevel@tonic-gate aiocb64.aio_fildes = fdes;
19880Sstevel@tonic-gate aiocb64.aio_buf = (caddr32_t)bufp;
19890Sstevel@tonic-gate aiocb64.aio_nbytes = bufsize;
19900Sstevel@tonic-gate aiocb64.aio_offset = offset;
19910Sstevel@tonic-gate aiocb64.aio_sigevent.sigev_notify = 0;
1992*10719SRoger.Faulkner@Sun.COM error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);
19930Sstevel@tonic-gate #endif
19940Sstevel@tonic-gate if (error) {
19950Sstevel@tonic-gate releasef(fdes);
19960Sstevel@tonic-gate return (error);
19970Sstevel@tonic-gate }
19980Sstevel@tonic-gate
19990Sstevel@tonic-gate /*
20000Sstevel@tonic-gate * enable polling on this request if the opcode has
20010Sstevel@tonic-gate * the AIO poll bit set
20020Sstevel@tonic-gate */
20030Sstevel@tonic-gate if (opcode & AIO_POLL_BIT)
20040Sstevel@tonic-gate reqp->aio_req_flags |= AIO_POLL;
20050Sstevel@tonic-gate
20060Sstevel@tonic-gate if (bufsize == 0) {
20070Sstevel@tonic-gate clear_active_fd(fdes);
20080Sstevel@tonic-gate aio_zerolen(reqp);
20090Sstevel@tonic-gate return (0);
20100Sstevel@tonic-gate }
20110Sstevel@tonic-gate /*
20120Sstevel@tonic-gate * send the request to the driver.
20130Sstevel@tonic-gate */
20140Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
20150Sstevel@tonic-gate /*
20160Sstevel@tonic-gate * the fd is stored in the aio_req_t by aio_req_setup(), and
20170Sstevel@tonic-gate * is released by the aio_cleanup_thread() when the IO has
20180Sstevel@tonic-gate * completed.
20190Sstevel@tonic-gate */
20200Sstevel@tonic-gate if (error) {
20210Sstevel@tonic-gate releasef(fdes);
20220Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
20230Sstevel@tonic-gate aio_req_free(aiop, reqp);
20240Sstevel@tonic-gate aiop->aio_pending--;
20250Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK)
20260Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv);
20270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
20280Sstevel@tonic-gate return (error);
20290Sstevel@tonic-gate }
20300Sstevel@tonic-gate clear_active_fd(fdes);
20310Sstevel@tonic-gate return (0);
20320Sstevel@tonic-gate }
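
/*
 * Illustrative sketch of the 3AIO interface that reaches arw(): the
 * caller owns the aio_result_t and harvests completion via
 * aiowait(3AIO).  Error handling is elided.
 *
 *	#include <sys/asynch.h>
 *
 *	aio_result_t res;
 *
 *	if (aioread(fd, buf, sizeof (buf), 0L, SEEK_SET, &res) == -1)
 *		perror("aioread");
 *	(void) aiowait(NULL);
 *	if (res.aio_return == -1)
 *		errno = res.aio_errno;
 */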
20330Sstevel@tonic-gate
20340Sstevel@tonic-gate /*
20350Sstevel@tonic-gate * posix version of asynchronous read and write
20360Sstevel@tonic-gate */
20371885Sraf static int
20380Sstevel@tonic-gate aiorw(
20390Sstevel@tonic-gate int opcode,
20400Sstevel@tonic-gate void *aiocb_arg,
20410Sstevel@tonic-gate int mode,
20420Sstevel@tonic-gate int run_mode)
20430Sstevel@tonic-gate {
20440Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
20450Sstevel@tonic-gate aiocb32_t aiocb32;
20460Sstevel@tonic-gate struct sigevent32 *sigev32;
20470Sstevel@tonic-gate port_notify32_t pntfy32;
20480Sstevel@tonic-gate #endif
20490Sstevel@tonic-gate aiocb64_32_t aiocb64;
20500Sstevel@tonic-gate aiocb_t aiocb;
20510Sstevel@tonic-gate file_t *fp;
20520Sstevel@tonic-gate int error, fd;
20530Sstevel@tonic-gate size_t bufsize;
20540Sstevel@tonic-gate struct vnode *vp;
20550Sstevel@tonic-gate aio_req_t *reqp;
20560Sstevel@tonic-gate aio_t *aiop;
20570Sstevel@tonic-gate int (*aio_func)();
20580Sstevel@tonic-gate aio_result_t *resultp;
20590Sstevel@tonic-gate struct sigevent *sigev;
20600Sstevel@tonic-gate model_t model;
20610Sstevel@tonic-gate int aio_use_port = 0;
20620Sstevel@tonic-gate port_notify_t pntfy;
20630Sstevel@tonic-gate
20640Sstevel@tonic-gate model = get_udatamodel();
20650Sstevel@tonic-gate aiop = curproc->p_aio;
20660Sstevel@tonic-gate if (aiop == NULL)
20670Sstevel@tonic-gate return (EINVAL);
20680Sstevel@tonic-gate
20690Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) {
20700Sstevel@tonic-gate if (run_mode != AIO_LARGEFILE) {
20710Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
20720Sstevel@tonic-gate return (EFAULT);
20730Sstevel@tonic-gate bufsize = aiocb.aio_nbytes;
20740Sstevel@tonic-gate resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
20750Sstevel@tonic-gate if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
20760Sstevel@tonic-gate return (EBADF);
20770Sstevel@tonic-gate }
20780Sstevel@tonic-gate sigev = &aiocb.aio_sigevent;
20790Sstevel@tonic-gate } else {
20800Sstevel@tonic-gate /*
20810Sstevel@tonic-gate * We come here only when a largefile call is
20820Sstevel@tonic-gate * made on a 32 bit kernel using the 32 bit library.
20830Sstevel@tonic-gate */
20840Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
20850Sstevel@tonic-gate return (EFAULT);
20860Sstevel@tonic-gate bufsize = aiocb64.aio_nbytes;
20870Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
20880Sstevel@tonic-gate ->aio_resultp);
20891885Sraf if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
20900Sstevel@tonic-gate return (EBADF);
20910Sstevel@tonic-gate sigev = (struct sigevent *)&aiocb64.aio_sigevent;
20920Sstevel@tonic-gate }
20930Sstevel@tonic-gate
20940Sstevel@tonic-gate if (sigev->sigev_notify == SIGEV_PORT) {
20950Sstevel@tonic-gate if (copyin((void *)sigev->sigev_value.sival_ptr,
20960Sstevel@tonic-gate &pntfy, sizeof (port_notify_t))) {
20970Sstevel@tonic-gate releasef(fd);
20980Sstevel@tonic-gate return (EFAULT);
20990Sstevel@tonic-gate }
21000Sstevel@tonic-gate aio_use_port = 1;
21011885Sraf } else if (sigev->sigev_notify == SIGEV_THREAD) {
21021885Sraf pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
21031885Sraf pntfy.portnfy_user =
21041885Sraf aiocb.aio_sigevent.sigev_value.sival_ptr;
21051885Sraf aio_use_port = 1;
21060Sstevel@tonic-gate }
21070Sstevel@tonic-gate }
21080Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
21090Sstevel@tonic-gate else {
21100Sstevel@tonic-gate if (run_mode == AIO_32) {
21110Sstevel@tonic-gate /* 32 bit system call is being made on 64 bit kernel */
21120Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
21130Sstevel@tonic-gate return (EFAULT);
21140Sstevel@tonic-gate
21150Sstevel@tonic-gate bufsize = aiocb32.aio_nbytes;
21160Sstevel@tonic-gate aiocb_32ton(&aiocb32, &aiocb);
21170Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
21180Sstevel@tonic-gate aio_resultp);
21190Sstevel@tonic-gate if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
21200Sstevel@tonic-gate return (EBADF);
21210Sstevel@tonic-gate }
21220Sstevel@tonic-gate sigev32 = &aiocb32.aio_sigevent;
21230Sstevel@tonic-gate } else if (run_mode == AIO_LARGEFILE) {
21240Sstevel@tonic-gate /*
21250Sstevel@tonic-gate * We come here only when a largefile call is
21260Sstevel@tonic-gate * made on a 64 bit kernel using the 32 bit library.
21270Sstevel@tonic-gate */
21280Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
21290Sstevel@tonic-gate return (EFAULT);
21300Sstevel@tonic-gate bufsize = aiocb64.aio_nbytes;
21310Sstevel@tonic-gate aiocb_LFton(&aiocb64, &aiocb);
21320Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
21330Sstevel@tonic-gate ->aio_resultp);
21340Sstevel@tonic-gate if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
21350Sstevel@tonic-gate return (EBADF);
21360Sstevel@tonic-gate sigev32 = &aiocb64.aio_sigevent;
21370Sstevel@tonic-gate }
21380Sstevel@tonic-gate
21390Sstevel@tonic-gate if (sigev32->sigev_notify == SIGEV_PORT) {
21400Sstevel@tonic-gate if (copyin(
21410Sstevel@tonic-gate (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
21420Sstevel@tonic-gate &pntfy32, sizeof (port_notify32_t))) {
21430Sstevel@tonic-gate releasef(fd);
21440Sstevel@tonic-gate return (EFAULT);
21450Sstevel@tonic-gate }
21460Sstevel@tonic-gate pntfy.portnfy_port = pntfy32.portnfy_port;
21471885Sraf pntfy.portnfy_user = (void *)(uintptr_t)
21481885Sraf pntfy32.portnfy_user;
21491885Sraf aio_use_port = 1;
21501885Sraf } else if (sigev32->sigev_notify == SIGEV_THREAD) {
21511885Sraf pntfy.portnfy_port = sigev32->sigev_signo;
21521885Sraf pntfy.portnfy_user = (void *)(uintptr_t)
21531885Sraf sigev32->sigev_value.sival_ptr;
21540Sstevel@tonic-gate aio_use_port = 1;
21550Sstevel@tonic-gate }
21560Sstevel@tonic-gate }
21570Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
21580Sstevel@tonic-gate
21590Sstevel@tonic-gate /*
21600Sstevel@tonic-gate * check that the partition was opened for the requested access mode
21610Sstevel@tonic-gate */
21620Sstevel@tonic-gate
21630Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) {
21640Sstevel@tonic-gate releasef(fd);
21650Sstevel@tonic-gate return (EBADF);
21660Sstevel@tonic-gate }
21670Sstevel@tonic-gate
21680Sstevel@tonic-gate vp = fp->f_vnode;
21690Sstevel@tonic-gate aio_func = check_vp(vp, mode);
21700Sstevel@tonic-gate if (aio_func == NULL) {
21710Sstevel@tonic-gate releasef(fd);
21720Sstevel@tonic-gate return (EBADFD);
21730Sstevel@tonic-gate }
21741885Sraf if (run_mode == AIO_LARGEFILE)
2175*10719SRoger.Faulkner@Sun.COM error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
21760Sstevel@tonic-gate else
2177*10719SRoger.Faulkner@Sun.COM error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);
21780Sstevel@tonic-gate
21790Sstevel@tonic-gate if (error) {
21800Sstevel@tonic-gate releasef(fd);
21810Sstevel@tonic-gate return (error);
21820Sstevel@tonic-gate }
21830Sstevel@tonic-gate /*
21840Sstevel@tonic-gate * enable polling on this request if the opcode has
21850Sstevel@tonic-gate * the AIO poll bit set
21860Sstevel@tonic-gate */
21870Sstevel@tonic-gate if (opcode & AIO_POLL_BIT)
21880Sstevel@tonic-gate reqp->aio_req_flags |= AIO_POLL;
21890Sstevel@tonic-gate
21900Sstevel@tonic-gate if (model == DATAMODEL_NATIVE)
21910Sstevel@tonic-gate reqp->aio_req_iocb.iocb = aiocb_arg;
21920Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
21930Sstevel@tonic-gate else
21940Sstevel@tonic-gate reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
21950Sstevel@tonic-gate #endif
21960Sstevel@tonic-gate
21971885Sraf if (aio_use_port) {
21981885Sraf int event = (run_mode == AIO_LARGEFILE)?
21991885Sraf ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
22001885Sraf ((mode == FREAD)? AIOAREAD : AIOAWRITE);
22011885Sraf error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
22021885Sraf }
22030Sstevel@tonic-gate
22040Sstevel@tonic-gate /*
22050Sstevel@tonic-gate * send the request to the driver.
22060Sstevel@tonic-gate */
22070Sstevel@tonic-gate if (error == 0) {
22080Sstevel@tonic-gate if (bufsize == 0) {
22090Sstevel@tonic-gate clear_active_fd(fd);
22100Sstevel@tonic-gate aio_zerolen(reqp);
22110Sstevel@tonic-gate return (0);
22120Sstevel@tonic-gate }
22130Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
22140Sstevel@tonic-gate }
22150Sstevel@tonic-gate
22160Sstevel@tonic-gate /*
22170Sstevel@tonic-gate * the fd is stored in the aio_req_t by aio_req_setup(), and
22180Sstevel@tonic-gate * is released by the aio_cleanup_thread() when the IO has
22190Sstevel@tonic-gate * completed.
22200Sstevel@tonic-gate */
22210Sstevel@tonic-gate if (error) {
22220Sstevel@tonic-gate releasef(fd);
22230Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
22244502Spraks if (aio_use_port)
22254502Spraks aio_deq(&aiop->aio_portpending, reqp);
22260Sstevel@tonic-gate aio_req_free(aiop, reqp);
22270Sstevel@tonic-gate aiop->aio_pending--;
22280Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK)
22290Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv);
22300Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
22310Sstevel@tonic-gate return (error);
22320Sstevel@tonic-gate }
22330Sstevel@tonic-gate clear_active_fd(fd);
22340Sstevel@tonic-gate return (0);
22350Sstevel@tonic-gate }
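
/*
 * A hedged sketch of the SIGEV_PORT wiring this routine parses: the
 * application points sigev_value at a port_notify_t, and the
 * completion arrives as a PORT_SOURCE_AIO event on the port.  Error
 * handling is elided; variable names are hypothetical.
 *
 *	#include <aio.h>
 *	#include <port.h>
 *
 *	port_notify_t pn;
 *	port_event_t pe;
 *	int port = port_create();
 *
 *	pn.portnfy_port = port;
 *	pn.portnfy_user = &cb;
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	(void) aio_read(&cb);
 *	(void) port_get(port, &pe, NULL);
 */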
22360Sstevel@tonic-gate
22370Sstevel@tonic-gate
22380Sstevel@tonic-gate /*
22390Sstevel@tonic-gate * set error for a list IO entry that failed.
22400Sstevel@tonic-gate */
22410Sstevel@tonic-gate static void
22424502Spraks lio_set_error(aio_req_t *reqp, int portused)
22430Sstevel@tonic-gate {
22440Sstevel@tonic-gate aio_t *aiop = curproc->p_aio;
22450Sstevel@tonic-gate
22460Sstevel@tonic-gate if (aiop == NULL)
22470Sstevel@tonic-gate return;
22480Sstevel@tonic-gate
22490Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
22504502Spraks if (portused)
22514502Spraks aio_deq(&aiop->aio_portpending, reqp);
22520Sstevel@tonic-gate aiop->aio_pending--;
22530Sstevel@tonic-gate /* request failed; AIO_PHYSIODONE is set to avoid physio cleanup. */
22540Sstevel@tonic-gate reqp->aio_req_flags |= AIO_PHYSIODONE;
22550Sstevel@tonic-gate /*
22560Sstevel@tonic-gate * Need to free the request now as it's never
22570Sstevel@tonic-gate * going to get on the done queue.
22580Sstevel@tonic-gate *
22590Sstevel@tonic-gate * Note: aio_outstanding is decremented in
22600Sstevel@tonic-gate * aio_req_free()
22610Sstevel@tonic-gate */
22620Sstevel@tonic-gate aio_req_free(aiop, reqp);
22630Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK)
22640Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv);
22650Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
22660Sstevel@tonic-gate }
22670Sstevel@tonic-gate
22680Sstevel@tonic-gate /*
22690Sstevel@tonic-gate * check if a specified request is done, and remove it from
22700Sstevel@tonic-gate * the done queue. otherwise remove any request from the
22710Sstevel@tonic-gate * done queue if NULL is specified.
22720Sstevel@tonic-gate */
22730Sstevel@tonic-gate static aio_req_t *
22740Sstevel@tonic-gate aio_req_done(void *resultp)
22750Sstevel@tonic-gate {
22760Sstevel@tonic-gate aio_req_t **bucket;
22770Sstevel@tonic-gate aio_req_t *ent;
22780Sstevel@tonic-gate aio_t *aiop = curproc->p_aio;
22790Sstevel@tonic-gate long index;
22800Sstevel@tonic-gate
22810Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
22820Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex));
22830Sstevel@tonic-gate
22840Sstevel@tonic-gate if (resultp) {
22850Sstevel@tonic-gate index = AIO_HASH(resultp);
22860Sstevel@tonic-gate bucket = &aiop->aio_hash[index];
22870Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
22880Sstevel@tonic-gate if (ent->aio_req_resultp == (aio_result_t *)resultp) {
22890Sstevel@tonic-gate if (ent->aio_req_flags & AIO_DONEQ) {
22900Sstevel@tonic-gate return (aio_req_remove(ent));
22910Sstevel@tonic-gate }
22920Sstevel@tonic-gate return (NULL);
22930Sstevel@tonic-gate }
22940Sstevel@tonic-gate }
22950Sstevel@tonic-gate /* no match, resultp is invalid */
22960Sstevel@tonic-gate return (NULL);
22970Sstevel@tonic-gate }
22980Sstevel@tonic-gate return (aio_req_remove(NULL));
22990Sstevel@tonic-gate }
23000Sstevel@tonic-gate
23010Sstevel@tonic-gate /*
23020Sstevel@tonic-gate * determine if a user-level resultp pointer is associated with an
23030Sstevel@tonic-gate * active IO request. Zero is returned when the request is done,
23040Sstevel@tonic-gate * and the request is removed from the done queue. Only when the
23050Sstevel@tonic-gate * return value is zero, is the "reqp" pointer valid. One is returned
23060Sstevel@tonic-gate * when the request is in progress. Two is returned when the request
23070Sstevel@tonic-gate * is invalid.
23080Sstevel@tonic-gate */
23090Sstevel@tonic-gate static int
23100Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
23110Sstevel@tonic-gate {
23120Sstevel@tonic-gate aio_req_t **bucket;
23130Sstevel@tonic-gate aio_req_t *ent;
23140Sstevel@tonic-gate aio_t *aiop = curproc->p_aio;
23150Sstevel@tonic-gate long index;
23160Sstevel@tonic-gate
23170Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
23180Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23190Sstevel@tonic-gate
23200Sstevel@tonic-gate index = AIO_HASH(resultp);
23210Sstevel@tonic-gate bucket = &aiop->aio_hash[index];
23220Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
23230Sstevel@tonic-gate if (ent->aio_req_resultp == resultp) {
23240Sstevel@tonic-gate if (ent->aio_req_flags & AIO_DONEQ) {
23250Sstevel@tonic-gate *reqp = aio_req_remove(ent);
23260Sstevel@tonic-gate return (0);
23270Sstevel@tonic-gate }
23280Sstevel@tonic-gate return (1);
23290Sstevel@tonic-gate }
23300Sstevel@tonic-gate }
23310Sstevel@tonic-gate /* no match, resultp is invalid */
23320Sstevel@tonic-gate return (2);
23330Sstevel@tonic-gate }
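
/*
 * Calling pattern (sketch, mirroring aioerror() above): both
 * aio_cleanupq_mutex and aio_mutex must be held across the lookup,
 * and reqp is valid only when zero is returned.
 *
 *	mutex_enter(&aiop->aio_cleanupq_mutex);
 *	mutex_enter(&aiop->aio_mutex);
 *	retval = aio_req_find(resultp, &reqp);
 *	mutex_exit(&aiop->aio_mutex);
 *	mutex_exit(&aiop->aio_cleanupq_mutex);
 */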
23340Sstevel@tonic-gate
23350Sstevel@tonic-gate /*
23360Sstevel@tonic-gate * remove a request from the done queue.
23370Sstevel@tonic-gate */
23380Sstevel@tonic-gate static aio_req_t *
23390Sstevel@tonic-gate aio_req_remove(aio_req_t *reqp)
23400Sstevel@tonic-gate {
23410Sstevel@tonic-gate aio_t *aiop = curproc->p_aio;
23420Sstevel@tonic-gate
23430Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex));
23440Sstevel@tonic-gate
23451885Sraf if (reqp != NULL) {
23460Sstevel@tonic-gate ASSERT(reqp->aio_req_flags & AIO_DONEQ);
23470Sstevel@tonic-gate if (reqp->aio_req_next == reqp) {
23480Sstevel@tonic-gate /* only one request on queue */
23490Sstevel@tonic-gate if (reqp == aiop->aio_doneq) {
23500Sstevel@tonic-gate aiop->aio_doneq = NULL;
23510Sstevel@tonic-gate } else {
23520Sstevel@tonic-gate ASSERT(reqp == aiop->aio_cleanupq);
23530Sstevel@tonic-gate aiop->aio_cleanupq = NULL;
23540Sstevel@tonic-gate }
23550Sstevel@tonic-gate } else {
23560Sstevel@tonic-gate reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
23570Sstevel@tonic-gate reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
23580Sstevel@tonic-gate /*
23590Sstevel@tonic-gate * The request can be either on the aio_doneq or the
23600Sstevel@tonic-gate * aio_cleanupq
23610Sstevel@tonic-gate */
23620Sstevel@tonic-gate if (reqp == aiop->aio_doneq)
23630Sstevel@tonic-gate aiop->aio_doneq = reqp->aio_req_next;
23640Sstevel@tonic-gate
23650Sstevel@tonic-gate if (reqp == aiop->aio_cleanupq)
23660Sstevel@tonic-gate aiop->aio_cleanupq = reqp->aio_req_next;
23670Sstevel@tonic-gate }
23680Sstevel@tonic-gate reqp->aio_req_flags &= ~AIO_DONEQ;
23691885Sraf reqp->aio_req_next = NULL;
23701885Sraf reqp->aio_req_prev = NULL;
23711885Sraf } else if ((reqp = aiop->aio_doneq) != NULL) {
23721885Sraf ASSERT(reqp->aio_req_flags & AIO_DONEQ);
23731885Sraf if (reqp == reqp->aio_req_next) {
23740Sstevel@tonic-gate /* only one request on queue */
23750Sstevel@tonic-gate aiop->aio_doneq = NULL;
23760Sstevel@tonic-gate } else {
23771885Sraf reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
23781885Sraf reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
23791885Sraf aiop->aio_doneq = reqp->aio_req_next;
23800Sstevel@tonic-gate }
23811885Sraf reqp->aio_req_flags &= ~AIO_DONEQ;
23821885Sraf reqp->aio_req_next = NULL;
23831885Sraf reqp->aio_req_prev = NULL;
23840Sstevel@tonic-gate }
23851885Sraf if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
23861885Sraf cv_broadcast(&aiop->aio_waitcv);
23871885Sraf return (reqp);
23880Sstevel@tonic-gate }
23890Sstevel@tonic-gate
23900Sstevel@tonic-gate static int
23910Sstevel@tonic-gate aio_req_setup(
23920Sstevel@tonic-gate aio_req_t **reqpp,
23930Sstevel@tonic-gate aio_t *aiop,
23940Sstevel@tonic-gate aiocb_t *arg,
23950Sstevel@tonic-gate aio_result_t *resultp,
2396*10719SRoger.Faulkner@Sun.COM vnode_t *vp,
2397*10719SRoger.Faulkner@Sun.COM int old_solaris_req)
23980Sstevel@tonic-gate {
23991885Sraf sigqueue_t *sqp = NULL;
24000Sstevel@tonic-gate aio_req_t *reqp;
24010Sstevel@tonic-gate struct uio *uio;
24020Sstevel@tonic-gate struct sigevent *sigev;
24030Sstevel@tonic-gate int error;
24040Sstevel@tonic-gate
24050Sstevel@tonic-gate sigev = &arg->aio_sigevent;
24061885Sraf if (sigev->sigev_notify == SIGEV_SIGNAL &&
24071885Sraf sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
24080Sstevel@tonic-gate sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
24090Sstevel@tonic-gate if (sqp == NULL)
24100Sstevel@tonic-gate return (EAGAIN);
24110Sstevel@tonic-gate sqp->sq_func = NULL;
24120Sstevel@tonic-gate sqp->sq_next = NULL;
24130Sstevel@tonic-gate sqp->sq_info.si_code = SI_ASYNCIO;
24140Sstevel@tonic-gate sqp->sq_info.si_pid = curproc->p_pid;
24150Sstevel@tonic-gate sqp->sq_info.si_ctid = PRCTID(curproc);
24160Sstevel@tonic-gate sqp->sq_info.si_zoneid = getzoneid();
24170Sstevel@tonic-gate sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
24180Sstevel@tonic-gate sqp->sq_info.si_signo = sigev->sigev_signo;
24190Sstevel@tonic-gate sqp->sq_info.si_value = sigev->sigev_value;
24201885Sraf }
24210Sstevel@tonic-gate
24220Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
24230Sstevel@tonic-gate
24240Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK) {
24250Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
24260Sstevel@tonic-gate if (sqp)
24270Sstevel@tonic-gate kmem_free(sqp, sizeof (sigqueue_t));
24280Sstevel@tonic-gate return (EIO);
24290Sstevel@tonic-gate }
24300Sstevel@tonic-gate /*
24310Sstevel@tonic-gate * get an aio_reqp from the free list or allocate one
24320Sstevel@tonic-gate * from dynamic memory.
24330Sstevel@tonic-gate */
24340Sstevel@tonic-gate if (error = aio_req_alloc(&reqp, resultp)) {
24350Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
24360Sstevel@tonic-gate if (sqp)
24370Sstevel@tonic-gate kmem_free(sqp, sizeof (sigqueue_t));
24380Sstevel@tonic-gate return (error);
24390Sstevel@tonic-gate }
24400Sstevel@tonic-gate aiop->aio_pending++;
24410Sstevel@tonic-gate aiop->aio_outstanding++;
24420Sstevel@tonic-gate reqp->aio_req_flags = AIO_PENDING;
2443*10719SRoger.Faulkner@Sun.COM if (old_solaris_req) {
2444*10719SRoger.Faulkner@Sun.COM /* this is an old solaris aio request */
2445*10719SRoger.Faulkner@Sun.COM reqp->aio_req_flags |= AIO_SOLARIS;
2446*10719SRoger.Faulkner@Sun.COM aiop->aio_flags |= AIO_SOLARIS_REQ;
2447*10719SRoger.Faulkner@Sun.COM }
24481885Sraf if (sigev->sigev_notify == SIGEV_THREAD ||
24491885Sraf sigev->sigev_notify == SIGEV_PORT)
24501885Sraf aio_enq(&aiop->aio_portpending, reqp, 0);
24510Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
24520Sstevel@tonic-gate /*
24530Sstevel@tonic-gate * initialize aio request.
24540Sstevel@tonic-gate */
24550Sstevel@tonic-gate reqp->aio_req_fd = arg->aio_fildes;
24560Sstevel@tonic-gate reqp->aio_req_sigqp = sqp;
24570Sstevel@tonic-gate reqp->aio_req_iocb.iocb = NULL;
24581885Sraf reqp->aio_req_lio = NULL;
24590Sstevel@tonic-gate reqp->aio_req_buf.b_file = vp;
24600Sstevel@tonic-gate uio = reqp->aio_req.aio_uio;
24610Sstevel@tonic-gate uio->uio_iovcnt = 1;
24620Sstevel@tonic-gate uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
24630Sstevel@tonic-gate uio->uio_iov->iov_len = arg->aio_nbytes;
24640Sstevel@tonic-gate uio->uio_loffset = arg->aio_offset;
24650Sstevel@tonic-gate *reqpp = reqp;
24660Sstevel@tonic-gate return (0);
24670Sstevel@tonic-gate }
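
/*
 * Userland counterpart (sketch): a SIGEV_SIGNAL aiocb such as the one
 * below is what makes aio_req_setup() allocate the sigqueue_t above.
 *
 *	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
 *	cb.aio_sigevent.sigev_signo = SIGUSR1;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &cb;
 */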
24680Sstevel@tonic-gate
24690Sstevel@tonic-gate /*
24700Sstevel@tonic-gate * Allocate p_aio struct.
24710Sstevel@tonic-gate */
24720Sstevel@tonic-gate static aio_t *
24730Sstevel@tonic-gate aio_aiop_alloc(void)
24740Sstevel@tonic-gate {
24750Sstevel@tonic-gate aio_t *aiop;
24760Sstevel@tonic-gate
24770Sstevel@tonic-gate ASSERT(MUTEX_HELD(&curproc->p_lock));
24780Sstevel@tonic-gate
24790Sstevel@tonic-gate aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
24800Sstevel@tonic-gate if (aiop) {
24810Sstevel@tonic-gate mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
24820Sstevel@tonic-gate mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
24834502Spraks NULL);
24840Sstevel@tonic-gate mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
24850Sstevel@tonic-gate }
24860Sstevel@tonic-gate return (aiop);
24870Sstevel@tonic-gate }
24880Sstevel@tonic-gate
24890Sstevel@tonic-gate /*
24900Sstevel@tonic-gate * Allocate an aio_req struct.
24910Sstevel@tonic-gate */
24920Sstevel@tonic-gate static int
24930Sstevel@tonic-gate aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
24940Sstevel@tonic-gate {
24950Sstevel@tonic-gate aio_req_t *reqp;
24960Sstevel@tonic-gate aio_t *aiop = curproc->p_aio;
24970Sstevel@tonic-gate
24980Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex));
24990Sstevel@tonic-gate
25000Sstevel@tonic-gate if ((reqp = aiop->aio_free) != NULL) {
25010Sstevel@tonic-gate aiop->aio_free = reqp->aio_req_next;
25021885Sraf bzero(reqp, sizeof (*reqp));
25030Sstevel@tonic-gate } else {
25040Sstevel@tonic-gate /*
25050Sstevel@tonic-gate * Check whether memory is getting tight.
25060Sstevel@tonic-gate * This is a temporary mechanism to avoid memory
25070Sstevel@tonic-gate * exhaustion by a single process until we come up
25080Sstevel@tonic-gate * with a per process solution such as setrlimit().
25090Sstevel@tonic-gate */
25100Sstevel@tonic-gate if (freemem < desfree)
25110Sstevel@tonic-gate return (EAGAIN);
25120Sstevel@tonic-gate reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
25130Sstevel@tonic-gate if (reqp == NULL)
25140Sstevel@tonic-gate return (EAGAIN);
25150Sstevel@tonic-gate }
25161885Sraf reqp->aio_req.aio_uio = &reqp->aio_req_uio;
25171885Sraf reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
25181885Sraf reqp->aio_req.aio_private = reqp;
25190Sstevel@tonic-gate reqp->aio_req_buf.b_offset = -1;
25200Sstevel@tonic-gate reqp->aio_req_resultp = resultp;
25210Sstevel@tonic-gate if (aio_hash_insert(reqp, aiop)) {
25220Sstevel@tonic-gate reqp->aio_req_next = aiop->aio_free;
25230Sstevel@tonic-gate aiop->aio_free = reqp;
252410011SPrakash.Sangappa@Sun.COM return (EBUSY);
25250Sstevel@tonic-gate }
25260Sstevel@tonic-gate *nreqp = reqp;
25270Sstevel@tonic-gate return (0);
25280Sstevel@tonic-gate }
25290Sstevel@tonic-gate
25300Sstevel@tonic-gate /*
25310Sstevel@tonic-gate * Allocate an aio_lio_t struct.
25320Sstevel@tonic-gate */
25330Sstevel@tonic-gate static int
25340Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head)
25350Sstevel@tonic-gate {
25360Sstevel@tonic-gate aio_lio_t *liop;
25370Sstevel@tonic-gate aio_t *aiop = curproc->p_aio;
25380Sstevel@tonic-gate
25390Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex));
25400Sstevel@tonic-gate
25410Sstevel@tonic-gate if ((liop = aiop->aio_lio_free) != NULL) {
25420Sstevel@tonic-gate aiop->aio_lio_free = liop->lio_next;
25430Sstevel@tonic-gate } else {
25440Sstevel@tonic-gate /*
25450Sstevel@tonic-gate * Check whether memory is getting tight.
25460Sstevel@tonic-gate * This is a temporary mechanism to avoid memory
25470Sstevel@tonic-gate * exhaustion by a single process until we come up
25480Sstevel@tonic-gate * with a per-process solution such as setrlimit().
25490Sstevel@tonic-gate */
25500Sstevel@tonic-gate if (freemem < desfree)
25510Sstevel@tonic-gate return (EAGAIN);
25520Sstevel@tonic-gate
25530Sstevel@tonic-gate liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
25540Sstevel@tonic-gate if (liop == NULL)
25550Sstevel@tonic-gate return (EAGAIN);
25560Sstevel@tonic-gate }
25570Sstevel@tonic-gate *head = liop;
25580Sstevel@tonic-gate return (0);
25590Sstevel@tonic-gate }
25600Sstevel@tonic-gate
25610Sstevel@tonic-gate /*
25620Sstevel@tonic-gate * this is a special per-process thread that is only activated if
25630Sstevel@tonic-gate * the process is unmapping a segment with outstanding aio. normally,
25640Sstevel@tonic-gate * the process will have completed the aio before unmapping the
25650Sstevel@tonic-gate * segment. If the process does unmap a segment with outstanding aio,
25660Sstevel@tonic-gate * this special thread will guarantee that the locked pages due to
25670Sstevel@tonic-gate * aphysio() are released, thereby permitting the segment to be
2568304Spraks * unmapped. In addition to this, the cleanup thread is woken up
2569304Spraks * during DR operations to release the locked pages.
25700Sstevel@tonic-gate */
25710Sstevel@tonic-gate
25720Sstevel@tonic-gate static int
25730Sstevel@tonic-gate aio_cleanup_thread(aio_t *aiop)
25740Sstevel@tonic-gate {
25750Sstevel@tonic-gate proc_t *p = curproc;
25760Sstevel@tonic-gate struct as *as = p->p_as;
25770Sstevel@tonic-gate int poked = 0;
25780Sstevel@tonic-gate kcondvar_t *cvp;
25790Sstevel@tonic-gate int exit_flag = 0;
2580304Spraks int rqclnup = 0;
25810Sstevel@tonic-gate
25820Sstevel@tonic-gate sigfillset(&curthread->t_hold);
25830Sstevel@tonic-gate sigdiffset(&curthread->t_hold, &cantmask);
25840Sstevel@tonic-gate for (;;) {
25850Sstevel@tonic-gate /*
25860Sstevel@tonic-gate * if a segment is being unmapped, and the current
25870Sstevel@tonic-gate * process's done queue is not empty, then every request
25880Sstevel@tonic-gate * on the doneq with locked resources should be forced
25890Sstevel@tonic-gate * to release their locks. By moving the doneq request
25900Sstevel@tonic-gate * to the cleanupq, aio_cleanup() will process the cleanupq,
25910Sstevel@tonic-gate * and place requests back onto the doneq. All requests
25920Sstevel@tonic-gate * processed by aio_cleanup() will have their physical
25930Sstevel@tonic-gate * resources unlocked.
25940Sstevel@tonic-gate */
25950Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
25960Sstevel@tonic-gate if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
25970Sstevel@tonic-gate aiop->aio_flags |= AIO_CLEANUP;
25980Sstevel@tonic-gate mutex_enter(&as->a_contents);
2599304Spraks if (aiop->aio_rqclnup) {
2600304Spraks aiop->aio_rqclnup = 0;
2601304Spraks rqclnup = 1;
2602304Spraks }
26039973SSurya.Prakki@Sun.COM mutex_exit(&as->a_contents);
26049973SSurya.Prakki@Sun.COM if (aiop->aio_doneq) {
26050Sstevel@tonic-gate aio_req_t *doneqhead = aiop->aio_doneq;
26060Sstevel@tonic-gate aiop->aio_doneq = NULL;
26070Sstevel@tonic-gate aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
26080Sstevel@tonic-gate }
26090Sstevel@tonic-gate }
26100Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
26110Sstevel@tonic-gate aio_cleanup(AIO_CLEANUP_THREAD);
26120Sstevel@tonic-gate /*
26130Sstevel@tonic-gate * thread should block on the cleanupcv while
26140Sstevel@tonic-gate * AIO_CLEANUP is set.
26150Sstevel@tonic-gate */
26160Sstevel@tonic-gate cvp = &aiop->aio_cleanupcv;
26170Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
26180Sstevel@tonic-gate
26190Sstevel@tonic-gate if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
26200Sstevel@tonic-gate aiop->aio_notifyq != NULL ||
26210Sstevel@tonic-gate aiop->aio_portcleanupq != NULL) {
26220Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
26230Sstevel@tonic-gate continue;
26240Sstevel@tonic-gate }
26250Sstevel@tonic-gate mutex_enter(&as->a_contents);
26260Sstevel@tonic-gate
26270Sstevel@tonic-gate /*
26280Sstevel@tonic-gate * AIO_CLEANUP determines when the cleanup thread
2629304Spraks * should be active. This flag is set when
2630304Spraks * the cleanup thread is awakened by as_unmap() or
2631304Spraks * due to DR operations.
26320Sstevel@tonic-gate * The flag is cleared when the blocking as_unmap()
26330Sstevel@tonic-gate * that originally awakened us is allowed to
26340Sstevel@tonic-gate * complete. as_unmap() blocks when trying to
26350Sstevel@tonic-gate * unmap a segment that has SOFTLOCKed pages. When
26360Sstevel@tonic-gate * the segment's pages are all SOFTUNLOCKed,
2637304Spraks * as->a_flags & AS_UNMAPWAIT should be zero.
2638304Spraks *
2639304Spraks * In case of cleanup request by DR, the flag is cleared
2640304Spraks * once all the pending aio requests have been processed.
2641304Spraks *
2642304Spraks * The flag shouldn't be cleared right away if the
2643304Spraks * cleanup thread was interrupted because the process
2644304Spraks * is doing forkall(). This happens when cv_wait_sig()
2645304Spraks * returns zero, because it was awakened by a pokelwps().
2646304Spraks * If the process is not exiting, it must be doing forkall().
26470Sstevel@tonic-gate */
26480Sstevel@tonic-gate if ((poked == 0) &&
26494502Spraks ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
26504502Spraks (aiop->aio_pending == 0))) {
26510Sstevel@tonic-gate aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
26520Sstevel@tonic-gate cvp = &as->a_cv;
2653304Spraks rqclnup = 0;
26540Sstevel@tonic-gate }
26550Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
26560Sstevel@tonic-gate if (poked) {
26570Sstevel@tonic-gate /*
26580Sstevel@tonic-gate * If the process is exiting/killed, don't return
26590Sstevel@tonic-gate * immediately without waiting for pending I/O's
26600Sstevel@tonic-gate * and releasing the page locks.
26610Sstevel@tonic-gate */
26620Sstevel@tonic-gate if (p->p_flag & (SEXITLWPS|SKILLED)) {
26630Sstevel@tonic-gate /*
26640Sstevel@tonic-gate * If exit_flag is set, then it is
26650Sstevel@tonic-gate * safe to exit because we have released
26660Sstevel@tonic-gate * page locks of completed I/O's.
26670Sstevel@tonic-gate */
26680Sstevel@tonic-gate if (exit_flag)
26690Sstevel@tonic-gate break;
26700Sstevel@tonic-gate
26710Sstevel@tonic-gate mutex_exit(&as->a_contents);
26720Sstevel@tonic-gate
26730Sstevel@tonic-gate /*
26740Sstevel@tonic-gate * Wait for all the pending aio to complete.
26750Sstevel@tonic-gate */
26760Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
26770Sstevel@tonic-gate aiop->aio_flags |= AIO_REQ_BLOCK;
26780Sstevel@tonic-gate while (aiop->aio_pending != 0)
26790Sstevel@tonic-gate cv_wait(&aiop->aio_cleanupcv,
26804502Spraks &aiop->aio_mutex);
26810Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
26820Sstevel@tonic-gate exit_flag = 1;
26830Sstevel@tonic-gate continue;
26840Sstevel@tonic-gate } else if (p->p_flag &
26850Sstevel@tonic-gate (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
26860Sstevel@tonic-gate /*
26870Sstevel@tonic-gate * hold LWP until it
26880Sstevel@tonic-gate * is continued.
26890Sstevel@tonic-gate */
26900Sstevel@tonic-gate mutex_exit(&as->a_contents);
26910Sstevel@tonic-gate mutex_enter(&p->p_lock);
26920Sstevel@tonic-gate stop(PR_SUSPENDED, SUSPEND_NORMAL);
26930Sstevel@tonic-gate mutex_exit(&p->p_lock);
26940Sstevel@tonic-gate poked = 0;
26950Sstevel@tonic-gate continue;
26960Sstevel@tonic-gate }
26970Sstevel@tonic-gate } else {
26980Sstevel@tonic-gate /*
26990Sstevel@tonic-gate * When started, this thread sleeps on as->a_cv.
27000Sstevel@tonic-gate * as_unmap() will awaken this thread if the
27010Sstevel@tonic-gate * segment has SOFTLOCKed pages (poked = 0).
27020Sstevel@tonic-gate * 1. pokelwps() awakens this thread =>
27030Sstevel@tonic-gate * break the loop to check SEXITLWPS, SHOLDFORK, etc.
27040Sstevel@tonic-gate * 2. as_unmap() awakens this thread =>
27050Sstevel@tonic-gate * to break the loop it is necessary that
27060Sstevel@tonic-gate * - AS_UNMAPWAIT is set (as_unmap is waiting for
27070Sstevel@tonic-gate * memory to be unlocked)
27080Sstevel@tonic-gate * - AIO_CLEANUP is not set
27090Sstevel@tonic-gate * (if AIO_CLEANUP is set we have to wait for
27100Sstevel@tonic-gate * pending requests. aio_done will send a signal
27110Sstevel@tonic-gate * for every request which completes to continue
27120Sstevel@tonic-gate * unmapping the corresponding address range)
2713304Spraks * 3. A cleanup request will wake this thread up, e.g.
2714304Spraks * by a DR operation. The aio_rqclnup flag will
2715304Spraks * be set.
27160Sstevel@tonic-gate */
27170Sstevel@tonic-gate while (poked == 0) {
2718304Spraks /*
27194532Ssp92102 * Cleanup requests that came in after we
27204532Ssp92102 * had just cleaned up cannot be what is
27214532Ssp92102 * blocking the unmap thread, since the
27224532Ssp92102 * unmap event happened first.
27234532Ssp92102 * Let aio_done() wake us up if it sees a need.
2724304Spraks */
27254532Ssp92102 if (aiop->aio_rqclnup &&
27264502Spraks (aiop->aio_flags & AIO_CLEANUP) == 0)
27270Sstevel@tonic-gate break;
27280Sstevel@tonic-gate poked = !cv_wait_sig(cvp, &as->a_contents);
27290Sstevel@tonic-gate if (AS_ISUNMAPWAIT(as) == 0)
27300Sstevel@tonic-gate cv_signal(cvp);
27310Sstevel@tonic-gate if (aiop->aio_outstanding != 0)
27320Sstevel@tonic-gate break;
27330Sstevel@tonic-gate }
27340Sstevel@tonic-gate }
27350Sstevel@tonic-gate mutex_exit(&as->a_contents);
27360Sstevel@tonic-gate }
27370Sstevel@tonic-gate exit:
27380Sstevel@tonic-gate mutex_exit(&as->a_contents);
27390Sstevel@tonic-gate ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
27400Sstevel@tonic-gate aston(curthread); /* make thread do post_syscall */
27410Sstevel@tonic-gate return (0);
27420Sstevel@tonic-gate }
27430Sstevel@tonic-gate
27440Sstevel@tonic-gate /*
27450Sstevel@tonic-gate * save a reference to a user's outstanding aio in a hash list.
27460Sstevel@tonic-gate */
27470Sstevel@tonic-gate static int
27480Sstevel@tonic-gate aio_hash_insert(
27490Sstevel@tonic-gate aio_req_t *aio_reqp,
27500Sstevel@tonic-gate aio_t *aiop)
27510Sstevel@tonic-gate {
27520Sstevel@tonic-gate long index;
27530Sstevel@tonic-gate aio_result_t *resultp = aio_reqp->aio_req_resultp;
27540Sstevel@tonic-gate aio_req_t *current;
27550Sstevel@tonic-gate aio_req_t **nextp;
27560Sstevel@tonic-gate
27570Sstevel@tonic-gate index = AIO_HASH(resultp);
27580Sstevel@tonic-gate nextp = &aiop->aio_hash[index];
27590Sstevel@tonic-gate while ((current = *nextp) != NULL) {
27600Sstevel@tonic-gate if (current->aio_req_resultp == resultp)
27610Sstevel@tonic-gate return (DUPLICATE);
27620Sstevel@tonic-gate nextp = &current->aio_hash_next;
27630Sstevel@tonic-gate }
27640Sstevel@tonic-gate *nextp = aio_reqp;
27650Sstevel@tonic-gate aio_reqp->aio_hash_next = NULL;
27660Sstevel@tonic-gate return (0);
27670Sstevel@tonic-gate }
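
/*
 * Illustration only (not part of the original source): the lookup that
 * pairs with aio_hash_insert() walks the same chain selected by
 * AIO_HASH(resultp).  A hypothetical helper might look like:
 *
 *	static aio_req_t *
 *	aio_hash_find(aio_result_t *resultp, aio_t *aiop)
 *	{
 *		aio_req_t *current = aiop->aio_hash[AIO_HASH(resultp)];
 *
 *		while (current != NULL &&
 *		    current->aio_req_resultp != resultp)
 *			current = current->aio_hash_next;
 *		return (current);
 *	}
 *
 * As with insertion, a caller would be expected to hold aio_mutex
 * while traversing the chain.
 */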
27680Sstevel@tonic-gate
27690Sstevel@tonic-gate static int
27700Sstevel@tonic-gate (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
27710Sstevel@tonic-gate cred_t *)
27720Sstevel@tonic-gate {
27730Sstevel@tonic-gate struct snode *sp;
27740Sstevel@tonic-gate dev_t dev;
27750Sstevel@tonic-gate struct cb_ops *cb;
27760Sstevel@tonic-gate major_t major;
27770Sstevel@tonic-gate int (*aio_func)();
27780Sstevel@tonic-gate
27790Sstevel@tonic-gate dev = vp->v_rdev;
27800Sstevel@tonic-gate major = getmajor(dev);
27810Sstevel@tonic-gate
27820Sstevel@tonic-gate /*
27830Sstevel@tonic-gate * return NULL for requests to files and STREAMs so
27840Sstevel@tonic-gate * that libaio takes care of them.
27850Sstevel@tonic-gate */
27860Sstevel@tonic-gate if (vp->v_type == VCHR) {
27870Sstevel@tonic-gate /* no stream device for kaio */
27880Sstevel@tonic-gate if (STREAMSTAB(major)) {
27890Sstevel@tonic-gate return (NULL);
27900Sstevel@tonic-gate }
27910Sstevel@tonic-gate } else {
27920Sstevel@tonic-gate return (NULL);
27930Sstevel@tonic-gate }
27940Sstevel@tonic-gate
27950Sstevel@tonic-gate /*
27960Sstevel@tonic-gate * Check old drivers which do not have async I/O entry points.
27970Sstevel@tonic-gate */
27980Sstevel@tonic-gate if (devopsp[major]->devo_rev < 3)
27990Sstevel@tonic-gate return (NULL);
28000Sstevel@tonic-gate
28010Sstevel@tonic-gate cb = devopsp[major]->devo_cb_ops;
28020Sstevel@tonic-gate
28030Sstevel@tonic-gate if (cb->cb_rev < 1)
28040Sstevel@tonic-gate return (NULL);
28050Sstevel@tonic-gate
28060Sstevel@tonic-gate /*
28070Sstevel@tonic-gate * Check whether this device is a block device.
28080Sstevel@tonic-gate * Kaio is not supported for devices like tty.
28090Sstevel@tonic-gate */
28100Sstevel@tonic-gate if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
28110Sstevel@tonic-gate return (NULL);
28120Sstevel@tonic-gate
28130Sstevel@tonic-gate /*
28140Sstevel@tonic-gate * Clustering: If vnode is a PXFS vnode, then the device may be remote.
28150Sstevel@tonic-gate * We cannot call the driver directly. Instead return the
28160Sstevel@tonic-gate * PXFS functions.
28170Sstevel@tonic-gate */
28180Sstevel@tonic-gate
28190Sstevel@tonic-gate if (IS_PXFSVP(vp)) {
28200Sstevel@tonic-gate if (mode & FREAD)
28210Sstevel@tonic-gate return (clpxfs_aio_read);
28220Sstevel@tonic-gate else
28230Sstevel@tonic-gate return (clpxfs_aio_write);
28240Sstevel@tonic-gate }
28250Sstevel@tonic-gate if (mode & FREAD)
28260Sstevel@tonic-gate aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
28270Sstevel@tonic-gate else
28280Sstevel@tonic-gate aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
28290Sstevel@tonic-gate
28300Sstevel@tonic-gate /*
28310Sstevel@tonic-gate * Do we need this?
28320Sstevel@tonic-gate * nodev returns ENXIO anyway.
28330Sstevel@tonic-gate */
28340Sstevel@tonic-gate if (aio_func == nodev)
28350Sstevel@tonic-gate return (NULL);
28360Sstevel@tonic-gate
28370Sstevel@tonic-gate sp = VTOS(vp);
28380Sstevel@tonic-gate smark(sp, SACC);
28390Sstevel@tonic-gate return (aio_func);
28400Sstevel@tonic-gate }
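
/*
 * Illustration only (not part of the original source): callers of
 * check_vp() dispatch through the returned pointer and fall back to
 * user-level aio when NULL comes back, as alioLF() and alio32() below
 * demonstrate.  In outline:
 *
 *	int (*aio_func)(vnode_t *, struct aio_req *, cred_t *);
 *
 *	aio_func = check_vp(vp, mode);
 *	if (aio_func == NULL) {
 *		error = EBADFD;		(request handled by libaio)
 *	} else {
 *		error = (*aio_func)(vp, &reqp->aio_req, CRED());
 *	}
 */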
28410Sstevel@tonic-gate
28420Sstevel@tonic-gate /*
28430Sstevel@tonic-gate * Clustering: We want check_vp to return a correctly prototyped
28440Sstevel@tonic-gate * function that is common to both the PXFS and the regular case.
28450Sstevel@tonic-gate * We define this intermediate function that will do the right
28460Sstevel@tonic-gate * thing for driver cases.
28470Sstevel@tonic-gate */
28480Sstevel@tonic-gate
28490Sstevel@tonic-gate static int
28500Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28510Sstevel@tonic-gate {
28520Sstevel@tonic-gate dev_t dev;
28530Sstevel@tonic-gate struct cb_ops *cb;
28540Sstevel@tonic-gate
28550Sstevel@tonic-gate ASSERT(vp->v_type == VCHR);
28560Sstevel@tonic-gate ASSERT(!IS_PXFSVP(vp));
28570Sstevel@tonic-gate dev = VTOS(vp)->s_dev;
28580Sstevel@tonic-gate ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
28590Sstevel@tonic-gate
28600Sstevel@tonic-gate cb = devopsp[getmajor(dev)]->devo_cb_ops;
28610Sstevel@tonic-gate
28620Sstevel@tonic-gate ASSERT(cb->cb_awrite != nodev);
28630Sstevel@tonic-gate return ((*cb->cb_awrite)(dev, aio, cred_p));
28640Sstevel@tonic-gate }
28650Sstevel@tonic-gate
28660Sstevel@tonic-gate /*
28670Sstevel@tonic-gate * Clustering: We want check_vp to return a correctly prototyped
28680Sstevel@tonic-gate * function that is common to both the PXFS and the regular case.
28690Sstevel@tonic-gate * We define this intermediate function that will do the right
28700Sstevel@tonic-gate * thing for driver cases.
28710Sstevel@tonic-gate */
28720Sstevel@tonic-gate
28730Sstevel@tonic-gate static int
28740Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
28750Sstevel@tonic-gate {
28760Sstevel@tonic-gate dev_t dev;
28770Sstevel@tonic-gate struct cb_ops *cb;
28780Sstevel@tonic-gate
28790Sstevel@tonic-gate ASSERT(vp->v_type == VCHR);
28800Sstevel@tonic-gate ASSERT(!IS_PXFSVP(vp));
28810Sstevel@tonic-gate dev = VTOS(vp)->s_dev;
28820Sstevel@tonic-gate ASSERT(!STREAMSTAB(getmajor(dev)));
28830Sstevel@tonic-gate
28840Sstevel@tonic-gate cb = devopsp[getmajor(dev)]->devo_cb_ops;
28850Sstevel@tonic-gate
28860Sstevel@tonic-gate ASSERT(cb->cb_aread != nodev);
28870Sstevel@tonic-gate return ((*cb->cb_aread)(dev, aio, cred_p));
28880Sstevel@tonic-gate }
28890Sstevel@tonic-gate
28900Sstevel@tonic-gate /*
28910Sstevel@tonic-gate * This routine is called when a largefile call is made by a 32-bit
28920Sstevel@tonic-gate * process on an ILP32 or LP64 kernel. All 64-bit processes are
28930Sstevel@tonic-gate * largefile by definition and will call alio() instead.
28940Sstevel@tonic-gate */
28950Sstevel@tonic-gate static int
28960Sstevel@tonic-gate alioLF(
28970Sstevel@tonic-gate int mode_arg,
28980Sstevel@tonic-gate void *aiocb_arg,
28990Sstevel@tonic-gate int nent,
29000Sstevel@tonic-gate void *sigev)
29010Sstevel@tonic-gate {
29020Sstevel@tonic-gate file_t *fp;
29030Sstevel@tonic-gate file_t *prev_fp = NULL;
29040Sstevel@tonic-gate int prev_mode = -1;
29050Sstevel@tonic-gate struct vnode *vp;
29060Sstevel@tonic-gate aio_lio_t *head;
29070Sstevel@tonic-gate aio_req_t *reqp;
29080Sstevel@tonic-gate aio_t *aiop;
29090Sstevel@tonic-gate caddr_t cbplist;
29101885Sraf aiocb64_32_t cb64;
29111885Sraf aiocb64_32_t *aiocb = &cb64;
29120Sstevel@tonic-gate aiocb64_32_t *cbp;
29130Sstevel@tonic-gate caddr32_t *ucbp;
29140Sstevel@tonic-gate #ifdef _LP64
29150Sstevel@tonic-gate aiocb_t aiocb_n;
29160Sstevel@tonic-gate #endif
29170Sstevel@tonic-gate struct sigevent32 sigevk;
29180Sstevel@tonic-gate sigqueue_t *sqp;
29190Sstevel@tonic-gate int (*aio_func)();
29200Sstevel@tonic-gate int mode;
29211885Sraf int error = 0;
29221885Sraf int aio_errors = 0;
29230Sstevel@tonic-gate int i;
29240Sstevel@tonic-gate size_t ssize;
29250Sstevel@tonic-gate int deadhead = 0;
29260Sstevel@tonic-gate int aio_notsupported = 0;
29271885Sraf int lio_head_port;
29281885Sraf int aio_port;
29291885Sraf int aio_thread;
29300Sstevel@tonic-gate port_kevent_t *pkevtp = NULL;
29314502Spraks int portused = 0;
29320Sstevel@tonic-gate port_notify32_t pnotify;
29331885Sraf int event;
29340Sstevel@tonic-gate
29350Sstevel@tonic-gate aiop = curproc->p_aio;
29360Sstevel@tonic-gate if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
29370Sstevel@tonic-gate return (EINVAL);
29380Sstevel@tonic-gate
29390Sstevel@tonic-gate ASSERT(get_udatamodel() == DATAMODEL_ILP32);
29400Sstevel@tonic-gate
29410Sstevel@tonic-gate ssize = (sizeof (caddr32_t) * nent);
29420Sstevel@tonic-gate cbplist = kmem_alloc(ssize, KM_SLEEP);
29430Sstevel@tonic-gate ucbp = (caddr32_t *)cbplist;
29440Sstevel@tonic-gate
29451885Sraf if (copyin(aiocb_arg, cbplist, ssize) ||
29461885Sraf (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
29470Sstevel@tonic-gate kmem_free(cbplist, ssize);
29480Sstevel@tonic-gate return (EFAULT);
29490Sstevel@tonic-gate }
29500Sstevel@tonic-gate
29511885Sraf /* Event Ports */
29521885Sraf if (sigev &&
29531885Sraf (sigevk.sigev_notify == SIGEV_THREAD ||
29541885Sraf sigevk.sigev_notify == SIGEV_PORT)) {
29551885Sraf if (sigevk.sigev_notify == SIGEV_THREAD) {
29561885Sraf pnotify.portnfy_port = sigevk.sigev_signo;
29571885Sraf pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
29581885Sraf } else if (copyin(
29591885Sraf (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
29601885Sraf &pnotify, sizeof (pnotify))) {
29610Sstevel@tonic-gate kmem_free(cbplist, ssize);
29620Sstevel@tonic-gate return (EFAULT);
29630Sstevel@tonic-gate }
29641885Sraf error = port_alloc_event(pnotify.portnfy_port,
29651885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
29661885Sraf if (error) {
29671885Sraf if (error == ENOMEM || error == EAGAIN)
29681885Sraf error = EAGAIN;
29691885Sraf else
29701885Sraf error = EINVAL;
29711885Sraf kmem_free(cbplist, ssize);
29721885Sraf return (error);
29731885Sraf }
29741885Sraf lio_head_port = pnotify.portnfy_port;
29754502Spraks portused = 1;
29760Sstevel@tonic-gate }
29770Sstevel@tonic-gate
29780Sstevel@tonic-gate /*
29790Sstevel@tonic-gate * a list head should be allocated if notification is
29800Sstevel@tonic-gate * enabled for this list.
29810Sstevel@tonic-gate */
29820Sstevel@tonic-gate head = NULL;
29830Sstevel@tonic-gate
29841885Sraf if (mode_arg == LIO_WAIT || sigev) {
29850Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
29860Sstevel@tonic-gate error = aio_lio_alloc(&head);
29870Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
29880Sstevel@tonic-gate if (error)
29890Sstevel@tonic-gate goto done;
29900Sstevel@tonic-gate deadhead = 1;
29910Sstevel@tonic-gate head->lio_nent = nent;
29920Sstevel@tonic-gate head->lio_refcnt = nent;
29931885Sraf head->lio_port = -1;
29941885Sraf head->lio_portkev = NULL;
29951885Sraf if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
29961885Sraf sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
29970Sstevel@tonic-gate sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
29980Sstevel@tonic-gate if (sqp == NULL) {
29990Sstevel@tonic-gate error = EAGAIN;
30000Sstevel@tonic-gate goto done;
30010Sstevel@tonic-gate }
30020Sstevel@tonic-gate sqp->sq_func = NULL;
30030Sstevel@tonic-gate sqp->sq_next = NULL;
30040Sstevel@tonic-gate sqp->sq_info.si_code = SI_ASYNCIO;
30050Sstevel@tonic-gate sqp->sq_info.si_pid = curproc->p_pid;
30060Sstevel@tonic-gate sqp->sq_info.si_ctid = PRCTID(curproc);
30070Sstevel@tonic-gate sqp->sq_info.si_zoneid = getzoneid();
30080Sstevel@tonic-gate sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
30090Sstevel@tonic-gate sqp->sq_info.si_signo = sigevk.sigev_signo;
30100Sstevel@tonic-gate sqp->sq_info.si_value.sival_int =
30110Sstevel@tonic-gate sigevk.sigev_value.sival_int;
30120Sstevel@tonic-gate head->lio_sigqp = sqp;
30130Sstevel@tonic-gate } else {
30140Sstevel@tonic-gate head->lio_sigqp = NULL;
30150Sstevel@tonic-gate }
30161885Sraf if (pkevtp) {
30171885Sraf /*
30181885Sraf * Prepare data to send when list of aiocb's
30191885Sraf * has completed.
30201885Sraf */
30211885Sraf port_init_event(pkevtp, (uintptr_t)sigev,
30221885Sraf (void *)(uintptr_t)pnotify.portnfy_user,
30231885Sraf NULL, head);
30241885Sraf pkevtp->portkev_events = AIOLIO64;
30251885Sraf head->lio_portkev = pkevtp;
30261885Sraf head->lio_port = pnotify.portnfy_port;
30271885Sraf }
30280Sstevel@tonic-gate }
30290Sstevel@tonic-gate
30300Sstevel@tonic-gate for (i = 0; i < nent; i++, ucbp++) {
30310Sstevel@tonic-gate
30320Sstevel@tonic-gate cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
30330Sstevel@tonic-gate /* skip entry if it can't be copied. */
30341885Sraf if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
30350Sstevel@tonic-gate if (head) {
30360Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
30370Sstevel@tonic-gate head->lio_nent--;
30380Sstevel@tonic-gate head->lio_refcnt--;
30390Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
30400Sstevel@tonic-gate }
30410Sstevel@tonic-gate continue;
30420Sstevel@tonic-gate }
30430Sstevel@tonic-gate
30440Sstevel@tonic-gate /* skip if opcode for aiocb is LIO_NOP */
30450Sstevel@tonic-gate mode = aiocb->aio_lio_opcode;
30460Sstevel@tonic-gate if (mode == LIO_NOP) {
30470Sstevel@tonic-gate cbp = NULL;
30480Sstevel@tonic-gate if (head) {
30490Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
30500Sstevel@tonic-gate head->lio_nent--;
30510Sstevel@tonic-gate head->lio_refcnt--;
30520Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
30530Sstevel@tonic-gate }
30540Sstevel@tonic-gate continue;
30550Sstevel@tonic-gate }
30560Sstevel@tonic-gate
30570Sstevel@tonic-gate /* increment file descriptor's ref count. */
30580Sstevel@tonic-gate if ((fp = getf(aiocb->aio_fildes)) == NULL) {
30590Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF);
30600Sstevel@tonic-gate if (head) {
30610Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
30620Sstevel@tonic-gate head->lio_nent--;
30630Sstevel@tonic-gate head->lio_refcnt--;
30640Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
30650Sstevel@tonic-gate }
30660Sstevel@tonic-gate aio_errors++;
30670Sstevel@tonic-gate continue;
30680Sstevel@tonic-gate }
30690Sstevel@tonic-gate
30700Sstevel@tonic-gate /*
30710Sstevel@tonic-gate * check the permission of the partition
30720Sstevel@tonic-gate */
30730Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) {
30740Sstevel@tonic-gate releasef(aiocb->aio_fildes);
30750Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF);
30760Sstevel@tonic-gate if (head) {
30770Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
30780Sstevel@tonic-gate head->lio_nent--;
30790Sstevel@tonic-gate head->lio_refcnt--;
30800Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
30810Sstevel@tonic-gate }
30820Sstevel@tonic-gate aio_errors++;
30830Sstevel@tonic-gate continue;
30840Sstevel@tonic-gate }
30850Sstevel@tonic-gate
30860Sstevel@tonic-gate /*
30870Sstevel@tonic-gate * common case where requests are to the same fd
30880Sstevel@tonic-gate * for the same r/w operation
30890Sstevel@tonic-gate * for UFS, need to set EBADFD
30900Sstevel@tonic-gate */
30911885Sraf vp = fp->f_vnode;
30921885Sraf if (fp != prev_fp || mode != prev_mode) {
30930Sstevel@tonic-gate aio_func = check_vp(vp, mode);
30940Sstevel@tonic-gate if (aio_func == NULL) {
30950Sstevel@tonic-gate prev_fp = NULL;
30960Sstevel@tonic-gate releasef(aiocb->aio_fildes);
30970Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADFD);
30980Sstevel@tonic-gate aio_notsupported++;
30990Sstevel@tonic-gate if (head) {
31000Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
31010Sstevel@tonic-gate head->lio_nent--;
31020Sstevel@tonic-gate head->lio_refcnt--;
31030Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
31040Sstevel@tonic-gate }
31050Sstevel@tonic-gate continue;
31060Sstevel@tonic-gate } else {
31070Sstevel@tonic-gate prev_fp = fp;
31080Sstevel@tonic-gate prev_mode = mode;
31090Sstevel@tonic-gate }
31100Sstevel@tonic-gate }
31111885Sraf
31120Sstevel@tonic-gate #ifdef _LP64
31130Sstevel@tonic-gate aiocb_LFton(aiocb, &aiocb_n);
31140Sstevel@tonic-gate error = aio_req_setup(&reqp, aiop, &aiocb_n,
3115*10719SRoger.Faulkner@Sun.COM (aio_result_t *)&cbp->aio_resultp, vp, 0);
31160Sstevel@tonic-gate #else
31170Sstevel@tonic-gate error = aio_req_setupLF(&reqp, aiop, aiocb,
3118*10719SRoger.Faulkner@Sun.COM (aio_result_t *)&cbp->aio_resultp, vp, 0);
31190Sstevel@tonic-gate #endif /* _LP64 */
31200Sstevel@tonic-gate if (error) {
31210Sstevel@tonic-gate releasef(aiocb->aio_fildes);
31221885Sraf lio_set_uerror(&cbp->aio_resultp, error);
31230Sstevel@tonic-gate if (head) {
31240Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
31250Sstevel@tonic-gate head->lio_nent--;
31260Sstevel@tonic-gate head->lio_refcnt--;
31270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
31280Sstevel@tonic-gate }
31290Sstevel@tonic-gate aio_errors++;
31300Sstevel@tonic-gate continue;
31310Sstevel@tonic-gate }
31320Sstevel@tonic-gate
31330Sstevel@tonic-gate reqp->aio_req_lio = head;
31340Sstevel@tonic-gate deadhead = 0;
31350Sstevel@tonic-gate
31360Sstevel@tonic-gate /*
31370Sstevel@tonic-gate * Set the errno field now before sending the request to
31380Sstevel@tonic-gate * the driver to avoid a race condition
31390Sstevel@tonic-gate */
31400Sstevel@tonic-gate (void) suword32(&cbp->aio_resultp.aio_errno,
31410Sstevel@tonic-gate EINPROGRESS);
31420Sstevel@tonic-gate
31430Sstevel@tonic-gate reqp->aio_req_iocb.iocb32 = *ucbp;
31440Sstevel@tonic-gate
31451885Sraf event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
31461885Sraf aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
31471885Sraf aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
31481885Sraf if (aio_port | aio_thread) {
31491885Sraf port_kevent_t *lpkevp;
31501885Sraf /*
31511885Sraf * Prepare data to send with each aiocb completed.
31521885Sraf */
31531885Sraf if (aio_port) {
31541885Sraf void *paddr = (void *)(uintptr_t)
31551885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr;
31561885Sraf if (copyin(paddr, &pnotify, sizeof (pnotify)))
31571885Sraf error = EFAULT;
31581885Sraf } else { /* aio_thread */
31591885Sraf pnotify.portnfy_port =
31601885Sraf aiocb->aio_sigevent.sigev_signo;
31611885Sraf pnotify.portnfy_user =
31621885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr;
31631885Sraf }
31641885Sraf if (error)
31651885Sraf /* EMPTY */;
31661885Sraf else if (pkevtp != NULL &&
31671885Sraf pnotify.portnfy_port == lio_head_port)
31681885Sraf error = port_dup_event(pkevtp, &lpkevp,
31691885Sraf PORT_ALLOC_DEFAULT);
31701885Sraf else
31711885Sraf error = port_alloc_event(pnotify.portnfy_port,
31721885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
31731885Sraf &lpkevp);
31741885Sraf if (error == 0) {
31751885Sraf port_init_event(lpkevp, (uintptr_t)*ucbp,
31761885Sraf (void *)(uintptr_t)pnotify.portnfy_user,
31771885Sraf aio_port_callback, reqp);
31781885Sraf lpkevp->portkev_events = event;
31791885Sraf reqp->aio_req_portkev = lpkevp;
31801885Sraf reqp->aio_req_port = pnotify.portnfy_port;
31811885Sraf }
31820Sstevel@tonic-gate }
31830Sstevel@tonic-gate
31840Sstevel@tonic-gate /*
31850Sstevel@tonic-gate * send the request to driver.
31860Sstevel@tonic-gate */
31870Sstevel@tonic-gate if (error == 0) {
31880Sstevel@tonic-gate if (aiocb->aio_nbytes == 0) {
31890Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes);
31900Sstevel@tonic-gate aio_zerolen(reqp);
31910Sstevel@tonic-gate continue;
31920Sstevel@tonic-gate }
31930Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
31940Sstevel@tonic-gate CRED());
31950Sstevel@tonic-gate }
31960Sstevel@tonic-gate
31970Sstevel@tonic-gate /*
31980Sstevel@tonic-gate * the fd's ref count is not decremented until the IO has
31990Sstevel@tonic-gate * completed unless there was an error.
32000Sstevel@tonic-gate */
32010Sstevel@tonic-gate if (error) {
32020Sstevel@tonic-gate releasef(aiocb->aio_fildes);
32030Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, error);
32040Sstevel@tonic-gate if (head) {
32050Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
32060Sstevel@tonic-gate head->lio_nent--;
32070Sstevel@tonic-gate head->lio_refcnt--;
32080Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
32090Sstevel@tonic-gate }
32100Sstevel@tonic-gate if (error == ENOTSUP)
32110Sstevel@tonic-gate aio_notsupported++;
32120Sstevel@tonic-gate else
32130Sstevel@tonic-gate aio_errors++;
32144502Spraks lio_set_error(reqp, portused);
32150Sstevel@tonic-gate } else {
32160Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes);
32170Sstevel@tonic-gate }
32180Sstevel@tonic-gate }
32190Sstevel@tonic-gate
32200Sstevel@tonic-gate if (aio_notsupported) {
32210Sstevel@tonic-gate error = ENOTSUP;
32220Sstevel@tonic-gate } else if (aio_errors) {
32230Sstevel@tonic-gate /*
32240Sstevel@tonic-gate * return EIO if any request failed
32250Sstevel@tonic-gate */
32260Sstevel@tonic-gate error = EIO;
32270Sstevel@tonic-gate }
32280Sstevel@tonic-gate
32290Sstevel@tonic-gate if (mode_arg == LIO_WAIT) {
32300Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
32310Sstevel@tonic-gate while (head->lio_refcnt > 0) {
32320Sstevel@tonic-gate if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
32330Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
32340Sstevel@tonic-gate error = EINTR;
32350Sstevel@tonic-gate goto done;
32360Sstevel@tonic-gate }
32370Sstevel@tonic-gate }
32380Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
32390Sstevel@tonic-gate alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
32400Sstevel@tonic-gate }
32410Sstevel@tonic-gate
32420Sstevel@tonic-gate done:
32430Sstevel@tonic-gate kmem_free(cbplist, ssize);
32440Sstevel@tonic-gate if (deadhead) {
32450Sstevel@tonic-gate if (head->lio_sigqp)
32460Sstevel@tonic-gate kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
32471885Sraf if (head->lio_portkev)
32481885Sraf port_free_event(head->lio_portkev);
32490Sstevel@tonic-gate kmem_free(head, sizeof (aio_lio_t));
32500Sstevel@tonic-gate }
32510Sstevel@tonic-gate return (error);
32520Sstevel@tonic-gate }
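
/*
 * Illustration only (not part of the original source): a 32-bit
 * application reaches alioLF() through the transitional largefile
 * interfaces, assuming <aio.h> compiled with _LARGEFILE64_SOURCE.
 * A hedged sketch, with error handling omitted:
 *
 *	struct aiocb64 cb;
 *	struct aiocb64 *list[1];
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = off;		(may exceed 2GB-1)
 *	cb.aio_lio_opcode = LIO_READ;
 *	list[0] = &cb;
 *	(void) lio_listio64(LIO_WAIT, list, 1, NULL);
 *
 * With LIO_WAIT and no sigevent, the list head allocated above is
 * used purely to block until lio_refcnt drains to zero.
 */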
32530Sstevel@tonic-gate
32540Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
32550Sstevel@tonic-gate static void
32560Sstevel@tonic-gate aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
32570Sstevel@tonic-gate {
32580Sstevel@tonic-gate dest->aio_fildes = src->aio_fildes;
32590Sstevel@tonic-gate dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
32600Sstevel@tonic-gate dest->aio_nbytes = (size_t)src->aio_nbytes;
32610Sstevel@tonic-gate dest->aio_offset = (off_t)src->aio_offset;
32620Sstevel@tonic-gate dest->aio_reqprio = src->aio_reqprio;
32630Sstevel@tonic-gate dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
32640Sstevel@tonic-gate dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
32650Sstevel@tonic-gate
32660Sstevel@tonic-gate /*
32670Sstevel@tonic-gate * See comment in sigqueue32() on handling of 32-bit
32680Sstevel@tonic-gate * sigvals in a 64-bit kernel.
32690Sstevel@tonic-gate */
32700Sstevel@tonic-gate dest->aio_sigevent.sigev_value.sival_int =
32710Sstevel@tonic-gate (int)src->aio_sigevent.sigev_value.sival_int;
32720Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
32730Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_function;
32740Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
32750Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
32760Sstevel@tonic-gate dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
32770Sstevel@tonic-gate dest->aio_lio_opcode = src->aio_lio_opcode;
32780Sstevel@tonic-gate dest->aio_state = src->aio_state;
32790Sstevel@tonic-gate dest->aio__pad[0] = src->aio__pad[0];
32800Sstevel@tonic-gate }
32810Sstevel@tonic-gate #endif
32820Sstevel@tonic-gate
32830Sstevel@tonic-gate /*
32840Sstevel@tonic-gate * This function is used only for largefile calls made by
32851885Sraf * 32-bit applications.
32860Sstevel@tonic-gate */
32870Sstevel@tonic-gate static int
32880Sstevel@tonic-gate aio_req_setupLF(
32890Sstevel@tonic-gate aio_req_t **reqpp,
32900Sstevel@tonic-gate aio_t *aiop,
32910Sstevel@tonic-gate aiocb64_32_t *arg,
32920Sstevel@tonic-gate aio_result_t *resultp,
3293*10719SRoger.Faulkner@Sun.COM vnode_t *vp,
3294*10719SRoger.Faulkner@Sun.COM int old_solaris_req)
32950Sstevel@tonic-gate {
32961885Sraf sigqueue_t *sqp = NULL;
32970Sstevel@tonic-gate aio_req_t *reqp;
32981885Sraf struct uio *uio;
32991885Sraf struct sigevent32 *sigev;
33000Sstevel@tonic-gate int error;
33010Sstevel@tonic-gate
33021885Sraf sigev = &arg->aio_sigevent;
33031885Sraf if (sigev->sigev_notify == SIGEV_SIGNAL &&
33041885Sraf sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
33050Sstevel@tonic-gate sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
33060Sstevel@tonic-gate if (sqp == NULL)
33070Sstevel@tonic-gate return (EAGAIN);
33080Sstevel@tonic-gate sqp->sq_func = NULL;
33090Sstevel@tonic-gate sqp->sq_next = NULL;
33100Sstevel@tonic-gate sqp->sq_info.si_code = SI_ASYNCIO;
33110Sstevel@tonic-gate sqp->sq_info.si_pid = curproc->p_pid;
33120Sstevel@tonic-gate sqp->sq_info.si_ctid = PRCTID(curproc);
33130Sstevel@tonic-gate sqp->sq_info.si_zoneid = getzoneid();
33140Sstevel@tonic-gate sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
33150Sstevel@tonic-gate sqp->sq_info.si_signo = sigev->sigev_signo;
33161885Sraf sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
33171885Sraf }
33180Sstevel@tonic-gate
33190Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
33200Sstevel@tonic-gate
33210Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK) {
33220Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
33230Sstevel@tonic-gate if (sqp)
33240Sstevel@tonic-gate kmem_free(sqp, sizeof (sigqueue_t));
33250Sstevel@tonic-gate return (EIO);
33260Sstevel@tonic-gate }
33270Sstevel@tonic-gate /*
33280Sstevel@tonic-gate * get an aio_reqp from the free list or allocate one
33290Sstevel@tonic-gate * from dynamic memory.
33300Sstevel@tonic-gate */
33310Sstevel@tonic-gate if (error = aio_req_alloc(&reqp, resultp)) {
33320Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
33330Sstevel@tonic-gate if (sqp)
33340Sstevel@tonic-gate kmem_free(sqp, sizeof (sigqueue_t));
33350Sstevel@tonic-gate return (error);
33360Sstevel@tonic-gate }
33370Sstevel@tonic-gate aiop->aio_pending++;
33380Sstevel@tonic-gate aiop->aio_outstanding++;
33390Sstevel@tonic-gate reqp->aio_req_flags = AIO_PENDING;
3340*10719SRoger.Faulkner@Sun.COM if (old_solaris_req) {
3341*10719SRoger.Faulkner@Sun.COM /* this is an old solaris aio request */
3342*10719SRoger.Faulkner@Sun.COM reqp->aio_req_flags |= AIO_SOLARIS;
3343*10719SRoger.Faulkner@Sun.COM aiop->aio_flags |= AIO_SOLARIS_REQ;
3344*10719SRoger.Faulkner@Sun.COM }
33451885Sraf if (sigev->sigev_notify == SIGEV_THREAD ||
33461885Sraf sigev->sigev_notify == SIGEV_PORT)
33471885Sraf aio_enq(&aiop->aio_portpending, reqp, 0);
33480Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
33490Sstevel@tonic-gate /*
33500Sstevel@tonic-gate * initialize aio request.
33510Sstevel@tonic-gate */
33520Sstevel@tonic-gate reqp->aio_req_fd = arg->aio_fildes;
33530Sstevel@tonic-gate reqp->aio_req_sigqp = sqp;
33540Sstevel@tonic-gate reqp->aio_req_iocb.iocb = NULL;
33551885Sraf reqp->aio_req_lio = NULL;
33560Sstevel@tonic-gate reqp->aio_req_buf.b_file = vp;
33570Sstevel@tonic-gate uio = reqp->aio_req.aio_uio;
33580Sstevel@tonic-gate uio->uio_iovcnt = 1;
33590Sstevel@tonic-gate uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
33600Sstevel@tonic-gate uio->uio_iov->iov_len = arg->aio_nbytes;
33610Sstevel@tonic-gate uio->uio_loffset = arg->aio_offset;
33620Sstevel@tonic-gate *reqpp = reqp;
33630Sstevel@tonic-gate return (0);
33640Sstevel@tonic-gate }
33650Sstevel@tonic-gate
33660Sstevel@tonic-gate /*
33670Sstevel@tonic-gate * This routine is called when a non-largefile call is made by a 32-bit
33680Sstevel@tonic-gate * process on an ILP32 or LP64 kernel.
33690Sstevel@tonic-gate */
33700Sstevel@tonic-gate static int
33710Sstevel@tonic-gate alio32(
33720Sstevel@tonic-gate int mode_arg,
33730Sstevel@tonic-gate void *aiocb_arg,
33740Sstevel@tonic-gate int nent,
33751885Sraf void *sigev)
33760Sstevel@tonic-gate {
33770Sstevel@tonic-gate file_t *fp;
33780Sstevel@tonic-gate file_t *prev_fp = NULL;
33790Sstevel@tonic-gate int prev_mode = -1;
33800Sstevel@tonic-gate struct vnode *vp;
33810Sstevel@tonic-gate aio_lio_t *head;
33820Sstevel@tonic-gate aio_req_t *reqp;
33830Sstevel@tonic-gate aio_t *aiop;
33841885Sraf caddr_t cbplist;
33850Sstevel@tonic-gate aiocb_t cb;
33860Sstevel@tonic-gate aiocb_t *aiocb = &cb;
33870Sstevel@tonic-gate #ifdef _LP64
33880Sstevel@tonic-gate aiocb32_t *cbp;
33890Sstevel@tonic-gate caddr32_t *ucbp;
33900Sstevel@tonic-gate aiocb32_t cb32;
33910Sstevel@tonic-gate aiocb32_t *aiocb32 = &cb32;
33921885Sraf struct sigevent32 sigevk;
33930Sstevel@tonic-gate #else
33940Sstevel@tonic-gate aiocb_t *cbp, **ucbp;
33951885Sraf struct sigevent sigevk;
33960Sstevel@tonic-gate #endif
33970Sstevel@tonic-gate sigqueue_t *sqp;
33980Sstevel@tonic-gate int (*aio_func)();
33990Sstevel@tonic-gate int mode;
34001885Sraf int error = 0;
34011885Sraf int aio_errors = 0;
34020Sstevel@tonic-gate int i;
34030Sstevel@tonic-gate size_t ssize;
34040Sstevel@tonic-gate int deadhead = 0;
34050Sstevel@tonic-gate int aio_notsupported = 0;
34061885Sraf int lio_head_port;
34071885Sraf int aio_port;
34081885Sraf int aio_thread;
34090Sstevel@tonic-gate port_kevent_t *pkevtp = NULL;
34104502Spraks int portused = 0;
34110Sstevel@tonic-gate #ifdef _LP64
34120Sstevel@tonic-gate port_notify32_t pnotify;
34130Sstevel@tonic-gate #else
34140Sstevel@tonic-gate port_notify_t pnotify;
34150Sstevel@tonic-gate #endif
34161885Sraf int event;
34171885Sraf
34180Sstevel@tonic-gate aiop = curproc->p_aio;
34190Sstevel@tonic-gate if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
34200Sstevel@tonic-gate return (EINVAL);
34210Sstevel@tonic-gate
34220Sstevel@tonic-gate #ifdef _LP64
34230Sstevel@tonic-gate ssize = (sizeof (caddr32_t) * nent);
34240Sstevel@tonic-gate #else
34250Sstevel@tonic-gate ssize = (sizeof (aiocb_t *) * nent);
34260Sstevel@tonic-gate #endif
34270Sstevel@tonic-gate cbplist = kmem_alloc(ssize, KM_SLEEP);
34280Sstevel@tonic-gate ucbp = (void *)cbplist;
34290Sstevel@tonic-gate
34301885Sraf if (copyin(aiocb_arg, cbplist, ssize) ||
34311885Sraf (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
34320Sstevel@tonic-gate kmem_free(cbplist, ssize);
34330Sstevel@tonic-gate return (EFAULT);
34340Sstevel@tonic-gate }
34350Sstevel@tonic-gate
34361885Sraf /* Event Ports */
34371885Sraf if (sigev &&
34381885Sraf (sigevk.sigev_notify == SIGEV_THREAD ||
34391885Sraf sigevk.sigev_notify == SIGEV_PORT)) {
34401885Sraf if (sigevk.sigev_notify == SIGEV_THREAD) {
34411885Sraf pnotify.portnfy_port = sigevk.sigev_signo;
34421885Sraf pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
34431885Sraf } else if (copyin(
34441885Sraf (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
34451885Sraf &pnotify, sizeof (pnotify))) {
34460Sstevel@tonic-gate kmem_free(cbplist, ssize);
34470Sstevel@tonic-gate return (EFAULT);
34480Sstevel@tonic-gate }
34491885Sraf error = port_alloc_event(pnotify.portnfy_port,
34501885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
34511885Sraf if (error) {
34521885Sraf if (error == ENOMEM || error == EAGAIN)
34531885Sraf error = EAGAIN;
34541885Sraf else
34551885Sraf error = EINVAL;
34561885Sraf kmem_free(cbplist, ssize);
34571885Sraf return (error);
34581885Sraf }
34591885Sraf lio_head_port = pnotify.portnfy_port;
34604502Spraks portused = 1;
34610Sstevel@tonic-gate }
34620Sstevel@tonic-gate
34630Sstevel@tonic-gate /*
34640Sstevel@tonic-gate * a list head should be allocated if notification is
34650Sstevel@tonic-gate * enabled for this list.
34660Sstevel@tonic-gate */
34670Sstevel@tonic-gate head = NULL;
34680Sstevel@tonic-gate
34691885Sraf if (mode_arg == LIO_WAIT || sigev) {
34700Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
34710Sstevel@tonic-gate error = aio_lio_alloc(&head);
34720Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
34730Sstevel@tonic-gate if (error)
34740Sstevel@tonic-gate goto done;
34750Sstevel@tonic-gate deadhead = 1;
34760Sstevel@tonic-gate head->lio_nent = nent;
34770Sstevel@tonic-gate head->lio_refcnt = nent;
34781885Sraf head->lio_port = -1;
34791885Sraf head->lio_portkev = NULL;
34801885Sraf if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
34811885Sraf sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
34820Sstevel@tonic-gate sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
34830Sstevel@tonic-gate if (sqp == NULL) {
34840Sstevel@tonic-gate error = EAGAIN;
34850Sstevel@tonic-gate goto done;
34860Sstevel@tonic-gate }
34870Sstevel@tonic-gate sqp->sq_func = NULL;
34880Sstevel@tonic-gate sqp->sq_next = NULL;
34890Sstevel@tonic-gate sqp->sq_info.si_code = SI_ASYNCIO;
34900Sstevel@tonic-gate sqp->sq_info.si_pid = curproc->p_pid;
34910Sstevel@tonic-gate sqp->sq_info.si_ctid = PRCTID(curproc);
34920Sstevel@tonic-gate sqp->sq_info.si_zoneid = getzoneid();
34930Sstevel@tonic-gate sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
34941885Sraf sqp->sq_info.si_signo = sigevk.sigev_signo;
34950Sstevel@tonic-gate sqp->sq_info.si_value.sival_int =
34961885Sraf sigevk.sigev_value.sival_int;
34970Sstevel@tonic-gate head->lio_sigqp = sqp;
34980Sstevel@tonic-gate } else {
34990Sstevel@tonic-gate head->lio_sigqp = NULL;
35000Sstevel@tonic-gate }
35011885Sraf if (pkevtp) {
35021885Sraf /*
35031885Sraf * Prepare data to send when list of aiocb's has
35041885Sraf * completed.
35051885Sraf */
35061885Sraf port_init_event(pkevtp, (uintptr_t)sigev,
35071885Sraf (void *)(uintptr_t)pnotify.portnfy_user,
35081885Sraf NULL, head);
35091885Sraf pkevtp->portkev_events = AIOLIO;
35101885Sraf head->lio_portkev = pkevtp;
35111885Sraf head->lio_port = pnotify.portnfy_port;
35121885Sraf }
35130Sstevel@tonic-gate }
35140Sstevel@tonic-gate
35150Sstevel@tonic-gate for (i = 0; i < nent; i++, ucbp++) {
35160Sstevel@tonic-gate
35170Sstevel@tonic-gate /* skip entry if it can't be copied. */
35180Sstevel@tonic-gate #ifdef _LP64
35190Sstevel@tonic-gate cbp = (aiocb32_t *)(uintptr_t)*ucbp;
35201885Sraf if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
35210Sstevel@tonic-gate #else
35220Sstevel@tonic-gate cbp = (aiocb_t *)*ucbp;
35231885Sraf if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
35240Sstevel@tonic-gate #endif
35251885Sraf {
35260Sstevel@tonic-gate if (head) {
35270Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
35280Sstevel@tonic-gate head->lio_nent--;
35290Sstevel@tonic-gate head->lio_refcnt--;
35300Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
35310Sstevel@tonic-gate }
35320Sstevel@tonic-gate continue;
35330Sstevel@tonic-gate }
35340Sstevel@tonic-gate #ifdef _LP64
35350Sstevel@tonic-gate /*
35360Sstevel@tonic-gate * copy 32 bit structure into 64 bit structure
35370Sstevel@tonic-gate */
35380Sstevel@tonic-gate aiocb_32ton(aiocb32, aiocb);
35390Sstevel@tonic-gate #endif /* _LP64 */
35400Sstevel@tonic-gate
35410Sstevel@tonic-gate /* skip if opcode for aiocb is LIO_NOP */
35420Sstevel@tonic-gate mode = aiocb->aio_lio_opcode;
35430Sstevel@tonic-gate if (mode == LIO_NOP) {
35440Sstevel@tonic-gate cbp = NULL;
35450Sstevel@tonic-gate if (head) {
35460Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
35470Sstevel@tonic-gate head->lio_nent--;
35480Sstevel@tonic-gate head->lio_refcnt--;
35490Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
35500Sstevel@tonic-gate }
35510Sstevel@tonic-gate continue;
35520Sstevel@tonic-gate }
35530Sstevel@tonic-gate
35540Sstevel@tonic-gate /* increment file descriptor's ref count. */
35550Sstevel@tonic-gate if ((fp = getf(aiocb->aio_fildes)) == NULL) {
35560Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF);
35570Sstevel@tonic-gate if (head) {
35580Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
35590Sstevel@tonic-gate head->lio_nent--;
35600Sstevel@tonic-gate head->lio_refcnt--;
35610Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
35620Sstevel@tonic-gate }
35630Sstevel@tonic-gate aio_errors++;
35640Sstevel@tonic-gate continue;
35650Sstevel@tonic-gate }
35660Sstevel@tonic-gate
35670Sstevel@tonic-gate /*
35680Sstevel@tonic-gate * check the permission of the partition
35690Sstevel@tonic-gate */
35700Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) {
35710Sstevel@tonic-gate releasef(aiocb->aio_fildes);
35720Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF);
35730Sstevel@tonic-gate if (head) {
35740Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
35750Sstevel@tonic-gate head->lio_nent--;
35760Sstevel@tonic-gate head->lio_refcnt--;
35770Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
35780Sstevel@tonic-gate }
35790Sstevel@tonic-gate aio_errors++;
35800Sstevel@tonic-gate continue;
35810Sstevel@tonic-gate }
35820Sstevel@tonic-gate
35830Sstevel@tonic-gate /*
35840Sstevel@tonic-gate * common case where requests are to the same fd
35850Sstevel@tonic-gate * for the same r/w operation
35860Sstevel@tonic-gate * for UFS, need to set EBADFD
35870Sstevel@tonic-gate */
35881885Sraf vp = fp->f_vnode;
35891885Sraf if (fp != prev_fp || mode != prev_mode) {
35900Sstevel@tonic-gate aio_func = check_vp(vp, mode);
35910Sstevel@tonic-gate if (aio_func == NULL) {
35920Sstevel@tonic-gate prev_fp = NULL;
35930Sstevel@tonic-gate releasef(aiocb->aio_fildes);
35941885Sraf lio_set_uerror(&cbp->aio_resultp, EBADFD);
35950Sstevel@tonic-gate aio_notsupported++;
35960Sstevel@tonic-gate if (head) {
35970Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
35980Sstevel@tonic-gate head->lio_nent--;
35990Sstevel@tonic-gate head->lio_refcnt--;
36000Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
36010Sstevel@tonic-gate }
36020Sstevel@tonic-gate continue;
36030Sstevel@tonic-gate } else {
36040Sstevel@tonic-gate prev_fp = fp;
36050Sstevel@tonic-gate prev_mode = mode;
36060Sstevel@tonic-gate }
36070Sstevel@tonic-gate }
36081885Sraf
36091885Sraf error = aio_req_setup(&reqp, aiop, aiocb,
3610*10719SRoger.Faulkner@Sun.COM (aio_result_t *)&cbp->aio_resultp, vp, 0);
36111885Sraf if (error) {
36120Sstevel@tonic-gate releasef(aiocb->aio_fildes);
36130Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, error);
36140Sstevel@tonic-gate if (head) {
36150Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
36160Sstevel@tonic-gate head->lio_nent--;
36170Sstevel@tonic-gate head->lio_refcnt--;
36180Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
36190Sstevel@tonic-gate }
36200Sstevel@tonic-gate aio_errors++;
36210Sstevel@tonic-gate continue;
36220Sstevel@tonic-gate }
36230Sstevel@tonic-gate
36240Sstevel@tonic-gate reqp->aio_req_lio = head;
36250Sstevel@tonic-gate deadhead = 0;
36260Sstevel@tonic-gate
36270Sstevel@tonic-gate /*
36280Sstevel@tonic-gate * Set the errno field now before sending the request to
36290Sstevel@tonic-gate * the driver to avoid a race condition
36300Sstevel@tonic-gate */
36310Sstevel@tonic-gate (void) suword32(&cbp->aio_resultp.aio_errno,
36320Sstevel@tonic-gate EINPROGRESS);
36330Sstevel@tonic-gate
36341885Sraf reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
36351885Sraf
36361885Sraf event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
36371885Sraf aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
36381885Sraf aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
36391885Sraf if (aio_port | aio_thread) {
36401885Sraf port_kevent_t *lpkevp;
36411885Sraf /*
36421885Sraf * Prepare data to send with each aiocb completed.
36431885Sraf */
36440Sstevel@tonic-gate #ifdef _LP64
36451885Sraf if (aio_port) {
36461885Sraf void *paddr = (void *)(uintptr_t)
36471885Sraf aiocb32->aio_sigevent.sigev_value.sival_ptr;
36481885Sraf if (copyin(paddr, &pnotify, sizeof (pnotify)))
36491885Sraf error = EFAULT;
36501885Sraf } else { /* aio_thread */
36511885Sraf pnotify.portnfy_port =
36521885Sraf aiocb32->aio_sigevent.sigev_signo;
36531885Sraf pnotify.portnfy_user =
36541885Sraf aiocb32->aio_sigevent.sigev_value.sival_ptr;
36551885Sraf }
36560Sstevel@tonic-gate #else
36571885Sraf if (aio_port) {
36581885Sraf void *paddr =
36591885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr;
36601885Sraf if (copyin(paddr, &pnotify, sizeof (pnotify)))
36611885Sraf error = EFAULT;
36621885Sraf } else { /* aio_thread */
36631885Sraf pnotify.portnfy_port =
36641885Sraf aiocb->aio_sigevent.sigev_signo;
36651885Sraf pnotify.portnfy_user =
36661885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr;
36671885Sraf }
36680Sstevel@tonic-gate #endif
36691885Sraf if (error)
36701885Sraf /* EMPTY */;
36711885Sraf else if (pkevtp != NULL &&
36721885Sraf pnotify.portnfy_port == lio_head_port)
36731885Sraf error = port_dup_event(pkevtp, &lpkevp,
36741885Sraf PORT_ALLOC_DEFAULT);
36751885Sraf else
36761885Sraf error = port_alloc_event(pnotify.portnfy_port,
36771885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
36781885Sraf &lpkevp);
36791885Sraf if (error == 0) {
36801885Sraf port_init_event(lpkevp, (uintptr_t)cbp,
36811885Sraf (void *)(uintptr_t)pnotify.portnfy_user,
36821885Sraf aio_port_callback, reqp);
36831885Sraf lpkevp->portkev_events = event;
36841885Sraf reqp->aio_req_portkev = lpkevp;
36851885Sraf reqp->aio_req_port = pnotify.portnfy_port;
36861885Sraf }
36870Sstevel@tonic-gate }
36880Sstevel@tonic-gate
36890Sstevel@tonic-gate /*
36900Sstevel@tonic-gate * send the request to driver.
36910Sstevel@tonic-gate */
36920Sstevel@tonic-gate if (error == 0) {
36930Sstevel@tonic-gate if (aiocb->aio_nbytes == 0) {
36940Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes);
36950Sstevel@tonic-gate aio_zerolen(reqp);
36960Sstevel@tonic-gate continue;
36970Sstevel@tonic-gate }
36980Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
36990Sstevel@tonic-gate CRED());
37000Sstevel@tonic-gate }
37010Sstevel@tonic-gate
37020Sstevel@tonic-gate /*
37030Sstevel@tonic-gate * the fd's ref count is not decremented until the IO has
37040Sstevel@tonic-gate * completed unless there was an error.
37050Sstevel@tonic-gate */
37060Sstevel@tonic-gate if (error) {
37070Sstevel@tonic-gate releasef(aiocb->aio_fildes);
37080Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, error);
37090Sstevel@tonic-gate if (head) {
37100Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
37110Sstevel@tonic-gate head->lio_nent--;
37120Sstevel@tonic-gate head->lio_refcnt--;
37130Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
37140Sstevel@tonic-gate }
37150Sstevel@tonic-gate if (error == ENOTSUP)
37160Sstevel@tonic-gate aio_notsupported++;
37170Sstevel@tonic-gate else
37180Sstevel@tonic-gate aio_errors++;
37194502Spraks lio_set_error(reqp, portused);
37200Sstevel@tonic-gate } else {
37210Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes);
37220Sstevel@tonic-gate }
37230Sstevel@tonic-gate }
37240Sstevel@tonic-gate
37250Sstevel@tonic-gate if (aio_notsupported) {
37260Sstevel@tonic-gate error = ENOTSUP;
37270Sstevel@tonic-gate } else if (aio_errors) {
37280Sstevel@tonic-gate /*
37290Sstevel@tonic-gate * return EIO if any request failed
37300Sstevel@tonic-gate */
37310Sstevel@tonic-gate error = EIO;
37320Sstevel@tonic-gate }
37330Sstevel@tonic-gate
37340Sstevel@tonic-gate if (mode_arg == LIO_WAIT) {
37350Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
37360Sstevel@tonic-gate while (head->lio_refcnt > 0) {
37370Sstevel@tonic-gate if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
37380Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
37390Sstevel@tonic-gate error = EINTR;
37400Sstevel@tonic-gate goto done;
37410Sstevel@tonic-gate }
37420Sstevel@tonic-gate }
37430Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
37440Sstevel@tonic-gate alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
37450Sstevel@tonic-gate }
37460Sstevel@tonic-gate
37470Sstevel@tonic-gate done:
37480Sstevel@tonic-gate kmem_free(cbplist, ssize);
37490Sstevel@tonic-gate if (deadhead) {
37500Sstevel@tonic-gate if (head->lio_sigqp)
37510Sstevel@tonic-gate kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
37521885Sraf if (head->lio_portkev)
37531885Sraf port_free_event(head->lio_portkev);
37540Sstevel@tonic-gate kmem_free(head, sizeof (aio_lio_t));
37550Sstevel@tonic-gate }
37560Sstevel@tonic-gate return (error);
37570Sstevel@tonic-gate }
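
/*
 * Illustration only (not part of the original source): the event-port
 * notification consumed above is requested from userland roughly as
 * follows (hedged sketch; my_cookie is a hypothetical user pointer,
 * error handling omitted):
 *
 *	port_notify_t pn;
 *	struct sigevent sev;
 *	int port = port_create();
 *
 *	pn.portnfy_port = port;
 *	pn.portnfy_user = my_cookie;
 *	sev.sigev_notify = SIGEV_PORT;
 *	sev.sigev_value.sival_ptr = &pn;
 *	(void) lio_listio(LIO_NOWAIT, list, nent, &sev);
 *
 * When the whole list completes, an AIOLIO event for the list head
 * becomes retrievable with port_get(); individual aiocbs may carry
 * their own SIGEV_PORT notification as well, as handled per-aiocb
 * above.
 */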
37580Sstevel@tonic-gate
37590Sstevel@tonic-gate
37600Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL
37610Sstevel@tonic-gate void
37620Sstevel@tonic-gate aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
37630Sstevel@tonic-gate {
37640Sstevel@tonic-gate dest->aio_fildes = src->aio_fildes;
37650Sstevel@tonic-gate dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
37660Sstevel@tonic-gate dest->aio_nbytes = (size_t)src->aio_nbytes;
37670Sstevel@tonic-gate dest->aio_offset = (off_t)src->aio_offset;
37680Sstevel@tonic-gate dest->aio_reqprio = src->aio_reqprio;
37690Sstevel@tonic-gate dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
37700Sstevel@tonic-gate dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
37710Sstevel@tonic-gate
37720Sstevel@tonic-gate /*
37730Sstevel@tonic-gate * See comment in sigqueue32() on handling of 32-bit
37740Sstevel@tonic-gate * sigvals in a 64-bit kernel.
37750Sstevel@tonic-gate */
37760Sstevel@tonic-gate dest->aio_sigevent.sigev_value.sival_int =
37770Sstevel@tonic-gate (int)src->aio_sigevent.sigev_value.sival_int;
37780Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
37790Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_function;
37800Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
37810Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
37820Sstevel@tonic-gate dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
37830Sstevel@tonic-gate dest->aio_lio_opcode = src->aio_lio_opcode;
37840Sstevel@tonic-gate dest->aio_state = src->aio_state;
37850Sstevel@tonic-gate dest->aio__pad[0] = src->aio__pad[0];
37860Sstevel@tonic-gate }
37870Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */
37880Sstevel@tonic-gate
37890Sstevel@tonic-gate /*
37900Sstevel@tonic-gate * aio_port_callback() is called just before the event is retrieved from the
37910Sstevel@tonic-gate * port. The task of this callback function is to finish the work of the
37920Sstevel@tonic-gate * transaction for the application, which means:
37930Sstevel@tonic-gate * - copyout transaction data to the application
37940Sstevel@tonic-gate * (this thread is running in the right process context)
37950Sstevel@tonic-gate * - keep track of the transaction (update of counters).
37960Sstevel@tonic-gate * - free allocated buffers
37970Sstevel@tonic-gate * The aiocb pointer is the object element of the port_kevent_t structure.
37980Sstevel@tonic-gate *
37990Sstevel@tonic-gate * flag :
38000Sstevel@tonic-gate * PORT_CALLBACK_DEFAULT : do copyout and free resources
38010Sstevel@tonic-gate * PORT_CALLBACK_CLOSE : don't do copyout, free resources
38020Sstevel@tonic-gate */
38030Sstevel@tonic-gate
38040Sstevel@tonic-gate /*ARGSUSED*/
38050Sstevel@tonic-gate int
38060Sstevel@tonic-gate aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
38070Sstevel@tonic-gate {
38080Sstevel@tonic-gate aio_t *aiop = curproc->p_aio;
38090Sstevel@tonic-gate aio_req_t *reqp = arg;
38100Sstevel@tonic-gate struct iovec *iov;
38110Sstevel@tonic-gate struct buf *bp;
38120Sstevel@tonic-gate void *resultp;
38130Sstevel@tonic-gate
38140Sstevel@tonic-gate if (pid != curproc->p_pid) {
38150Sstevel@tonic-gate /* wrong process; cannot deliver data here */
38160Sstevel@tonic-gate return (EACCES);
38170Sstevel@tonic-gate }
38180Sstevel@tonic-gate
38190Sstevel@tonic-gate mutex_enter(&aiop->aio_portq_mutex);
38200Sstevel@tonic-gate reqp->aio_req_portkev = NULL;
38210Sstevel@tonic-gate aio_req_remove_portq(aiop, reqp); /* remove request from portq */
38220Sstevel@tonic-gate mutex_exit(&aiop->aio_portq_mutex);
38230Sstevel@tonic-gate aphysio_unlock(reqp); /* unlock used pages */
38240Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex);
38250Sstevel@tonic-gate if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
38260Sstevel@tonic-gate aio_req_free_port(aiop, reqp); /* back to free list */
38270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
38280Sstevel@tonic-gate return (0);
38290Sstevel@tonic-gate }
38300Sstevel@tonic-gate
38310Sstevel@tonic-gate iov = reqp->aio_req_uio.uio_iov;
38320Sstevel@tonic-gate bp = &reqp->aio_req_buf;
38330Sstevel@tonic-gate resultp = (void *)reqp->aio_req_resultp;
38340Sstevel@tonic-gate aio_req_free_port(aiop, reqp); /* request struct back to free list */
38350Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex);
38360Sstevel@tonic-gate if (flag == PORT_CALLBACK_DEFAULT)
38370Sstevel@tonic-gate aio_copyout_result_port(iov, bp, resultp);
38380Sstevel@tonic-gate return (0);
38390Sstevel@tonic-gate }
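
/*
 * Illustration only (not part of the original source): this callback
 * runs in the context of the thread retrieving the event.  A hedged
 * userland sketch (cbp and cookie are hypothetical locals, error
 * handling omitted):
 *
 *	port_event_t pe;
 *
 *	if (port_get(port, &pe, NULL) == 0 &&
 *	    pe.portev_source == PORT_SOURCE_AIO) {
 *		cbp = (struct aiocb *)pe.portev_object;
 *		cookie = pe.portev_user;
 *	}
 *
 * By the time port_get() returns, the copyout performed by this
 * callback has already updated the aiocb's result fields.
 */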