10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51885Sraf * Common Development and Distribution License (the "License"). 61885Sraf * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211885Sraf 220Sstevel@tonic-gate /* 23*8519SVamsi.Krishna@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate /* 280Sstevel@tonic-gate * Kernel asynchronous I/O. 290Sstevel@tonic-gate * This is only for raw devices now (as of Nov. 1993). 
300Sstevel@tonic-gate */ 310Sstevel@tonic-gate 320Sstevel@tonic-gate #include <sys/types.h> 330Sstevel@tonic-gate #include <sys/errno.h> 340Sstevel@tonic-gate #include <sys/conf.h> 350Sstevel@tonic-gate #include <sys/file.h> 360Sstevel@tonic-gate #include <sys/fs/snode.h> 370Sstevel@tonic-gate #include <sys/unistd.h> 380Sstevel@tonic-gate #include <sys/cmn_err.h> 390Sstevel@tonic-gate #include <vm/as.h> 400Sstevel@tonic-gate #include <vm/faultcode.h> 410Sstevel@tonic-gate #include <sys/sysmacros.h> 420Sstevel@tonic-gate #include <sys/procfs.h> 430Sstevel@tonic-gate #include <sys/kmem.h> 440Sstevel@tonic-gate #include <sys/autoconf.h> 450Sstevel@tonic-gate #include <sys/ddi_impldefs.h> 460Sstevel@tonic-gate #include <sys/sunddi.h> 470Sstevel@tonic-gate #include <sys/aio_impl.h> 480Sstevel@tonic-gate #include <sys/debug.h> 490Sstevel@tonic-gate #include <sys/param.h> 500Sstevel@tonic-gate #include <sys/systm.h> 510Sstevel@tonic-gate #include <sys/vmsystm.h> 520Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h> 530Sstevel@tonic-gate #include <sys/contract/process_impl.h> 540Sstevel@tonic-gate 550Sstevel@tonic-gate /* 560Sstevel@tonic-gate * external entry point. 
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


/*
 * Values for the "run_mode" argument threaded through the aio entry
 * points: select which user-level aiocb layout the kernel copies in/out.
 */
#define	AIO_64	0
#define	AIO_32	1
#define	AIO_LARGEFILE	2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
/* returns the driver aio read/write handler appropriate for the vnode */
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

/* large-file (64-bit offset) variants used by the 32-bit syscall path */
static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

/* 64-bit native entry: 6 args, 64-bit return, argc conveyed to handler */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/* 32-bit compat entry on a 64-bit kernel: 7 args (offset split in two) */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif	/* _SYSCALL32_IMPL */

#else	/* _LP64 */

static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif	/* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif	/* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};

/* Install the kaio syscall(s); fails only if mod_install() fails. */
int
_init(void)
{
	int retval;

	if ((retval = mod_install(&modlinkage)) != 0)
		return (retval);

	return (0);
}

/* Attempt to unload; SE_NOUNLOAD in the sysent flags normally prevents it. */
int
_fini(void)
{
	int retval;

	retval = mod_remove(&modlinkage);

	return (retval);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

#ifdef _LP64
/*
 * 64-bit native kaio entry point.  a0 carries the sub-function code
 * (possibly with AIO_POLL_BIT or'ed in); a1..a5 are per-call arguments.
 * On error, set_errno() is invoked and its (errno) value returned;
 * otherwise the per-call rval is returned directly.
 */
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related stuff is valid only for
	 * 32 bit kernel and not for 64 bit kernel
	 * On 64 bit kernel we convert large file calls
	 * to regular 64bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif

/*
 * 32-bit (and 32-bit-compat) kaio entry point.  uap[0] holds the
 * sub-function code; the 64-bit file offset for AIOREAD/AIOWRITE is
 * reassembled from uap[4]/uap[5] in endian-dependent order below.
 * Returns an errno value (0 on success); rvp->r_val1 carries the rval.
 */
static int
kaio(
	ulong_t *uap,
	rval_t *rvp)
{
	long	rval = 0;
	int	error = 0;
	offset_t	off;


	rvp->r_vals = 0;
#if defined(_LITTLE_ENDIAN)
	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

	switch (uap[0] & ~AIO_POLL_BIT) {
	/*
	 * It must be the 32 bit system call on 64 bit kernel
	 */
	case AIOREAD:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
	case AIOWRITE:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
	case AIOWAIT:
		error = aiowait((struct timeval *)uap[1], (int)uap[2],
		    &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
		    (uint_t *)uap[3], (timespec_t *)uap[4]);
		break;
	case AIONOTIFY:
		return (aionotify());
	case AIOINIT:
		return (aioinit());
	case AIOSTART:
		return (aiostart());
	case AIOLIO:
		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
		    (void *)uap[4]));
	case AIOLIOWAIT:
		return (aliowait((int)uap[1], (void *)uap[2],
		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
	case AIOSUSPEND:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4],
		    &rval, AIO_32);
		break;
	case AIOERROR:
		return (aioerror((void *)uap[1], AIO_32));
	case AIOAREAD:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FREAD, AIO_32));
	case AIOAWRITE:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FWRITE, AIO_32));
	case AIOCANCEL:
		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
		    AIO_32));
		break;
	case AIOLIO64:
		return (alioLF((int)uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4]));
	case AIOLIOWAIT64:
		return (aliowait(uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
	case AIOSUSPEND64:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4], &rval,
		    AIO_LARGEFILE);
		break;
	case AIOERROR64:
		return (aioerror((void *)uap[1], AIO_LARGEFILE));
	case AIOAREAD64:
		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
		    AIO_LARGEFILE));
	case AIOAWRITE64:
		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
		    AIO_LARGEFILE));
	case AIOCANCEL64:
		error = (aio_cancel((int)uap[1], (void *)uap[2],
		    &rval, AIO_LARGEFILE));
		break;
	default:
		return (EINVAL);
	}

	rvp->r_val1 = rval;
	return (error);
}

/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
	aio_t *aiop;

	/* no aio context for this process: nothing to wake, not an error */
	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (0);

	mutex_enter(&aiop->aio_mutex);
	aiop->aio_notifycnt++;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	return (0);
}

/*
 * Convert a user struct timeval into a kernel relative timestruc_t.
 * On return: *rqtp is NULL with *blocking set (1 = wait indefinitely,
 * 0 = poll) for the NULL / -1 / zero-interval cases, otherwise *rqtp
 * points at *rqtime holding the validated relative time.
 * Returns 0, EFAULT (bad copyin) or EINVAL (out-of-range timeval).
 */
static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

/*
 * Convert a user timespec into a kernel relative timestruc_t.
 * Same output contract as timeval2reltime(), except there is no -1
 * "don't wait" sentinel: only NULL (block forever) and a zero interval
 * (poll) short-circuit.  Returns 0, EFAULT or EINVAL.
 */
static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

/*
 * Reap one completed request for the calling process, optionally waiting
 * up to *timout for one to complete.  On success *rval is the user
 * aio_result_t pointer of the reaped request (or 1 when only the
 * user-level done queue had entries, via aio_notifycnt).
 */
/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int 		error;
	aio_t 		*aiop;
	aio_req_t 	*reqp;
	clock_t		status;
	int 		blocking;
	/*
	 * NOTE(review): timecheck is assigned only when rqtp != NULL; it is
	 * passed to cv_waituntil_sig() uninitialized on the infinite-wait
	 * path — presumably ignored there when rqtp is NULL; confirm against
	 * cv_waituntil_sig().
	 */
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	/* reaped one: unlock its pages, copy out result, free the request */
	if (reqp) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */

/*
 * Reap up to nent completed requests into the user array uiocb,
 * waiting (subject to timout) until at least *nwait have completed.
 * *nwait is updated with the number actually returned.
 * (NOTE: function continues beyond this chunk.)
 */
/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int 		error = 0;
	aio_t		*aiop;
	aio_req_t	*reqlist = NULL;
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* users iocb size */
	size_t		riocbsz;		/* returned iocb size */
	int		iocb_index = 0;
	model_t		model = get_udatamodel();
	int		blocking = 1;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	if (aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		/* pure poll: never block, collect whatever is done */
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	/* size of the user's iocb pointer array depends on the data model */
	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn calls will sleep here
	 * until the active aio_waitn finishes and leaves the kernel
	 * If the second call does not block (poll), then return
	 * immediately with the error code : EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn-call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 */
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/* cached per-process iocb buffer too small: discard and reallocate */
	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * if we are here second time as a result of timer
		 * expiration, we reset error if there are enough
		 * aiocb's to satisfy request.
		 * We return also if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			if (rval > 0)
				continue;
			if (rval < 0) {
				/* timeout: make one final non-blocking pass */
				error = ETIME;
				blocking = 0;
				continue;
			}
			error = EINTR;
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {

		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		/* wake the next thread waiting to run aio_waitn() */
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}

/*
 * aio_unlock_requests
 * copyouts the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * put the aio request structure back into the free list.
 * Returns the updated index into iocblist after the completed
 * iocb pointers have been appended.
 */

static int
aio_unlock_requests(
	caddr_t	iocblist,
	int	iocb_index,
	aio_req_t *reqlist,
	aio_t *aiop,
	model_t	model)
{
	aio_req_t	*reqp, *nreqp;

	if (model == DATAMODEL_NATIVE) {
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			/* store the user iocb pointer for copyout by caller */
			(((caddr_t *)iocblist)[iocb_index++]) =
			    reqp->aio_req_iocb.iocb;
			/* save next before aio_req_free() recycles reqp */
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/* same as above, but the stored iocb pointers are 32-bit */
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			((caddr32_t *)iocblist)[iocb_index++] =
			    reqp->aio_req_iocb.iocb32;
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#endif	/* _SYSCALL32_IMPL */
	return (iocb_index);
}

/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a simple linked list
 * - done queue is a double linked list
 * Returns the number of elements actually moved (may be less than "max"
 * when the done queue is shorter).  Caller must hold aio_mutex; the done
 * queue is assumed non-empty (callers check aio_doneq first).
 */

static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
	aio_req_t *q2, *q2work, *list;
	int count = 0;

	list = *reqlist;
	q2 = aiop->aio_doneq;
	q2work = q2;
	/* walk at most "max" entries of the circular done queue */
	while (max-- > 0) {
		q2work->aio_req_flags &= ~AIO_DONEQ;
		q2work = q2work->aio_req_next;
		count++;
		if (q2work == q2)
			break;
	}

	if (q2work == q2) {
		/* all elements revised */
		q2->aio_req_prev->aio_req_next = list;
		list = q2;
		aiop->aio_doneq = NULL;
	} else {
		/*
		 * max < elements in the doneq
		 * detach only the required amount of elements
		 * out of the doneq
		 */
		q2work->aio_req_prev->aio_req_next = list;
		list = q2;

		/* re-close the circular done queue around the remainder */
		aiop->aio_doneq = q2work;
		q2work->aio_req_prev = q2->aio_req_prev;
		q2->aio_req_prev->aio_req_next = q2work;
	}
	*reqlist = list;
	return (count);
}

/*
 * aiosuspend
 * Common code behind aio_suspend(3C) and its largefile/32-bit variants
 * (selected via run_mode).  Blocks until at least one of the "nent"
 * requests named in the user array "aiocb" has completed, a user-level
 * notification is pending (aio_notifycnt), the timeout expires, or a
 * signal is taken.  Completed requests are unlocked, their results
 * copied out, and their structures freed.
 */
/*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int	error;
	aio_t	*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t	cbplist = NULL;
	aiocb_t	*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int	rv;
	int	i;
	size_t	ssize;
	model_t	model = get_udatamodel();
	int	blocking;
	/* timecheck snapshots "timechanged" so clock jumps restart the wait */
	int	timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		/* convert relative timeout to an absolute deadline in place */
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	/* snapshot the user's array of aiocb pointers */
	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().  Lock order is aio_cleanupq_mutex before
	 * aio_mutex throughout this function.
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
					/*
					 * NOTE(review): if run_mode is neither
					 * AIO_32 nor AIO_LARGEFILE here, reqp
					 * keeps its value from the previous
					 * iteration — presumably such a
					 * run_mode cannot reach this path;
					 * verify against the callers.
					 */
				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					/* prepend to the singly linked result list */
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	/* unlock, copy out results and free every request we collected */
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
10890Sstevel@tonic-gate */ 10900Sstevel@tonic-gate static int 10910Sstevel@tonic-gate aioinit(void) 10920Sstevel@tonic-gate { 10930Sstevel@tonic-gate proc_t *p = curproc; 10940Sstevel@tonic-gate aio_t *aiop; 10950Sstevel@tonic-gate mutex_enter(&p->p_lock); 10960Sstevel@tonic-gate if ((aiop = p->p_aio) == NULL) { 10970Sstevel@tonic-gate aiop = aio_aiop_alloc(); 10980Sstevel@tonic-gate p->p_aio = aiop; 10990Sstevel@tonic-gate } 11000Sstevel@tonic-gate mutex_exit(&p->p_lock); 11010Sstevel@tonic-gate if (aiop == NULL) 11020Sstevel@tonic-gate return (ENOMEM); 11030Sstevel@tonic-gate return (0); 11040Sstevel@tonic-gate } 11050Sstevel@tonic-gate 11060Sstevel@tonic-gate /* 11070Sstevel@tonic-gate * start a special thread that will cleanup after aio requests 11080Sstevel@tonic-gate * that are preventing a segment from being unmapped. as_unmap() 11090Sstevel@tonic-gate * blocks until all phsyio to this segment is completed. this 11100Sstevel@tonic-gate * doesn't happen until all the pages in this segment are not 11110Sstevel@tonic-gate * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio 11120Sstevel@tonic-gate * requests still outstanding. this special thread will make sure 11130Sstevel@tonic-gate * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed. 11140Sstevel@tonic-gate * 11150Sstevel@tonic-gate * this function will return an error if the process has only 11160Sstevel@tonic-gate * one LWP. the assumption is that the caller is a separate LWP 11170Sstevel@tonic-gate * that remains blocked in the kernel for the life of this process. 
11180Sstevel@tonic-gate */ 11190Sstevel@tonic-gate static int 11200Sstevel@tonic-gate aiostart(void) 11210Sstevel@tonic-gate { 11220Sstevel@tonic-gate proc_t *p = curproc; 11230Sstevel@tonic-gate aio_t *aiop; 11240Sstevel@tonic-gate int first, error = 0; 11250Sstevel@tonic-gate 11260Sstevel@tonic-gate if (p->p_lwpcnt == 1) 11270Sstevel@tonic-gate return (EDEADLK); 11280Sstevel@tonic-gate mutex_enter(&p->p_lock); 11290Sstevel@tonic-gate if ((aiop = p->p_aio) == NULL) 11300Sstevel@tonic-gate error = EINVAL; 11310Sstevel@tonic-gate else { 11320Sstevel@tonic-gate first = aiop->aio_ok; 11330Sstevel@tonic-gate if (aiop->aio_ok == 0) 11340Sstevel@tonic-gate aiop->aio_ok = 1; 11350Sstevel@tonic-gate } 11360Sstevel@tonic-gate mutex_exit(&p->p_lock); 11370Sstevel@tonic-gate if (error == 0 && first == 0) { 11380Sstevel@tonic-gate return (aio_cleanup_thread(aiop)); 11390Sstevel@tonic-gate /* should return only to exit */ 11400Sstevel@tonic-gate } 11410Sstevel@tonic-gate return (error); 11420Sstevel@tonic-gate } 11430Sstevel@tonic-gate 11440Sstevel@tonic-gate /* 11450Sstevel@tonic-gate * Associate an aiocb with a port. 11460Sstevel@tonic-gate * This function is used by aiorw() to associate a transaction with a port. 11470Sstevel@tonic-gate * Allocate an event port structure (port_alloc_event()) and store the 11480Sstevel@tonic-gate * delivered user pointer (portnfy_user) in the portkev_user field of the 11490Sstevel@tonic-gate * port_kevent_t structure.. 11500Sstevel@tonic-gate * The aio_req_portkev pointer in the aio_req_t structure was added to identify 11510Sstevel@tonic-gate * the port association. 
 */

/*
 * Returns 0 on success; resource shortages (ENOMEM/EAGAIN from
 * port_alloc_event()) are reported as EAGAIN, any other failure as EINVAL.
 */
static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
	aio_req_t *reqp, int event)
{
	port_kevent_t	*pkevp = NULL;
	int		error;

	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
	    PORT_SOURCE_AIO, &pkevp);
	if (error) {
		/* normalize port_alloc_event() errors for the caller */
		if ((error == ENOMEM) || (error == EAGAIN))
			error = EAGAIN;
		else
			error = EINVAL;
	} else {
		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
		    aio_port_callback, reqp);
		pkevp->portkev_events = event;
		/* remember the association so completion can post the event */
		reqp->aio_req_portkev = pkevp;
		reqp->aio_req_port = pntfy->portnfy_port;
	}
	return (error);
}

#ifdef _LP64

/*
 * Asynchronous list IO. A chain of aiocb's are copied in
 * one at a time. If the aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called. Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests. Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy. Processing a list could adversely impact
 * the driver's interrupt latency.
 */
static int
alio(
	int		mode_arg,
	aiocb_t		**aiocb_arg,
	int		nent,
	struct sigevent	*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
	aiocb_t		*cbp;
	aiocb_t		**ucbp;
	struct sigevent sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	/* deadhead != 0 means no request has been linked to "head" yet */
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ssize = (sizeof (aiocb_t *) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (aiocb_t **)cbplist;

	/* copy in the array of iocb pointers and the optional sigevent */
	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			/*
			 * SIGEV_THREAD repurposes sigev_signo as the port
			 * and sigev_value as the user cookie.
			 */
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			/* prepare the signal to deliver at list completion */
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value = sigevk.sigev_value;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	/*
	 * Submit each request; every skipped or failed entry must drop
	 * its reference on "head" so the list can still complete.
	 */
	for (i = 0; i < nent; i++, ucbp++) {

		cbp = *ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation.
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				/* cache the lookup for the next iteration */
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    &cbp->aio_resultp, vp);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		/* at least one request now references head */
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb = (caddr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				/* same port as the list head: share its event */
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length I/O completes immediately */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		/* head is non-NULL here: it was allocated for LIO_WAIT above */
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* no request ever referenced head; free it and its payloads */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}

#endif /* _LP64 */

/*
 * Asynchronous list IO.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/O's are completed if a signal is caught
 * or if the list include UFS I/O requests.
If this happens, 15460Sstevel@tonic-gate * libaio will call aliowait() to wait for the I/O's to 15470Sstevel@tonic-gate * complete 15480Sstevel@tonic-gate */ 15490Sstevel@tonic-gate /*ARGSUSED*/ 15500Sstevel@tonic-gate static int 15510Sstevel@tonic-gate aliowait( 15520Sstevel@tonic-gate int mode, 15530Sstevel@tonic-gate void *aiocb, 15540Sstevel@tonic-gate int nent, 15550Sstevel@tonic-gate void *sigev, 15560Sstevel@tonic-gate int run_mode) 15570Sstevel@tonic-gate { 15580Sstevel@tonic-gate aio_lio_t *head; 15590Sstevel@tonic-gate aio_t *aiop; 15600Sstevel@tonic-gate caddr_t cbplist; 15610Sstevel@tonic-gate aiocb_t *cbp, **ucbp; 15620Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15630Sstevel@tonic-gate aiocb32_t *cbp32; 15640Sstevel@tonic-gate caddr32_t *ucbp32; 15650Sstevel@tonic-gate aiocb64_32_t *cbp64; 15660Sstevel@tonic-gate #endif 15670Sstevel@tonic-gate int error = 0; 15680Sstevel@tonic-gate int i; 15690Sstevel@tonic-gate size_t ssize = 0; 15700Sstevel@tonic-gate model_t model = get_udatamodel(); 15710Sstevel@tonic-gate 15720Sstevel@tonic-gate aiop = curproc->p_aio; 15730Sstevel@tonic-gate if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 15740Sstevel@tonic-gate return (EINVAL); 15750Sstevel@tonic-gate 15760Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) 15770Sstevel@tonic-gate ssize = (sizeof (aiocb_t *) * nent); 15780Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15790Sstevel@tonic-gate else 15800Sstevel@tonic-gate ssize = (sizeof (caddr32_t) * nent); 15810Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 15820Sstevel@tonic-gate 15830Sstevel@tonic-gate if (ssize == 0) 15840Sstevel@tonic-gate return (EINVAL); 15850Sstevel@tonic-gate 15860Sstevel@tonic-gate cbplist = kmem_alloc(ssize, KM_SLEEP); 15870Sstevel@tonic-gate 15880Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) 15890Sstevel@tonic-gate ucbp = (aiocb_t **)cbplist; 15900Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15910Sstevel@tonic-gate else 15920Sstevel@tonic-gate ucbp32 = (caddr32_t *)cbplist; 
15930Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 15940Sstevel@tonic-gate 15950Sstevel@tonic-gate if (copyin(aiocb, cbplist, ssize)) { 15960Sstevel@tonic-gate error = EFAULT; 15970Sstevel@tonic-gate goto done; 15980Sstevel@tonic-gate } 15990Sstevel@tonic-gate 16000Sstevel@tonic-gate /* 16010Sstevel@tonic-gate * To find the list head, we go through the 16020Sstevel@tonic-gate * list of aiocb structs, find the request 16030Sstevel@tonic-gate * its for, then get the list head that reqp 16040Sstevel@tonic-gate * points to 16050Sstevel@tonic-gate */ 16060Sstevel@tonic-gate head = NULL; 16070Sstevel@tonic-gate 16080Sstevel@tonic-gate for (i = 0; i < nent; i++) { 16090Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) { 16100Sstevel@tonic-gate /* 16110Sstevel@tonic-gate * Since we are only checking for a NULL pointer 16120Sstevel@tonic-gate * Following should work on both native data sizes 16130Sstevel@tonic-gate * as well as for largefile aiocb. 16140Sstevel@tonic-gate */ 16150Sstevel@tonic-gate if ((cbp = *ucbp++) == NULL) 16160Sstevel@tonic-gate continue; 16170Sstevel@tonic-gate if (run_mode != AIO_LARGEFILE) 16180Sstevel@tonic-gate if (head = aio_list_get(&cbp->aio_resultp)) 16190Sstevel@tonic-gate break; 16200Sstevel@tonic-gate else { 16210Sstevel@tonic-gate /* 16220Sstevel@tonic-gate * This is a case when largefile call is 16230Sstevel@tonic-gate * made on 32 bit kernel. 
16240Sstevel@tonic-gate * Treat each pointer as pointer to 16250Sstevel@tonic-gate * aiocb64_32 16260Sstevel@tonic-gate */ 16270Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *) 16280Sstevel@tonic-gate &(((aiocb64_32_t *)cbp)->aio_resultp))) 16290Sstevel@tonic-gate break; 16300Sstevel@tonic-gate } 16310Sstevel@tonic-gate } 16320Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 16330Sstevel@tonic-gate else { 16340Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) { 16350Sstevel@tonic-gate if ((cbp64 = (aiocb64_32_t *) 16360Sstevel@tonic-gate (uintptr_t)*ucbp32++) == NULL) 16370Sstevel@tonic-gate continue; 16380Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *) 16390Sstevel@tonic-gate &cbp64->aio_resultp)) 16400Sstevel@tonic-gate break; 16410Sstevel@tonic-gate } else if (run_mode == AIO_32) { 16420Sstevel@tonic-gate if ((cbp32 = (aiocb32_t *) 16430Sstevel@tonic-gate (uintptr_t)*ucbp32++) == NULL) 16440Sstevel@tonic-gate continue; 16450Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *) 16460Sstevel@tonic-gate &cbp32->aio_resultp)) 16470Sstevel@tonic-gate break; 16480Sstevel@tonic-gate } 16490Sstevel@tonic-gate } 16500Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 16510Sstevel@tonic-gate } 16520Sstevel@tonic-gate 16530Sstevel@tonic-gate if (head == NULL) { 16540Sstevel@tonic-gate error = EINVAL; 16550Sstevel@tonic-gate goto done; 16560Sstevel@tonic-gate } 16570Sstevel@tonic-gate 16580Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 16590Sstevel@tonic-gate while (head->lio_refcnt > 0) { 16600Sstevel@tonic-gate if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 16610Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 16620Sstevel@tonic-gate error = EINTR; 16630Sstevel@tonic-gate goto done; 16640Sstevel@tonic-gate } 16650Sstevel@tonic-gate } 16660Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 16670Sstevel@tonic-gate alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 16680Sstevel@tonic-gate done: 16690Sstevel@tonic-gate kmem_free(cbplist, 
ssize); 16700Sstevel@tonic-gate return (error); 16710Sstevel@tonic-gate } 16720Sstevel@tonic-gate 16730Sstevel@tonic-gate aio_lio_t * 16740Sstevel@tonic-gate aio_list_get(aio_result_t *resultp) 16750Sstevel@tonic-gate { 16760Sstevel@tonic-gate aio_lio_t *head = NULL; 16770Sstevel@tonic-gate aio_t *aiop; 16780Sstevel@tonic-gate aio_req_t **bucket; 16790Sstevel@tonic-gate aio_req_t *reqp; 16800Sstevel@tonic-gate long index; 16810Sstevel@tonic-gate 16820Sstevel@tonic-gate aiop = curproc->p_aio; 16830Sstevel@tonic-gate if (aiop == NULL) 16840Sstevel@tonic-gate return (NULL); 16850Sstevel@tonic-gate 16860Sstevel@tonic-gate if (resultp) { 16870Sstevel@tonic-gate index = AIO_HASH(resultp); 16880Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 16890Sstevel@tonic-gate for (reqp = *bucket; reqp != NULL; 16900Sstevel@tonic-gate reqp = reqp->aio_hash_next) { 16910Sstevel@tonic-gate if (reqp->aio_req_resultp == resultp) { 16920Sstevel@tonic-gate head = reqp->aio_req_lio; 16930Sstevel@tonic-gate return (head); 16940Sstevel@tonic-gate } 16950Sstevel@tonic-gate } 16960Sstevel@tonic-gate } 16970Sstevel@tonic-gate return (NULL); 16980Sstevel@tonic-gate } 16990Sstevel@tonic-gate 17000Sstevel@tonic-gate 17010Sstevel@tonic-gate static void 17020Sstevel@tonic-gate lio_set_uerror(void *resultp, int error) 17030Sstevel@tonic-gate { 17040Sstevel@tonic-gate /* 17050Sstevel@tonic-gate * the resultp field is a pointer to where the 17060Sstevel@tonic-gate * error should be written out to the user's 17070Sstevel@tonic-gate * aiocb. 
17080Sstevel@tonic-gate * 17090Sstevel@tonic-gate */ 17100Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 17110Sstevel@tonic-gate (void) sulword(&((aio_result_t *)resultp)->aio_return, 17120Sstevel@tonic-gate (ssize_t)-1); 17130Sstevel@tonic-gate (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 17140Sstevel@tonic-gate } 17150Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 17160Sstevel@tonic-gate else { 17170Sstevel@tonic-gate (void) suword32(&((aio_result32_t *)resultp)->aio_return, 17180Sstevel@tonic-gate (uint_t)-1); 17190Sstevel@tonic-gate (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 17200Sstevel@tonic-gate } 17210Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 17220Sstevel@tonic-gate } 17230Sstevel@tonic-gate 17240Sstevel@tonic-gate /* 17250Sstevel@tonic-gate * do cleanup completion for all requests in list. memory for 17260Sstevel@tonic-gate * each request is also freed. 17270Sstevel@tonic-gate */ 17280Sstevel@tonic-gate static void 17290Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 17300Sstevel@tonic-gate { 17310Sstevel@tonic-gate int i; 17320Sstevel@tonic-gate aio_req_t *reqp; 17330Sstevel@tonic-gate aio_result_t *resultp; 17341885Sraf aiocb64_32_t *aiocb_64; 17350Sstevel@tonic-gate 17360Sstevel@tonic-gate for (i = 0; i < nent; i++) { 17370Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 17380Sstevel@tonic-gate if (cbp[i] == NULL) 17390Sstevel@tonic-gate continue; 17400Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) { 17410Sstevel@tonic-gate aiocb_64 = (aiocb64_32_t *)cbp[i]; 17421885Sraf resultp = (aio_result_t *) 17431885Sraf &aiocb_64->aio_resultp; 17440Sstevel@tonic-gate } else 17450Sstevel@tonic-gate resultp = &cbp[i]->aio_resultp; 17460Sstevel@tonic-gate } 17470Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 17480Sstevel@tonic-gate else { 17491885Sraf aiocb32_t *aiocb_32; 17501885Sraf caddr32_t *cbp32; 17510Sstevel@tonic-gate 17520Sstevel@tonic-gate cbp32 = 
(caddr32_t *)cbp; 17530Sstevel@tonic-gate if (cbp32[i] == NULL) 17540Sstevel@tonic-gate continue; 17550Sstevel@tonic-gate if (run_mode == AIO_32) { 17560Sstevel@tonic-gate aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 17570Sstevel@tonic-gate resultp = (aio_result_t *)&aiocb_32-> 17580Sstevel@tonic-gate aio_resultp; 17590Sstevel@tonic-gate } else if (run_mode == AIO_LARGEFILE) { 17600Sstevel@tonic-gate aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 17610Sstevel@tonic-gate resultp = (aio_result_t *)&aiocb_64-> 17620Sstevel@tonic-gate aio_resultp; 17630Sstevel@tonic-gate } 17640Sstevel@tonic-gate } 17650Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 17660Sstevel@tonic-gate /* 17670Sstevel@tonic-gate * we need to get the aio_cleanupq_mutex since we call 17680Sstevel@tonic-gate * aio_req_done(). 17690Sstevel@tonic-gate */ 17700Sstevel@tonic-gate mutex_enter(&aiop->aio_cleanupq_mutex); 17710Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 17720Sstevel@tonic-gate reqp = aio_req_done(resultp); 17730Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 17740Sstevel@tonic-gate mutex_exit(&aiop->aio_cleanupq_mutex); 17750Sstevel@tonic-gate if (reqp != NULL) { 17760Sstevel@tonic-gate aphysio_unlock(reqp); 17770Sstevel@tonic-gate aio_copyout_result(reqp); 17780Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 17790Sstevel@tonic-gate aio_req_free(aiop, reqp); 17800Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 17810Sstevel@tonic-gate } 17820Sstevel@tonic-gate } 17830Sstevel@tonic-gate } 17840Sstevel@tonic-gate 17850Sstevel@tonic-gate /* 17861885Sraf * Write out the results for an aio request that is done. 
17870Sstevel@tonic-gate */ 17880Sstevel@tonic-gate static int 17890Sstevel@tonic-gate aioerror(void *cb, int run_mode) 17900Sstevel@tonic-gate { 17910Sstevel@tonic-gate aio_result_t *resultp; 17920Sstevel@tonic-gate aio_t *aiop; 17930Sstevel@tonic-gate aio_req_t *reqp; 17940Sstevel@tonic-gate int retval; 17950Sstevel@tonic-gate 17960Sstevel@tonic-gate aiop = curproc->p_aio; 17970Sstevel@tonic-gate if (aiop == NULL || cb == NULL) 17980Sstevel@tonic-gate return (EINVAL); 17990Sstevel@tonic-gate 18000Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 18010Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18020Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 18030Sstevel@tonic-gate aio_resultp; 18040Sstevel@tonic-gate else 18050Sstevel@tonic-gate resultp = &((aiocb_t *)cb)->aio_resultp; 18060Sstevel@tonic-gate } 18070Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 18080Sstevel@tonic-gate else { 18090Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18100Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 18110Sstevel@tonic-gate aio_resultp; 18120Sstevel@tonic-gate else if (run_mode == AIO_32) 18130Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 18140Sstevel@tonic-gate aio_resultp; 18150Sstevel@tonic-gate } 18160Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 18170Sstevel@tonic-gate /* 18180Sstevel@tonic-gate * we need to get the aio_cleanupq_mutex since we call 18190Sstevel@tonic-gate * aio_req_find(). 
18200Sstevel@tonic-gate */ 18210Sstevel@tonic-gate mutex_enter(&aiop->aio_cleanupq_mutex); 18220Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 18230Sstevel@tonic-gate retval = aio_req_find(resultp, &reqp); 18240Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 18250Sstevel@tonic-gate mutex_exit(&aiop->aio_cleanupq_mutex); 18260Sstevel@tonic-gate if (retval == 0) { 18270Sstevel@tonic-gate aphysio_unlock(reqp); 18280Sstevel@tonic-gate aio_copyout_result(reqp); 18290Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 18300Sstevel@tonic-gate aio_req_free(aiop, reqp); 18310Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 18320Sstevel@tonic-gate return (0); 18330Sstevel@tonic-gate } else if (retval == 1) 18340Sstevel@tonic-gate return (EINPROGRESS); 18350Sstevel@tonic-gate else if (retval == 2) 18360Sstevel@tonic-gate return (EINVAL); 18370Sstevel@tonic-gate return (0); 18380Sstevel@tonic-gate } 18390Sstevel@tonic-gate 18400Sstevel@tonic-gate /* 18410Sstevel@tonic-gate * aio_cancel - if no requests outstanding, 18420Sstevel@tonic-gate * return AIO_ALLDONE 18430Sstevel@tonic-gate * else 18440Sstevel@tonic-gate * return AIO_NOTCANCELED 18450Sstevel@tonic-gate */ 18460Sstevel@tonic-gate static int 18470Sstevel@tonic-gate aio_cancel( 18480Sstevel@tonic-gate int fildes, 18490Sstevel@tonic-gate void *cb, 18500Sstevel@tonic-gate long *rval, 18510Sstevel@tonic-gate int run_mode) 18520Sstevel@tonic-gate { 18530Sstevel@tonic-gate aio_t *aiop; 18540Sstevel@tonic-gate void *resultp; 18550Sstevel@tonic-gate int index; 18560Sstevel@tonic-gate aio_req_t **bucket; 18570Sstevel@tonic-gate aio_req_t *ent; 18580Sstevel@tonic-gate 18590Sstevel@tonic-gate 18600Sstevel@tonic-gate /* 18610Sstevel@tonic-gate * Verify valid file descriptor 18620Sstevel@tonic-gate */ 18630Sstevel@tonic-gate if ((getf(fildes)) == NULL) { 18640Sstevel@tonic-gate return (EBADF); 18650Sstevel@tonic-gate } 18660Sstevel@tonic-gate releasef(fildes); 18670Sstevel@tonic-gate 18680Sstevel@tonic-gate aiop = curproc->p_aio; 
18690Sstevel@tonic-gate if (aiop == NULL) 18700Sstevel@tonic-gate return (EINVAL); 18710Sstevel@tonic-gate 18720Sstevel@tonic-gate if (aiop->aio_outstanding == 0) { 18730Sstevel@tonic-gate *rval = AIO_ALLDONE; 18740Sstevel@tonic-gate return (0); 18750Sstevel@tonic-gate } 18760Sstevel@tonic-gate 18770Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 18780Sstevel@tonic-gate if (cb != NULL) { 18790Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 18800Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18810Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 18820Sstevel@tonic-gate ->aio_resultp; 18830Sstevel@tonic-gate else 18840Sstevel@tonic-gate resultp = &((aiocb_t *)cb)->aio_resultp; 18850Sstevel@tonic-gate } 18860Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 18870Sstevel@tonic-gate else { 18880Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18890Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 18900Sstevel@tonic-gate ->aio_resultp; 18910Sstevel@tonic-gate else if (run_mode == AIO_32) 18920Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb32_t *)cb) 18930Sstevel@tonic-gate ->aio_resultp; 18940Sstevel@tonic-gate } 18950Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 18960Sstevel@tonic-gate index = AIO_HASH(resultp); 18970Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 18980Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 18990Sstevel@tonic-gate if (ent->aio_req_resultp == resultp) { 19000Sstevel@tonic-gate if ((ent->aio_req_flags & AIO_PENDING) == 0) { 19010Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19020Sstevel@tonic-gate *rval = AIO_ALLDONE; 19030Sstevel@tonic-gate return (0); 19040Sstevel@tonic-gate } 19050Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19060Sstevel@tonic-gate *rval = AIO_NOTCANCELED; 19070Sstevel@tonic-gate return (0); 19080Sstevel@tonic-gate } 19090Sstevel@tonic-gate } 19100Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19110Sstevel@tonic-gate *rval = 
AIO_ALLDONE; 19120Sstevel@tonic-gate return (0); 19130Sstevel@tonic-gate } 19140Sstevel@tonic-gate 19150Sstevel@tonic-gate for (index = 0; index < AIO_HASHSZ; index++) { 19160Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 19170Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 19180Sstevel@tonic-gate if (ent->aio_req_fd == fildes) { 19190Sstevel@tonic-gate if ((ent->aio_req_flags & AIO_PENDING) != 0) { 19200Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19210Sstevel@tonic-gate *rval = AIO_NOTCANCELED; 19220Sstevel@tonic-gate return (0); 19230Sstevel@tonic-gate } 19240Sstevel@tonic-gate } 19250Sstevel@tonic-gate } 19260Sstevel@tonic-gate } 19270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19280Sstevel@tonic-gate *rval = AIO_ALLDONE; 19290Sstevel@tonic-gate return (0); 19300Sstevel@tonic-gate } 19310Sstevel@tonic-gate 19320Sstevel@tonic-gate /* 19330Sstevel@tonic-gate * solaris version of asynchronous read and write 19340Sstevel@tonic-gate */ 19350Sstevel@tonic-gate static int 19360Sstevel@tonic-gate arw( 19370Sstevel@tonic-gate int opcode, 19380Sstevel@tonic-gate int fdes, 19390Sstevel@tonic-gate char *bufp, 19400Sstevel@tonic-gate int bufsize, 19410Sstevel@tonic-gate offset_t offset, 19420Sstevel@tonic-gate aio_result_t *resultp, 19430Sstevel@tonic-gate int mode) 19440Sstevel@tonic-gate { 19450Sstevel@tonic-gate file_t *fp; 19460Sstevel@tonic-gate int error; 19470Sstevel@tonic-gate struct vnode *vp; 19480Sstevel@tonic-gate aio_req_t *reqp; 19490Sstevel@tonic-gate aio_t *aiop; 19500Sstevel@tonic-gate int (*aio_func)(); 19510Sstevel@tonic-gate #ifdef _LP64 19520Sstevel@tonic-gate aiocb_t aiocb; 19530Sstevel@tonic-gate #else 19540Sstevel@tonic-gate aiocb64_32_t aiocb64; 19550Sstevel@tonic-gate #endif 19560Sstevel@tonic-gate 19570Sstevel@tonic-gate aiop = curproc->p_aio; 19580Sstevel@tonic-gate if (aiop == NULL) 19590Sstevel@tonic-gate return (EINVAL); 19600Sstevel@tonic-gate 19610Sstevel@tonic-gate if ((fp = getf(fdes)) == NULL) 
{ 19620Sstevel@tonic-gate return (EBADF); 19630Sstevel@tonic-gate } 19640Sstevel@tonic-gate 19650Sstevel@tonic-gate /* 19660Sstevel@tonic-gate * check the permission of the partition 19670Sstevel@tonic-gate */ 19680Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) { 19690Sstevel@tonic-gate releasef(fdes); 19700Sstevel@tonic-gate return (EBADF); 19710Sstevel@tonic-gate } 19720Sstevel@tonic-gate 19730Sstevel@tonic-gate vp = fp->f_vnode; 19740Sstevel@tonic-gate aio_func = check_vp(vp, mode); 19750Sstevel@tonic-gate if (aio_func == NULL) { 19760Sstevel@tonic-gate releasef(fdes); 19770Sstevel@tonic-gate return (EBADFD); 19780Sstevel@tonic-gate } 19790Sstevel@tonic-gate #ifdef _LP64 19800Sstevel@tonic-gate aiocb.aio_fildes = fdes; 19810Sstevel@tonic-gate aiocb.aio_buf = bufp; 19820Sstevel@tonic-gate aiocb.aio_nbytes = bufsize; 19830Sstevel@tonic-gate aiocb.aio_offset = offset; 19840Sstevel@tonic-gate aiocb.aio_sigevent.sigev_notify = 0; 19851885Sraf error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 19860Sstevel@tonic-gate #else 19870Sstevel@tonic-gate aiocb64.aio_fildes = fdes; 19880Sstevel@tonic-gate aiocb64.aio_buf = (caddr32_t)bufp; 19890Sstevel@tonic-gate aiocb64.aio_nbytes = bufsize; 19900Sstevel@tonic-gate aiocb64.aio_offset = offset; 19910Sstevel@tonic-gate aiocb64.aio_sigevent.sigev_notify = 0; 19921885Sraf error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 19930Sstevel@tonic-gate #endif 19940Sstevel@tonic-gate if (error) { 19950Sstevel@tonic-gate releasef(fdes); 19960Sstevel@tonic-gate return (error); 19970Sstevel@tonic-gate } 19980Sstevel@tonic-gate 19990Sstevel@tonic-gate /* 20000Sstevel@tonic-gate * enable polling on this request if the opcode has 20010Sstevel@tonic-gate * the AIO poll bit set 20020Sstevel@tonic-gate */ 20030Sstevel@tonic-gate if (opcode & AIO_POLL_BIT) 20040Sstevel@tonic-gate reqp->aio_req_flags |= AIO_POLL; 20050Sstevel@tonic-gate 20060Sstevel@tonic-gate if (bufsize == 0) { 20070Sstevel@tonic-gate clear_active_fd(fdes); 
20080Sstevel@tonic-gate aio_zerolen(reqp); 20090Sstevel@tonic-gate return (0); 20100Sstevel@tonic-gate } 20110Sstevel@tonic-gate /* 20120Sstevel@tonic-gate * send the request to driver. 20130Sstevel@tonic-gate */ 20140Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 20150Sstevel@tonic-gate /* 20160Sstevel@tonic-gate * the fd is stored in the aio_req_t by aio_req_setup(), and 20170Sstevel@tonic-gate * is released by the aio_cleanup_thread() when the IO has 20180Sstevel@tonic-gate * completed. 20190Sstevel@tonic-gate */ 20200Sstevel@tonic-gate if (error) { 20210Sstevel@tonic-gate releasef(fdes); 20220Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 20230Sstevel@tonic-gate aio_req_free(aiop, reqp); 20240Sstevel@tonic-gate aiop->aio_pending--; 20250Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK) 20260Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv); 20270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 20280Sstevel@tonic-gate return (error); 20290Sstevel@tonic-gate } 20300Sstevel@tonic-gate clear_active_fd(fdes); 20310Sstevel@tonic-gate return (0); 20320Sstevel@tonic-gate } 20330Sstevel@tonic-gate 20340Sstevel@tonic-gate /* 20350Sstevel@tonic-gate * posix version of asynchronous read and write 20360Sstevel@tonic-gate */ 20371885Sraf static int 20380Sstevel@tonic-gate aiorw( 20390Sstevel@tonic-gate int opcode, 20400Sstevel@tonic-gate void *aiocb_arg, 20410Sstevel@tonic-gate int mode, 20420Sstevel@tonic-gate int run_mode) 20430Sstevel@tonic-gate { 20440Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 20450Sstevel@tonic-gate aiocb32_t aiocb32; 20460Sstevel@tonic-gate struct sigevent32 *sigev32; 20470Sstevel@tonic-gate port_notify32_t pntfy32; 20480Sstevel@tonic-gate #endif 20490Sstevel@tonic-gate aiocb64_32_t aiocb64; 20500Sstevel@tonic-gate aiocb_t aiocb; 20510Sstevel@tonic-gate file_t *fp; 20520Sstevel@tonic-gate int error, fd; 20530Sstevel@tonic-gate size_t bufsize; 20540Sstevel@tonic-gate struct vnode *vp; 20550Sstevel@tonic-gate 
aio_req_t *reqp; 20560Sstevel@tonic-gate aio_t *aiop; 20570Sstevel@tonic-gate int (*aio_func)(); 20580Sstevel@tonic-gate aio_result_t *resultp; 20590Sstevel@tonic-gate struct sigevent *sigev; 20600Sstevel@tonic-gate model_t model; 20610Sstevel@tonic-gate int aio_use_port = 0; 20620Sstevel@tonic-gate port_notify_t pntfy; 20630Sstevel@tonic-gate 20640Sstevel@tonic-gate model = get_udatamodel(); 20650Sstevel@tonic-gate aiop = curproc->p_aio; 20660Sstevel@tonic-gate if (aiop == NULL) 20670Sstevel@tonic-gate return (EINVAL); 20680Sstevel@tonic-gate 20690Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) { 20700Sstevel@tonic-gate if (run_mode != AIO_LARGEFILE) { 20710Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 20720Sstevel@tonic-gate return (EFAULT); 20730Sstevel@tonic-gate bufsize = aiocb.aio_nbytes; 20740Sstevel@tonic-gate resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 20750Sstevel@tonic-gate if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 20760Sstevel@tonic-gate return (EBADF); 20770Sstevel@tonic-gate } 20780Sstevel@tonic-gate sigev = &aiocb.aio_sigevent; 20790Sstevel@tonic-gate } else { 20800Sstevel@tonic-gate /* 20810Sstevel@tonic-gate * We come here only when we make largefile 20820Sstevel@tonic-gate * call on 32 bit kernel using 32 bit library. 
20830Sstevel@tonic-gate */ 20840Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 20850Sstevel@tonic-gate return (EFAULT); 20860Sstevel@tonic-gate bufsize = aiocb64.aio_nbytes; 20870Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 20880Sstevel@tonic-gate ->aio_resultp); 20891885Sraf if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 20900Sstevel@tonic-gate return (EBADF); 20910Sstevel@tonic-gate sigev = (struct sigevent *)&aiocb64.aio_sigevent; 20920Sstevel@tonic-gate } 20930Sstevel@tonic-gate 20940Sstevel@tonic-gate if (sigev->sigev_notify == SIGEV_PORT) { 20950Sstevel@tonic-gate if (copyin((void *)sigev->sigev_value.sival_ptr, 20960Sstevel@tonic-gate &pntfy, sizeof (port_notify_t))) { 20970Sstevel@tonic-gate releasef(fd); 20980Sstevel@tonic-gate return (EFAULT); 20990Sstevel@tonic-gate } 21000Sstevel@tonic-gate aio_use_port = 1; 21011885Sraf } else if (sigev->sigev_notify == SIGEV_THREAD) { 21021885Sraf pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 21031885Sraf pntfy.portnfy_user = 21041885Sraf aiocb.aio_sigevent.sigev_value.sival_ptr; 21051885Sraf aio_use_port = 1; 21060Sstevel@tonic-gate } 21070Sstevel@tonic-gate } 21080Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 21090Sstevel@tonic-gate else { 21100Sstevel@tonic-gate if (run_mode == AIO_32) { 21110Sstevel@tonic-gate /* 32 bit system call is being made on 64 bit kernel */ 21120Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 21130Sstevel@tonic-gate return (EFAULT); 21140Sstevel@tonic-gate 21150Sstevel@tonic-gate bufsize = aiocb32.aio_nbytes; 21160Sstevel@tonic-gate aiocb_32ton(&aiocb32, &aiocb); 21170Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 21180Sstevel@tonic-gate aio_resultp); 21190Sstevel@tonic-gate if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 21200Sstevel@tonic-gate return (EBADF); 21210Sstevel@tonic-gate } 21220Sstevel@tonic-gate sigev32 = &aiocb32.aio_sigevent; 21230Sstevel@tonic-gate } else 
if (run_mode == AIO_LARGEFILE) { 21240Sstevel@tonic-gate /* 21250Sstevel@tonic-gate * We come here only when we make largefile 21260Sstevel@tonic-gate * call on 64 bit kernel using 32 bit library. 21270Sstevel@tonic-gate */ 21280Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 21290Sstevel@tonic-gate return (EFAULT); 21300Sstevel@tonic-gate bufsize = aiocb64.aio_nbytes; 21310Sstevel@tonic-gate aiocb_LFton(&aiocb64, &aiocb); 21320Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 21330Sstevel@tonic-gate ->aio_resultp); 21340Sstevel@tonic-gate if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 21350Sstevel@tonic-gate return (EBADF); 21360Sstevel@tonic-gate sigev32 = &aiocb64.aio_sigevent; 21370Sstevel@tonic-gate } 21380Sstevel@tonic-gate 21390Sstevel@tonic-gate if (sigev32->sigev_notify == SIGEV_PORT) { 21400Sstevel@tonic-gate if (copyin( 21410Sstevel@tonic-gate (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 21420Sstevel@tonic-gate &pntfy32, sizeof (port_notify32_t))) { 21430Sstevel@tonic-gate releasef(fd); 21440Sstevel@tonic-gate return (EFAULT); 21450Sstevel@tonic-gate } 21460Sstevel@tonic-gate pntfy.portnfy_port = pntfy32.portnfy_port; 21471885Sraf pntfy.portnfy_user = (void *)(uintptr_t) 21481885Sraf pntfy32.portnfy_user; 21491885Sraf aio_use_port = 1; 21501885Sraf } else if (sigev32->sigev_notify == SIGEV_THREAD) { 21511885Sraf pntfy.portnfy_port = sigev32->sigev_signo; 21521885Sraf pntfy.portnfy_user = (void *)(uintptr_t) 21531885Sraf sigev32->sigev_value.sival_ptr; 21540Sstevel@tonic-gate aio_use_port = 1; 21550Sstevel@tonic-gate } 21560Sstevel@tonic-gate } 21570Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 21580Sstevel@tonic-gate 21590Sstevel@tonic-gate /* 21600Sstevel@tonic-gate * check the permission of the partition 21610Sstevel@tonic-gate */ 21620Sstevel@tonic-gate 21630Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) { 21640Sstevel@tonic-gate releasef(fd); 21650Sstevel@tonic-gate return (EBADF); 
21660Sstevel@tonic-gate } 21670Sstevel@tonic-gate 21680Sstevel@tonic-gate vp = fp->f_vnode; 21690Sstevel@tonic-gate aio_func = check_vp(vp, mode); 21700Sstevel@tonic-gate if (aio_func == NULL) { 21710Sstevel@tonic-gate releasef(fd); 21720Sstevel@tonic-gate return (EBADFD); 21730Sstevel@tonic-gate } 21741885Sraf if (run_mode == AIO_LARGEFILE) 21751885Sraf error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 21760Sstevel@tonic-gate else 21771885Sraf error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 21780Sstevel@tonic-gate 21790Sstevel@tonic-gate if (error) { 21800Sstevel@tonic-gate releasef(fd); 21810Sstevel@tonic-gate return (error); 21820Sstevel@tonic-gate } 21830Sstevel@tonic-gate /* 21840Sstevel@tonic-gate * enable polling on this request if the opcode has 21850Sstevel@tonic-gate * the AIO poll bit set 21860Sstevel@tonic-gate */ 21870Sstevel@tonic-gate if (opcode & AIO_POLL_BIT) 21880Sstevel@tonic-gate reqp->aio_req_flags |= AIO_POLL; 21890Sstevel@tonic-gate 21900Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) 21910Sstevel@tonic-gate reqp->aio_req_iocb.iocb = aiocb_arg; 21920Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 21930Sstevel@tonic-gate else 21940Sstevel@tonic-gate reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 21950Sstevel@tonic-gate #endif 21960Sstevel@tonic-gate 21971885Sraf if (aio_use_port) { 21981885Sraf int event = (run_mode == AIO_LARGEFILE)? 21991885Sraf ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) : 22001885Sraf ((mode == FREAD)? AIOAREAD : AIOAWRITE); 22011885Sraf error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event); 22021885Sraf } 22030Sstevel@tonic-gate 22040Sstevel@tonic-gate /* 22050Sstevel@tonic-gate * send the request to driver. 
22060Sstevel@tonic-gate */ 22070Sstevel@tonic-gate if (error == 0) { 22080Sstevel@tonic-gate if (bufsize == 0) { 22090Sstevel@tonic-gate clear_active_fd(fd); 22100Sstevel@tonic-gate aio_zerolen(reqp); 22110Sstevel@tonic-gate return (0); 22120Sstevel@tonic-gate } 22130Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 22140Sstevel@tonic-gate } 22150Sstevel@tonic-gate 22160Sstevel@tonic-gate /* 22170Sstevel@tonic-gate * the fd is stored in the aio_req_t by aio_req_setup(), and 22180Sstevel@tonic-gate * is released by the aio_cleanup_thread() when the IO has 22190Sstevel@tonic-gate * completed. 22200Sstevel@tonic-gate */ 22210Sstevel@tonic-gate if (error) { 22220Sstevel@tonic-gate releasef(fd); 22230Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 22244502Spraks if (aio_use_port) 22254502Spraks aio_deq(&aiop->aio_portpending, reqp); 22260Sstevel@tonic-gate aio_req_free(aiop, reqp); 22270Sstevel@tonic-gate aiop->aio_pending--; 22280Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK) 22290Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv); 22300Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 22310Sstevel@tonic-gate return (error); 22320Sstevel@tonic-gate } 22330Sstevel@tonic-gate clear_active_fd(fd); 22340Sstevel@tonic-gate return (0); 22350Sstevel@tonic-gate } 22360Sstevel@tonic-gate 22370Sstevel@tonic-gate 22380Sstevel@tonic-gate /* 22390Sstevel@tonic-gate * set error for a list IO entry that failed. 
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	/* if event-port notification was set up, take it off the pending q */
	if (portused)
		aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as its never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 * aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	/* wake anyone throttled waiting for a request slot */
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}

/*
 * check if a specified request is done, and remove it from
 * the done queue. otherwise remove anybody from the done queue
 * if NULL is specified.
 *
 * Returns the removed request, or NULL when the request is not yet on
 * the done queue (still in flight) or when resultp matches no request.
 * Caller must hold both aio_cleanupq_mutex and aio_mutex.
 */
static aio_req_t *
aio_req_done(void *resultp)
{
	aio_req_t **bucket;
	aio_req_t *ent;
	aio_t *aiop = curproc->p_aio;
	long index;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (resultp) {
		/* look the user-level result pointer up in the hash table */
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
				if (ent->aio_req_flags & AIO_DONEQ) {
					return (aio_req_remove(ent));
				}
				/* found, but not done yet */
				return (NULL);
			}
		}
		/* no match, resultp is invalid */
		return (NULL);
	}
	/* NULL resultp: dequeue any request from the done queue */
	return (aio_req_remove(NULL));
}

/*
 * determine if a user-level resultp pointer is associated with an
 * active IO request. Zero is returned when the request is done,
 * and the request is removed from the done queue. Only when the
 * return value is zero, is the "reqp" pointer valid. One is returned
 * when the request is inprogress. Two is returned when the request
 * is invalid.
23080Sstevel@tonic-gate */ 23090Sstevel@tonic-gate static int 23100Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 23110Sstevel@tonic-gate { 23120Sstevel@tonic-gate aio_req_t **bucket; 23130Sstevel@tonic-gate aio_req_t *ent; 23140Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 23150Sstevel@tonic-gate long index; 23160Sstevel@tonic-gate 23170Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 23180Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 23190Sstevel@tonic-gate 23200Sstevel@tonic-gate index = AIO_HASH(resultp); 23210Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 23220Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 23230Sstevel@tonic-gate if (ent->aio_req_resultp == resultp) { 23240Sstevel@tonic-gate if (ent->aio_req_flags & AIO_DONEQ) { 23250Sstevel@tonic-gate *reqp = aio_req_remove(ent); 23260Sstevel@tonic-gate return (0); 23270Sstevel@tonic-gate } 23280Sstevel@tonic-gate return (1); 23290Sstevel@tonic-gate } 23300Sstevel@tonic-gate } 23310Sstevel@tonic-gate /* no match, resultp is invalid */ 23320Sstevel@tonic-gate return (2); 23330Sstevel@tonic-gate } 23340Sstevel@tonic-gate 23350Sstevel@tonic-gate /* 23360Sstevel@tonic-gate * remove a request from the done queue. 
 * If reqp is non-NULL it is unlinked from whichever circular queue it
 * is on (aio_doneq or aio_cleanupq); if reqp is NULL the head of the
 * done queue is dequeued instead.  Returns the removed request, or
 * NULL when reqp is NULL and the done queue is empty.  Caller must
 * hold aio_mutex.
 */
static aio_req_t *
aio_req_remove(aio_req_t *reqp)
{
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp != NULL) {
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp->aio_req_next == reqp) {
			/* only one request on queue */
			if (reqp == aiop->aio_doneq) {
				aiop->aio_doneq = NULL;
			} else {
				ASSERT(reqp == aiop->aio_cleanupq);
				aiop->aio_cleanupq = NULL;
			}
		} else {
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			/*
			 * The request can be either on the aio_doneq or the
			 * aio_cleanupq
			 */
			if (reqp == aiop->aio_doneq)
				aiop->aio_doneq = reqp->aio_req_next;

			if (reqp == aiop->aio_cleanupq)
				aiop->aio_cleanupq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	} else if ((reqp = aiop->aio_doneq) != NULL) {
		/* no specific request: take the head of the done queue */
		ASSERT(reqp->aio_req_flags & AIO_DONEQ);
		if (reqp == reqp->aio_req_next) {
			/* only one request on queue */
			aiop->aio_doneq = NULL;
		} else {
			reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
			reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
			aiop->aio_doneq = reqp->aio_req_next;
		}
		reqp->aio_req_flags &= ~AIO_DONEQ;
		reqp->aio_req_next = NULL;
		reqp->aio_req_prev = NULL;
	}
	/* last done-queue entry gone: wake threads blocked in aio_waitn() */
	if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
		cv_broadcast(&aiop->aio_waitcv);
	return (reqp);
}

/*
 * Allocate and initialize an aio_req_t for one control block (arg).
 * On success the request is accounted in aio_pending/aio_outstanding,
 * queued on aio_portpending for SIGEV_THREAD/SIGEV_PORT notification,
 * and returned through reqpp.  Returns 0, or EAGAIN/EIO/EINVAL on
 * failure (any signal queue entry allocated here is freed on failure).
 */
static int
aio_req_setup(
	aio_req_t **reqpp,
	aio_t *aiop,
	aiocb_t *arg,
	aio_result_t *resultp,
	vnode_t *vp)
{
	sigqueue_t *sqp = NULL;
	aio_req_t *reqp;
	struct uio *uio;
	struct sigevent *sigev;
	int error;

	/* pre-build the siginfo for SIGEV_SIGNAL completion notification */
	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value = sigev->sigev_value;
	}

	mutex_enter(&aiop->aio_mutex);

	/* the process is draining outstanding aio (e.g. at exit) */
	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}

/*
 * Allocate p_aio struct.
 * Returns the new, zeroed aio_t with its three mutexes initialized,
 * or NULL if memory could not be allocated (KM_NOSLEEP).  Caller must
 * hold p_lock of the current process.
 */
static aio_t *
aio_aiop_alloc(void)
{
	aio_t *aiop;

	ASSERT(MUTEX_HELD(&curproc->p_lock));

	aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
	if (aiop) {
		mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
		    NULL);
		mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
	}
	return (aiop);
}

/*
 * Allocate an aio_req struct.
 * The request is taken from the process's free list when possible,
 * otherwise allocated with KM_NOSLEEP.  It is hashed by its result
 * pointer; a duplicate resultp puts it back on the free list and
 * returns EINVAL.  Returns 0, EAGAIN, or EINVAL.  Caller must hold
 * aio_mutex.
 */
static int
aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
{
	aio_req_t *reqp;
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if ((reqp = aiop->aio_free) != NULL) {
		aiop->aio_free = reqp->aio_req_next;
		bzero(reqp, sizeof (*reqp));
	} else {
		/*
		 * Check whether memory is getting tight.
		 * This is a temporary mechanism to avoid memory
		 * exhaustion by a single process until we come up
		 * with a per process solution such as setrlimit().
		 */
		if (freemem < desfree)
			return (EAGAIN);
		reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
		if (reqp == NULL)
			return (EAGAIN);
	}
	/* wire the embedded uio/iovec back up after the bzero/zalloc */
	reqp->aio_req.aio_uio = &reqp->aio_req_uio;
	reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
	reqp->aio_req.aio_private = reqp;
	reqp->aio_req_buf.b_offset = -1;
	reqp->aio_req_resultp = resultp;
	if (aio_hash_insert(reqp, aiop)) {
		/* duplicate resultp: give the request back to the free list */
		reqp->aio_req_next = aiop->aio_free;
		aiop->aio_free = reqp;
		return (EINVAL);
	}
	*nreqp = reqp;
	return (0);
}

/*
 * Allocate an aio_lio_t struct.
 * Taken from the process's lio free list when possible, otherwise
 * allocated with KM_NOSLEEP.  Returns 0 or EAGAIN.  Caller must hold
 * aio_mutex.
 */
static int
aio_lio_alloc(aio_lio_t **head)
{
	aio_lio_t *liop;
	aio_t *aiop = curproc->p_aio;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if ((liop = aiop->aio_lio_free) != NULL) {
		aiop->aio_lio_free = liop->lio_next;
	} else {
		/*
		 * Check whether memory is getting tight.
		 * This is a temporary mechanism to avoid memory
		 * exhaustion by a single process until we come up
		 * with a per process solution such as setrlimit().
		 */
		if (freemem < desfree)
			return (EAGAIN);

		liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
		if (liop == NULL)
			return (EAGAIN);
	}
	*head = liop;
	return (0);
}

/*
 * this is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio. normally,
 * the process will have completed the aio before unmapping the
 * segment. If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped. In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 */

static int
aio_cleanup_thread(aio_t *aiop)
{
	proc_t *p = curproc;
	struct as *as = p->p_as;
	int poked = 0;		/* set when awakened by pokelwps() */
	kcondvar_t *cvp;
	int exit_flag = 0;	/* completed-I/O page locks released */
	int rqclnup = 0;	/* explicit cleanup request (e.g. DR) seen */

	/* block all maskable signals in this kernel thread */
	sigfillset(&curthread->t_hold);
	sigdiffset(&curthread->t_hold, &cantmask);
	for (;;) {
		/*
		 * if a segment is being unmapped, and the current
		 * process's done queue is not empty, then every request
		 * on the doneq with locked resources should be forced
		 * to release their locks. By moving the doneq request
		 * to the cleanupq, aio_cleanup() will process the cleanupq,
		 * and place requests back onto the doneq. All requests
		 * processed by aio_cleanup() will have their physical
		 * resources unlocked.
		 */
		mutex_enter(&aiop->aio_mutex);
		if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
			aiop->aio_flags |= AIO_CLEANUP;
			mutex_enter(&as->a_contents);
			if (aiop->aio_rqclnup) {
				aiop->aio_rqclnup = 0;
				rqclnup = 1;
			}

			if ((rqclnup || AS_ISUNMAPWAIT(as)) &&
			    aiop->aio_doneq) {
				aio_req_t *doneqhead = aiop->aio_doneq;
				mutex_exit(&as->a_contents);
				aiop->aio_doneq = NULL;
				aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
			} else {
				mutex_exit(&as->a_contents);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		aio_cleanup(AIO_CLEANUP_THREAD);
		/*
		 * thread should block on the cleanupcv while
		 * AIO_CLEANUP is set.
		 */
		cvp = &aiop->aio_cleanupcv;
		mutex_enter(&aiop->aio_mutex);

		/* more work queued while we were cleaning: go around again */
		if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
		    aiop->aio_notifyq != NULL ||
		    aiop->aio_portcleanupq != NULL) {
			mutex_exit(&aiop->aio_mutex);
			continue;
		}
		mutex_enter(&as->a_contents);

		/*
		 * AIO_CLEANUP determines when the cleanup thread
		 * should be active. This flag is set when
		 * the cleanup thread is awakened by as_unmap() or
		 * due to DR operations.
		 * The flag is cleared when the blocking as_unmap()
		 * that originally awakened us is allowed to
		 * complete. as_unmap() blocks when trying to
		 * unmap a segment that has SOFTLOCKed pages. when
		 * the segment's pages are all SOFTUNLOCKed,
		 * as->a_flags & AS_UNMAPWAIT should be zero.
		 *
		 * In case of cleanup request by DR, the flag is cleared
		 * once all the pending aio requests have been processed.
		 *
		 * The flag shouldn't be cleared right away if the
		 * cleanup thread was interrupted because the process
		 * is doing forkall(). This happens when cv_wait_sig()
		 * returns zero, because it was awakened by a pokelwps().
		 * If the process is not exiting, it must be doing forkall().
		 */
		if ((poked == 0) &&
		    ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
		    (aiop->aio_pending == 0))) {
			aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
			cvp = &as->a_cv;	/* sleep on the as cv instead */
			rqclnup = 0;
		}
		mutex_exit(&aiop->aio_mutex);
		if (poked) {
			/*
			 * If the process is exiting/killed, don't return
			 * immediately without waiting for pending I/O's
			 * and releasing the page locks.
			 */
			if (p->p_flag & (SEXITLWPS|SKILLED)) {
				/*
				 * If exit_flag is set, then it is
				 * safe to exit because we have released
				 * page locks of completed I/O's.
				 */
				if (exit_flag)
					break;

				mutex_exit(&as->a_contents);

				/*
				 * Wait for all the pending aio to complete.
				 */
				mutex_enter(&aiop->aio_mutex);
				aiop->aio_flags |= AIO_REQ_BLOCK;
				while (aiop->aio_pending != 0)
					cv_wait(&aiop->aio_cleanupcv,
					    &aiop->aio_mutex);
				mutex_exit(&aiop->aio_mutex);
				exit_flag = 1;
				continue;
			} else if (p->p_flag &
			    (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
				/*
				 * hold LWP until it
				 * is continued.
				 */
				mutex_exit(&as->a_contents);
				mutex_enter(&p->p_lock);
				stop(PR_SUSPENDED, SUSPEND_NORMAL);
				mutex_exit(&p->p_lock);
				poked = 0;
				continue;
			}
		} else {
			/*
			 * When started this thread will sleep on as->a_cv.
			 * as_unmap will awake this thread if the
			 * segment has SOFTLOCKed pages (poked = 0).
			 * 1. pokelwps() awakes this thread =>
			 *    break the loop to check SEXITLWPS, SHOLDFORK, etc
			 * 2. as_unmap awakes this thread =>
			 *    to break the loop it is necessary that
			 *    - AS_UNMAPWAIT is set (as_unmap is waiting for
			 *	memory to be unlocked)
			 *    - AIO_CLEANUP is not set
			 *	(if AIO_CLEANUP is set we have to wait for
			 *	pending requests. aio_done will send a signal
			 *	for every request which completes to continue
			 *	unmapping the corresponding address range)
			 * 3. A cleanup request will wake this thread up, ex.
			 *    by the DR operations. The aio_rqclnup flag will
			 *    be set.
			 */
			while (poked == 0) {
				/*
				 * The clean up requests that came in
				 * after we had just cleaned up, couldn't
				 * be causing the unmap thread to block - as
				 * unmap event happened first.
				 * Let aio_done() wake us up if it sees a need.
				 */
				if (aiop->aio_rqclnup &&
				    (aiop->aio_flags & AIO_CLEANUP) == 0)
					break;
				poked = !cv_wait_sig(cvp, &as->a_contents);
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_signal(cvp);
				if (aiop->aio_outstanding != 0)
					break;
			}
		}
		mutex_exit(&as->a_contents);
	}
exit:
	/* reached via the break above, with as->a_contents still held */
	mutex_exit(&as->a_contents);
	ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
	aston(curthread);	/* make thread do post_syscall */
	return (0);
}

/*
 * save a reference to a user's outstanding aio in a hash list.
 * Returns DUPLICATE if another request with the same result pointer is
 * already hashed, otherwise appends the request and returns 0.
 */
static int
aio_hash_insert(
	aio_req_t *aio_reqp,
	aio_t *aiop)
{
	long index;
	aio_result_t *resultp = aio_reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = &aiop->aio_hash[index];
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp)
			return (DUPLICATE);
		nextp = &current->aio_hash_next;
	}
	*nextp = aio_reqp;
	aio_reqp->aio_hash_next = NULL;
	return (0);
}

/*
 * Decide which async entry point, if any, serves this vnode:
 * returns the PXFS or driver read/write aio function, or NULL when
 * kaio cannot handle the request (non-VCHR, STREAMS, or old driver)
 * so that user-level aio takes over.
 */
static int
(*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
	cred_t *)
{
	struct snode *sp;
	dev_t dev;
	struct cb_ops *cb;
	major_t major;
	int (*aio_func)();

	dev = vp->v_rdev;
	major = getmajor(dev);

	/*
	 * return NULL for requests to files and STREAMs so
	 * that libaio takes care of them.
	 */
	if (vp->v_type == VCHR) {
		/* no stream device for kaio */
		if (STREAMSTAB(major)) {
			return (NULL);
		}
	} else {
		return (NULL);
	}

	/*
	 * Check old drivers which do not have async I/O entry points.
	 */
	if (devopsp[major]->devo_rev < 3)
		return (NULL);

	cb = devopsp[major]->devo_cb_ops;

	/* cb_aread/cb_awrite only exist from cb_rev 1 onward */
	if (cb->cb_rev < 1)
		return (NULL);

	/*
	 * Check whether this device is a block device.
	 * Kaio is not supported for devices like tty.
	 */
	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
		return (NULL);

	/*
	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
	 * We cannot call the driver directly. Instead return the
	 * PXFS functions.
	 */

	if (IS_PXFSVP(vp)) {
		if (mode & FREAD)
			return (clpxfs_aio_read);
		else
			return (clpxfs_aio_write);
	}
	/* wrap the driver entry point in the common-prototype shim below */
	if (mode & FREAD)
		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
	else
		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;

	/*
	 * Do we need this ?
	 * nodev returns ENXIO anyway.
	 */
	if (aio_func == nodev)
		return (NULL);

	/* mark the snode accessed, as a read/write through it would */
	sp = VTOS(vp);
	smark(sp, SACC);
	return (aio_func);
}

/*
 * Clustering: We want check_vp to return a function prototyped
 * correctly that will be common to both PXFS and regular case.
 * We define this intermediate function that will do the right
 * thing for driver cases.
28450Sstevel@tonic-gate */ 28460Sstevel@tonic-gate 28470Sstevel@tonic-gate static int 28480Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 28490Sstevel@tonic-gate { 28500Sstevel@tonic-gate dev_t dev; 28510Sstevel@tonic-gate struct cb_ops *cb; 28520Sstevel@tonic-gate 28530Sstevel@tonic-gate ASSERT(vp->v_type == VCHR); 28540Sstevel@tonic-gate ASSERT(!IS_PXFSVP(vp)); 28550Sstevel@tonic-gate dev = VTOS(vp)->s_dev; 28560Sstevel@tonic-gate ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 28570Sstevel@tonic-gate 28580Sstevel@tonic-gate cb = devopsp[getmajor(dev)]->devo_cb_ops; 28590Sstevel@tonic-gate 28600Sstevel@tonic-gate ASSERT(cb->cb_awrite != nodev); 28610Sstevel@tonic-gate return ((*cb->cb_awrite)(dev, aio, cred_p)); 28620Sstevel@tonic-gate } 28630Sstevel@tonic-gate 28640Sstevel@tonic-gate /* 28650Sstevel@tonic-gate * Clustering: We want check_vp to return a function prototyped 28660Sstevel@tonic-gate * correctly that will be common to both PXFS and regular case. 28670Sstevel@tonic-gate * We define this intermediate function that will do the right 28680Sstevel@tonic-gate * thing for driver cases. 
28690Sstevel@tonic-gate */ 28700Sstevel@tonic-gate 28710Sstevel@tonic-gate static int 28720Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 28730Sstevel@tonic-gate { 28740Sstevel@tonic-gate dev_t dev; 28750Sstevel@tonic-gate struct cb_ops *cb; 28760Sstevel@tonic-gate 28770Sstevel@tonic-gate ASSERT(vp->v_type == VCHR); 28780Sstevel@tonic-gate ASSERT(!IS_PXFSVP(vp)); 28790Sstevel@tonic-gate dev = VTOS(vp)->s_dev; 28800Sstevel@tonic-gate ASSERT(!STREAMSTAB(getmajor(dev))); 28810Sstevel@tonic-gate 28820Sstevel@tonic-gate cb = devopsp[getmajor(dev)]->devo_cb_ops; 28830Sstevel@tonic-gate 28840Sstevel@tonic-gate ASSERT(cb->cb_aread != nodev); 28850Sstevel@tonic-gate return ((*cb->cb_aread)(dev, aio, cred_p)); 28860Sstevel@tonic-gate } 28870Sstevel@tonic-gate 28880Sstevel@tonic-gate /* 28890Sstevel@tonic-gate * This routine is called when a largefile call is made by a 32bit 28900Sstevel@tonic-gate * process on a ILP32 or LP64 kernel. All 64bit processes are large 28910Sstevel@tonic-gate * file by definition and will call alio() instead. 
 */
/*
 * List-I/O handler for LARGEFILE (aiocb64_32_t) control blocks issued
 * by 32-bit processes (the caller asserts DATAMODEL_ILP32 below).
 * Copies in the array of nent user aiocb pointers, optionally sets up
 * list-completion notification (SIGEV_SIGNAL directly, SIGEV_PORT and
 * SIGEV_THREAD via event ports), then validates and dispatches each
 * entry to the target vnode's async-I/O routine.  For LIO_WAIT the
 * caller sleeps here until every request in the list has completed.
 * Returns 0 or an errno: EIO if any individual request failed,
 * ENOTSUP if any target vnode has no async-I/O support.
 */
static int
alioLF(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
#ifdef _LP64
	aiocb_t		aiocb_n;
#endif
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify32_t	pnotify;
	int		event;

	/* fail unless the process has an aio context and nent is sane */
	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/*
	 * Event Ports.  For SIGEV_THREAD the port number and user cookie
	 * are packed into sigev_signo/sigev_value; for SIGEV_PORT a
	 * port_notify_t must be copied in from user space.
	 */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		/*
		 * Remember the list's port; per-request events posted to
		 * the same port are duplicated from pkevtp further below.
		 */
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		/* head not yet attached to any request; freed at done: */
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			/* holding aio_mutex path above; cannot sleep here */
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

#ifdef _LP64
		/* on LP64, convert to a native aiocb_t for the common path */
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		/* remember the user-space address of this entry's aiocb */
		reqp->aio_req_iocb.iocb32 = *ucbp;

		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		/* bitwise OR of two 0/1 flags; short-circuit not needed */
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length request completes without driver */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify,
			    &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}

#ifdef _SYSCALL32_IMPL
/*
 * Convert a 32-bit largefile aiocb (aiocb64_32_t) into a native
 * 64-bit aiocb_t, field by field.
 */
static void
aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	/* user-space 32-bit pointers are zero-extended through uintptr_t */
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif

/*
 * This function is used only for largefile calls made by
 * 32 bit applications.
 *
 * Allocates and initializes an aio_req_t for one largefile aiocb:
 * builds the completion sigqueue (SIGEV_SIGNAL only), accounts the
 * request against the process's aio_t, and fills in the embedded uio
 * from the user's buffer/length/offset.  Returns 0 with *reqpp set,
 * EAGAIN if the sigqueue allocation fails, EIO if new requests are
 * being blocked (AIO_REQ_BLOCK), or the error from aio_req_alloc().
 */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int		error;

	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		/* no sleeping allocation; fail with EAGAIN instead */
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	/* port/thread notification: also track on the port-pending list */
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}

/*
 * This routine is called when a non largefile call is made by a 32bit
 * process on a ILP32 or LP64 kernel.
 *
 * The declarations differ by kernel data model: on LP64 the user array
 * holds 32-bit pointers (caddr32_t) to aiocb32_t control blocks that
 * are converted to native aiocb_t; on ILP32 it holds native aiocb_t
 * pointers directly.
 */
static int
alio32(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
#ifdef	_LP64
	aiocb32_t	*cbp;
	caddr32_t	*ucbp;
	aiocb32_t	cb32;
	aiocb32_t	*aiocb32 = &cb32;
	struct sigevent32	sigevk;
#else
	aiocb_t		*cbp, **ucbp;
	struct sigevent	sigevk;
#endif
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
#ifdef	_LP64
	port_notify32_t	pnotify;
#else
	port_notify_t	pnotify;
#endif
	int		event;

	/* fail unless the process has an aio context and nent is sane */
	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

#ifdef	_LP64
	ssize = (sizeof (caddr32_t) * nent);
#else
	ssize = (sizeof (aiocb_t *) * nent);
#endif
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (void *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/*
	 * Event Ports.  For SIGEV_THREAD the port number and user cookie
	 * are packed into sigev_signo/sigev_value; for SIGEV_PORT a
	 * port_notify_t must be copied in from user space.
	 */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		/*
		 * Remember the list's port; per-request events posted to
		 * the same port are duplicated from pkevtp further below.
		 */
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		/* head not yet attached to any request; freed at done: */
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's has
			 * completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		/* skip entry if it can't be copied. */
#ifdef	_LP64
		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
#else
		cbp = (aiocb_t *)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
#endif
		{
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}
#ifdef	_LP64
		/*
		 * copy 32 bit structure into 64 bit structure
		 */
		aiocb_32ton(aiocb32, aiocb);
#endif /* _LP64 */

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		/* remember the user-space address of this entry's aiocb */
		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		/* bitwise OR of two 0/1 flags; short-circuit not needed */
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
#ifdef _LP64
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb32->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
			}
#else
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
#endif
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length request completes without driver */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify,
			    &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}


#ifdef _SYSCALL32_IMPL
/*
 * Convert a 32-bit aiocb32_t into a native 64-bit aiocb_t,
 * field by field.
 */
void
aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
37670Sstevel@tonic-gate */ 37680Sstevel@tonic-gate dest->aio_sigevent.sigev_value.sival_int = 37690Sstevel@tonic-gate (int)src->aio_sigevent.sigev_value.sival_int; 37700Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 37710Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_function; 37720Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 37730Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 37740Sstevel@tonic-gate dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 37750Sstevel@tonic-gate dest->aio_lio_opcode = src->aio_lio_opcode; 37760Sstevel@tonic-gate dest->aio_state = src->aio_state; 37770Sstevel@tonic-gate dest->aio__pad[0] = src->aio__pad[0]; 37780Sstevel@tonic-gate } 37790Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 37800Sstevel@tonic-gate 37810Sstevel@tonic-gate /* 37820Sstevel@tonic-gate * aio_port_callback() is called just before the event is retrieved from the 37830Sstevel@tonic-gate * port. The task of this callback function is to finish the work of the 37840Sstevel@tonic-gate * transaction for the application, it means : 37850Sstevel@tonic-gate * - copyout transaction data to the application 37860Sstevel@tonic-gate * (this thread is running in the right process context) 37870Sstevel@tonic-gate * - keep trace of the transaction (update of counters). 37880Sstevel@tonic-gate * - free allocated buffers 37890Sstevel@tonic-gate * The aiocb pointer is the object element of the port_kevent_t structure. 
37900Sstevel@tonic-gate * 37910Sstevel@tonic-gate * flag : 37920Sstevel@tonic-gate * PORT_CALLBACK_DEFAULT : do copyout and free resources 37930Sstevel@tonic-gate * PORT_CALLBACK_CLOSE : don't do copyout, free resources 37940Sstevel@tonic-gate */ 37950Sstevel@tonic-gate 37960Sstevel@tonic-gate /*ARGSUSED*/ 37970Sstevel@tonic-gate int 37980Sstevel@tonic-gate aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 37990Sstevel@tonic-gate { 38000Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 38010Sstevel@tonic-gate aio_req_t *reqp = arg; 38020Sstevel@tonic-gate struct iovec *iov; 38030Sstevel@tonic-gate struct buf *bp; 38040Sstevel@tonic-gate void *resultp; 38050Sstevel@tonic-gate 38060Sstevel@tonic-gate if (pid != curproc->p_pid) { 38070Sstevel@tonic-gate /* wrong proc !!, can not deliver data here ... */ 38080Sstevel@tonic-gate return (EACCES); 38090Sstevel@tonic-gate } 38100Sstevel@tonic-gate 38110Sstevel@tonic-gate mutex_enter(&aiop->aio_portq_mutex); 38120Sstevel@tonic-gate reqp->aio_req_portkev = NULL; 38130Sstevel@tonic-gate aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 38140Sstevel@tonic-gate mutex_exit(&aiop->aio_portq_mutex); 38150Sstevel@tonic-gate aphysio_unlock(reqp); /* unlock used pages */ 38160Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 38170Sstevel@tonic-gate if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 38180Sstevel@tonic-gate aio_req_free_port(aiop, reqp); /* back to free list */ 38190Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 38200Sstevel@tonic-gate return (0); 38210Sstevel@tonic-gate } 38220Sstevel@tonic-gate 38230Sstevel@tonic-gate iov = reqp->aio_req_uio.uio_iov; 38240Sstevel@tonic-gate bp = &reqp->aio_req_buf; 38250Sstevel@tonic-gate resultp = (void *)reqp->aio_req_resultp; 38260Sstevel@tonic-gate aio_req_free_port(aiop, reqp); /* request struct back to free list */ 38270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 38280Sstevel@tonic-gate if (flag == PORT_CALLBACK_DEFAULT) 
38290Sstevel@tonic-gate aio_copyout_result_port(iov, bp, resultp); 38300Sstevel@tonic-gate return (0); 38310Sstevel@tonic-gate } 3832