10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 51885Sraf * Common Development and Distribution License (the "License"). 61885Sraf * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 211885Sraf 220Sstevel@tonic-gate /* 23*4123Sdm120769 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate /* 300Sstevel@tonic-gate * Kernel asynchronous I/O. 310Sstevel@tonic-gate * This is only for raw devices now (as of Nov. 1993). 
320Sstevel@tonic-gate */ 330Sstevel@tonic-gate 340Sstevel@tonic-gate #include <sys/types.h> 350Sstevel@tonic-gate #include <sys/errno.h> 360Sstevel@tonic-gate #include <sys/conf.h> 370Sstevel@tonic-gate #include <sys/file.h> 380Sstevel@tonic-gate #include <sys/fs/snode.h> 390Sstevel@tonic-gate #include <sys/unistd.h> 400Sstevel@tonic-gate #include <sys/cmn_err.h> 410Sstevel@tonic-gate #include <vm/as.h> 420Sstevel@tonic-gate #include <vm/faultcode.h> 430Sstevel@tonic-gate #include <sys/sysmacros.h> 440Sstevel@tonic-gate #include <sys/procfs.h> 450Sstevel@tonic-gate #include <sys/kmem.h> 460Sstevel@tonic-gate #include <sys/autoconf.h> 470Sstevel@tonic-gate #include <sys/ddi_impldefs.h> 480Sstevel@tonic-gate #include <sys/sunddi.h> 490Sstevel@tonic-gate #include <sys/aio_impl.h> 500Sstevel@tonic-gate #include <sys/debug.h> 510Sstevel@tonic-gate #include <sys/param.h> 520Sstevel@tonic-gate #include <sys/systm.h> 530Sstevel@tonic-gate #include <sys/vmsystm.h> 540Sstevel@tonic-gate #include <sys/fs/pxfs_ki.h> 550Sstevel@tonic-gate #include <sys/contract/process_impl.h> 560Sstevel@tonic-gate 570Sstevel@tonic-gate /* 580Sstevel@tonic-gate * external entry point. 
590Sstevel@tonic-gate */ 600Sstevel@tonic-gate #ifdef _LP64 610Sstevel@tonic-gate static int64_t kaioc(long, long, long, long, long, long); 620Sstevel@tonic-gate #endif 630Sstevel@tonic-gate static int kaio(ulong_t *, rval_t *); 640Sstevel@tonic-gate 650Sstevel@tonic-gate 660Sstevel@tonic-gate #define AIO_64 0 670Sstevel@tonic-gate #define AIO_32 1 680Sstevel@tonic-gate #define AIO_LARGEFILE 2 690Sstevel@tonic-gate 700Sstevel@tonic-gate /* 710Sstevel@tonic-gate * implementation specific functions (private) 720Sstevel@tonic-gate */ 730Sstevel@tonic-gate #ifdef _LP64 741885Sraf static int alio(int, aiocb_t **, int, struct sigevent *); 750Sstevel@tonic-gate #endif 760Sstevel@tonic-gate static int aionotify(void); 770Sstevel@tonic-gate static int aioinit(void); 780Sstevel@tonic-gate static int aiostart(void); 790Sstevel@tonic-gate static void alio_cleanup(aio_t *, aiocb_t **, int, int); 800Sstevel@tonic-gate static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *, 810Sstevel@tonic-gate cred_t *); 820Sstevel@tonic-gate static void lio_set_error(aio_req_t *); 830Sstevel@tonic-gate static aio_t *aio_aiop_alloc(); 840Sstevel@tonic-gate static int aio_req_alloc(aio_req_t **, aio_result_t *); 850Sstevel@tonic-gate static int aio_lio_alloc(aio_lio_t **); 860Sstevel@tonic-gate static aio_req_t *aio_req_done(void *); 870Sstevel@tonic-gate static aio_req_t *aio_req_remove(aio_req_t *); 880Sstevel@tonic-gate static int aio_req_find(aio_result_t *, aio_req_t **); 890Sstevel@tonic-gate static int aio_hash_insert(struct aio_req_t *, aio_t *); 900Sstevel@tonic-gate static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *, 911885Sraf aio_result_t *, vnode_t *); 920Sstevel@tonic-gate static int aio_cleanup_thread(aio_t *); 930Sstevel@tonic-gate static aio_lio_t *aio_list_get(aio_result_t *); 940Sstevel@tonic-gate static void lio_set_uerror(void *, int); 950Sstevel@tonic-gate extern void aio_zerolen(aio_req_t *); 960Sstevel@tonic-gate static int aiowait(struct timeval 
*, int, long *); 970Sstevel@tonic-gate static int aiowaitn(void *, uint_t, uint_t *, timespec_t *); 980Sstevel@tonic-gate static int aio_unlock_requests(caddr_t iocblist, int iocb_index, 990Sstevel@tonic-gate aio_req_t *reqlist, aio_t *aiop, model_t model); 1000Sstevel@tonic-gate static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max); 1010Sstevel@tonic-gate static int aiosuspend(void *, int, struct timespec *, int, 1020Sstevel@tonic-gate long *, int); 1030Sstevel@tonic-gate static int aliowait(int, void *, int, void *, int); 1040Sstevel@tonic-gate static int aioerror(void *, int); 1050Sstevel@tonic-gate static int aio_cancel(int, void *, long *, int); 1060Sstevel@tonic-gate static int arw(int, int, char *, int, offset_t, aio_result_t *, int); 1070Sstevel@tonic-gate static int aiorw(int, void *, int, int); 1080Sstevel@tonic-gate 1090Sstevel@tonic-gate static int alioLF(int, void *, int, void *); 1101885Sraf static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *, 1111885Sraf aio_result_t *, vnode_t *); 1120Sstevel@tonic-gate static int alio32(int, void *, int, void *); 1130Sstevel@tonic-gate static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 1140Sstevel@tonic-gate static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p); 1150Sstevel@tonic-gate 1160Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 1170Sstevel@tonic-gate static void aiocb_LFton(aiocb64_32_t *, aiocb_t *); 1180Sstevel@tonic-gate void aiocb_32ton(aiocb32_t *, aiocb_t *); 1190Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 1200Sstevel@tonic-gate 1210Sstevel@tonic-gate /* 1220Sstevel@tonic-gate * implementation specific functions (external) 1230Sstevel@tonic-gate */ 1240Sstevel@tonic-gate void aio_req_free(aio_t *, aio_req_t *); 1250Sstevel@tonic-gate 1260Sstevel@tonic-gate /* 1270Sstevel@tonic-gate * Event Port framework 1280Sstevel@tonic-gate */ 1290Sstevel@tonic-gate 1300Sstevel@tonic-gate void aio_req_free_port(aio_t *, aio_req_t *); 
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

/*
 * Native LP64 entry: 6 arguments, 64-bit return value, argument count
 * advertised via SE_ARGC; dispatches through kaioc().
 */
static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
/*
 * 32-bit compatibility entry on a 64-bit kernel: 7 arguments because a
 * 64-bit file offset is passed as two 32-bit words (see kaio(), which
 * reassembles it from uap[4]/uap[5]).
 */
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif	/* _SYSCALL32_IMPL */

#else	/* _LP64 */

/* Native ILP32 entry: offset again arrives as two words, hence 7 args. */
static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif	/* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif	/* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};

/*
 * Install the kaio syscall module(s).  Returns 0 on success or the
 * mod_install() error code.
 */
int
_init(void)
{
	int retval;

	if ((retval = mod_install(&modlinkage)) != 0)
		return (retval);

	return (0);
}

/*
 * Unload the module; mod_remove() will fail while the syscall is marked
 * SE_NOUNLOAD / in use.
 */
int
_fini(void)
{
	int retval;

	retval = mod_remove(&modlinkage);

	return (retval);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

#ifdef _LP64
/*
 * Native LP64 system call entry point.  a0 carries the kaio opcode
 * (possibly with AIO_POLL_BIT or-ed in); a1..a5 are opcode-specific
 * arguments.  On error the errno is set and -1 is returned via
 * set_errno(); otherwise the per-opcode rval is returned.
 */
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	/* AIO_POLL_BIT is a modifier flag, not part of the opcode proper. */
	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related stuff is valid only for
	 * 32 bit kernel and not for 64 bit kernel
	 * On 64 bit kernel we convert large file calls
	 * to regular 64bit calls.
	 */

	default:
		error = EINVAL;
	}
	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif

/*
 * 32-bit (compatibility) system call entry point.  uap[0] is the opcode;
 * a 64-bit file offset arrives split across uap[4]/uap[5] and is
 * reassembled below in endian-dependent order.  Opcodes that need no
 * secondary return value return directly; the rest report rval through
 * rvp->r_val1.
 */
static int
kaio(
	ulong_t *uap,
	rval_t *rvp)
{
	long rval = 0;
	int error = 0;
	offset_t	off;


	rvp->r_vals = 0;
#if defined(_LITTLE_ENDIAN)
	off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
	off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

	switch (uap[0] & ~AIO_POLL_BIT) {
	/*
	 * It must be the 32 bit system call on 64 bit kernel
	 */
	case AIOREAD:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
	case AIOWRITE:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
	case AIOWAIT:
		error = aiowait((struct timeval *)uap[1], (int)uap[2],
		    &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
		    (uint_t *)uap[3], (timespec_t *)uap[4]);
		break;
	case AIONOTIFY:
		return (aionotify());
	case AIOINIT:
		return (aioinit());
	case AIOSTART:
		return (aiostart());
	case AIOLIO:
		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
		    (void *)uap[4]));
	case AIOLIOWAIT:
		return (aliowait((int)uap[1], (void *)uap[2],
		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
	case AIOSUSPEND:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4],
		    &rval, AIO_32);
		break;
	case AIOERROR:
		return (aioerror((void *)uap[1], AIO_32));
	case AIOAREAD:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FREAD, AIO_32));
	case AIOAWRITE:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FWRITE, AIO_32));
	case AIOCANCEL:
		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
		    AIO_32));
		break;
	/* Large-file variants: same paths, AIO_LARGEFILE aiocb layout. */
	case AIOLIO64:
		return (alioLF((int)uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4]));
	case AIOLIOWAIT64:
		return (aliowait(uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
	case AIOSUSPEND64:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4], &rval,
		    AIO_LARGEFILE);
		break;
	case AIOERROR64:
		return (aioerror((void *)uap[1], AIO_LARGEFILE));
	case AIOAREAD64:
		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
		    AIO_LARGEFILE));
	case AIOAWRITE64:
		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
		    AIO_LARGEFILE));
	case AIOCANCEL64:
		error = (aio_cancel((int)uap[1], (void *)uap[2],
		    &rval, AIO_LARGEFILE));
		break;
	default:
		return (EINVAL);
	}

	rvp->r_val1 = rval;
	return (error);
}

/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
	aio_t	*aiop;

	aiop = curproc->p_aio;
	if (aiop == NULL)	/* process has never done kernel aio */
		return (0);

	/* bump the user-level-done count and wake all aiowait() sleepers */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_notifycnt++;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	return (0);
}

/*
 * Convert a user-supplied struct timeval into a relative timestruc_t.
 * On return:
 *	*rqtp == NULL, *blocking == 1	wait indefinitely (timout == NULL)
 *	*rqtp == NULL, *blocking == 0	poll, don't wait ((void *)-1 or 0/0)
 *	*rqtp == rqtime, *blocking == 1	wait for the validated interval
 * Returns 0, EFAULT (bad user address) or EINVAL (malformed timeval).
 */
static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

/*
 * Same as timeval2reltime() but for a user timespec_t.  Note the
 * asymmetry with the timeval variant: there is no (void *)-1 "don't
 * wait" convention here; only a 0/0 timespec means poll.
 * Returns 0, EFAULT or EINVAL.
 */
static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}

/*
 * Wait for one asynchronous request to complete (the aiowait(2)
 * semantic).  On success *rval is the user-space aio_result_t pointer of
 * the reaped request (or 1 when consuming a user-level-done
 * notification).  Returns 0, EINVAL, EINTR or ETIME.
 */
/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);	/* rqtp becomes absolute */
	}

	/*
	 * NOTE(review): timecheck is only initialized when rqtp != NULL;
	 * presumably cv_waituntil_sig() ignores it for a NULL rqtp —
	 * confirm against its implementation.
	 */
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		/* unlock user pages, publish the result, recycle the req */
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */

/*
 * Reap up to nent completed requests into the user array uiocb,
 * waiting for at least *nwait of them (aio_waitn(3C) semantics).
 * The actual count reaped is copied back out through nwait; it is
 * zeroed first so a premature error return reports 0.  Only one
 * aio_waitn may be active per process (AIO_WAITN flag); later callers
 * queue on aio_waitncv.  Returns 0, EINVAL, EAGAIN, EFAULT, ENOMEM,
 * EINTR or ETIME.
 */
/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int		error = 0;
	aio_t		*aiop;
	aio_req_t	*reqlist = NULL;
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* users iocb size */
	size_t		riocbsz;		/* returned iocb size */
	int		iocb_index = 0;
	model_t		model = get_udatamodel();
	int		blocking = 1;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		/* pure poll: grab whatever is done, up to nent */
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	/* size of the user's iocb-pointer array depends on the data model */
	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn calls will sleep here
	 * until the active aio_waitn finishes and leaves the kernel
	 * If the second call does not block (poll), then return
	 * immediately with the error code : EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn-call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 * NOTE(review): timecheck stays uninitialized when rqtp == NULL;
	 * presumably cv_waituntil_sig() ignores it in that case — confirm.
	 */
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/* grow the cached per-process iocb buffer if it is too small */
	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * if we are here second time as a result of timer
		 * expiration, we reset error if there are enough
		 * aiocb's to satisfy request.
		 * We return also if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			if (rval > 0)
				continue;	/* woken: rescan done queue */
			if (rval < 0) {
				/* timed out: one last non-blocking pass */
				error = ETIME;
				blocking = 0;
				continue;
			}
			error = EINTR;
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {

		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	/* don't cache oversized iocb buffers across calls */
	if (aiop->aio_iocbsz > AIO_IOCB_MAX) {
		kmem_free(iocblist, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}

/*
 * aio_unlock_requests
 * copyouts the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * put the aio request structure back into the free list.
8100Sstevel@tonic-gate */ 8110Sstevel@tonic-gate 8120Sstevel@tonic-gate static int 8130Sstevel@tonic-gate aio_unlock_requests( 8140Sstevel@tonic-gate caddr_t iocblist, 8150Sstevel@tonic-gate int iocb_index, 8160Sstevel@tonic-gate aio_req_t *reqlist, 8170Sstevel@tonic-gate aio_t *aiop, 8180Sstevel@tonic-gate model_t model) 8190Sstevel@tonic-gate { 8200Sstevel@tonic-gate aio_req_t *reqp, *nreqp; 8210Sstevel@tonic-gate 8220Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) { 8230Sstevel@tonic-gate for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 8240Sstevel@tonic-gate (((caddr_t *)iocblist)[iocb_index++]) = 8250Sstevel@tonic-gate reqp->aio_req_iocb.iocb; 8260Sstevel@tonic-gate nreqp = reqp->aio_req_next; 8270Sstevel@tonic-gate aphysio_unlock(reqp); 8280Sstevel@tonic-gate aio_copyout_result(reqp); 8290Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 8300Sstevel@tonic-gate aio_req_free(aiop, reqp); 8310Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 8320Sstevel@tonic-gate } 8330Sstevel@tonic-gate } 8340Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 8350Sstevel@tonic-gate else { 8360Sstevel@tonic-gate for (reqp = reqlist; reqp != NULL; reqp = nreqp) { 8370Sstevel@tonic-gate ((caddr32_t *)iocblist)[iocb_index++] = 8380Sstevel@tonic-gate reqp->aio_req_iocb.iocb32; 8390Sstevel@tonic-gate nreqp = reqp->aio_req_next; 8400Sstevel@tonic-gate aphysio_unlock(reqp); 8410Sstevel@tonic-gate aio_copyout_result(reqp); 8420Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 8430Sstevel@tonic-gate aio_req_free(aiop, reqp); 8440Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 8450Sstevel@tonic-gate } 8460Sstevel@tonic-gate } 8470Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 8480Sstevel@tonic-gate return (iocb_index); 8490Sstevel@tonic-gate } 8500Sstevel@tonic-gate 8510Sstevel@tonic-gate /* 8520Sstevel@tonic-gate * aio_reqlist_concat 8530Sstevel@tonic-gate * moves "max" elements from the done queue to the reqlist queue and removes 8540Sstevel@tonic-gate * the AIO_DONEQ flag. 
8550Sstevel@tonic-gate * - reqlist queue is a simple linked list 8560Sstevel@tonic-gate * - done queue is a double linked list 8570Sstevel@tonic-gate */ 8580Sstevel@tonic-gate 8590Sstevel@tonic-gate static int 8600Sstevel@tonic-gate aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max) 8610Sstevel@tonic-gate { 8620Sstevel@tonic-gate aio_req_t *q2, *q2work, *list; 8630Sstevel@tonic-gate int count = 0; 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate list = *reqlist; 8660Sstevel@tonic-gate q2 = aiop->aio_doneq; 8670Sstevel@tonic-gate q2work = q2; 8680Sstevel@tonic-gate while (max-- > 0) { 8690Sstevel@tonic-gate q2work->aio_req_flags &= ~AIO_DONEQ; 8700Sstevel@tonic-gate q2work = q2work->aio_req_next; 8710Sstevel@tonic-gate count++; 8720Sstevel@tonic-gate if (q2work == q2) 8730Sstevel@tonic-gate break; 8740Sstevel@tonic-gate } 8750Sstevel@tonic-gate 8760Sstevel@tonic-gate if (q2work == q2) { 8770Sstevel@tonic-gate /* all elements revised */ 8780Sstevel@tonic-gate q2->aio_req_prev->aio_req_next = list; 8790Sstevel@tonic-gate list = q2; 8800Sstevel@tonic-gate aiop->aio_doneq = NULL; 8810Sstevel@tonic-gate } else { 8820Sstevel@tonic-gate /* 8830Sstevel@tonic-gate * max < elements in the doneq 8840Sstevel@tonic-gate * detach only the required amount of elements 8850Sstevel@tonic-gate * out of the doneq 8860Sstevel@tonic-gate */ 8870Sstevel@tonic-gate q2work->aio_req_prev->aio_req_next = list; 8880Sstevel@tonic-gate list = q2; 8890Sstevel@tonic-gate 8900Sstevel@tonic-gate aiop->aio_doneq = q2work; 8910Sstevel@tonic-gate q2work->aio_req_prev = q2->aio_req_prev; 8920Sstevel@tonic-gate q2->aio_req_prev->aio_req_next = q2work; 8930Sstevel@tonic-gate } 8940Sstevel@tonic-gate *reqlist = list; 8950Sstevel@tonic-gate return (count); 8960Sstevel@tonic-gate } 8970Sstevel@tonic-gate 8980Sstevel@tonic-gate /*ARGSUSED*/ 8990Sstevel@tonic-gate static int 9000Sstevel@tonic-gate aiosuspend( 9010Sstevel@tonic-gate void *aiocb, 9020Sstevel@tonic-gate int nent, 9030Sstevel@tonic-gate 
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int	error;
	aio_t	*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t	cbplist = NULL;
	aiocb_t	*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int	rv;
	int	i;
	size_t	ssize;
	model_t	model = get_udatamodel();
	int	blocking;
	int	timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	/* the process must have called aioinit() and nent must be sane */
	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		/* snapshot timechanged so cv_waituntil_sig can detect clock jumps */
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	/* user list holds native pointers or 32-bit addresses per data model */
	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			/* aio_cleanup() must run unlocked; reacquire in order */
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			/*
			 * Collect every user iocb that has already completed
			 * onto the "found" list.  NULL entries are skipped.
			 * NOTE(review): reqp is only reassigned on branches
			 * matching run_mode; presumably run_mode is constant
			 * for the whole call — verify against callers.
			 */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	/* retire everything collected: unlock pages, copy results, free reqs */
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
10960Sstevel@tonic-gate */ 10970Sstevel@tonic-gate static int 10980Sstevel@tonic-gate aioinit(void) 10990Sstevel@tonic-gate { 11000Sstevel@tonic-gate proc_t *p = curproc; 11010Sstevel@tonic-gate aio_t *aiop; 11020Sstevel@tonic-gate mutex_enter(&p->p_lock); 11030Sstevel@tonic-gate if ((aiop = p->p_aio) == NULL) { 11040Sstevel@tonic-gate aiop = aio_aiop_alloc(); 11050Sstevel@tonic-gate p->p_aio = aiop; 11060Sstevel@tonic-gate } 11070Sstevel@tonic-gate mutex_exit(&p->p_lock); 11080Sstevel@tonic-gate if (aiop == NULL) 11090Sstevel@tonic-gate return (ENOMEM); 11100Sstevel@tonic-gate return (0); 11110Sstevel@tonic-gate } 11120Sstevel@tonic-gate 11130Sstevel@tonic-gate /* 11140Sstevel@tonic-gate * start a special thread that will cleanup after aio requests 11150Sstevel@tonic-gate * that are preventing a segment from being unmapped. as_unmap() 11160Sstevel@tonic-gate * blocks until all phsyio to this segment is completed. this 11170Sstevel@tonic-gate * doesn't happen until all the pages in this segment are not 11180Sstevel@tonic-gate * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio 11190Sstevel@tonic-gate * requests still outstanding. this special thread will make sure 11200Sstevel@tonic-gate * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed. 11210Sstevel@tonic-gate * 11220Sstevel@tonic-gate * this function will return an error if the process has only 11230Sstevel@tonic-gate * one LWP. the assumption is that the caller is a separate LWP 11240Sstevel@tonic-gate * that remains blocked in the kernel for the life of this process. 
11250Sstevel@tonic-gate */ 11260Sstevel@tonic-gate static int 11270Sstevel@tonic-gate aiostart(void) 11280Sstevel@tonic-gate { 11290Sstevel@tonic-gate proc_t *p = curproc; 11300Sstevel@tonic-gate aio_t *aiop; 11310Sstevel@tonic-gate int first, error = 0; 11320Sstevel@tonic-gate 11330Sstevel@tonic-gate if (p->p_lwpcnt == 1) 11340Sstevel@tonic-gate return (EDEADLK); 11350Sstevel@tonic-gate mutex_enter(&p->p_lock); 11360Sstevel@tonic-gate if ((aiop = p->p_aio) == NULL) 11370Sstevel@tonic-gate error = EINVAL; 11380Sstevel@tonic-gate else { 11390Sstevel@tonic-gate first = aiop->aio_ok; 11400Sstevel@tonic-gate if (aiop->aio_ok == 0) 11410Sstevel@tonic-gate aiop->aio_ok = 1; 11420Sstevel@tonic-gate } 11430Sstevel@tonic-gate mutex_exit(&p->p_lock); 11440Sstevel@tonic-gate if (error == 0 && first == 0) { 11450Sstevel@tonic-gate return (aio_cleanup_thread(aiop)); 11460Sstevel@tonic-gate /* should return only to exit */ 11470Sstevel@tonic-gate } 11480Sstevel@tonic-gate return (error); 11490Sstevel@tonic-gate } 11500Sstevel@tonic-gate 11510Sstevel@tonic-gate /* 11520Sstevel@tonic-gate * Associate an aiocb with a port. 11530Sstevel@tonic-gate * This function is used by aiorw() to associate a transaction with a port. 11540Sstevel@tonic-gate * Allocate an event port structure (port_alloc_event()) and store the 11550Sstevel@tonic-gate * delivered user pointer (portnfy_user) in the portkev_user field of the 11560Sstevel@tonic-gate * port_kevent_t structure.. 11570Sstevel@tonic-gate * The aio_req_portkev pointer in the aio_req_t structure was added to identify 11580Sstevel@tonic-gate * the port association. 
11590Sstevel@tonic-gate */ 11600Sstevel@tonic-gate 11610Sstevel@tonic-gate static int 11621885Sraf aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp, 11631885Sraf aio_req_t *reqp, int event) 11640Sstevel@tonic-gate { 11650Sstevel@tonic-gate port_kevent_t *pkevp = NULL; 11660Sstevel@tonic-gate int error; 11670Sstevel@tonic-gate 11680Sstevel@tonic-gate error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT, 11690Sstevel@tonic-gate PORT_SOURCE_AIO, &pkevp); 11700Sstevel@tonic-gate if (error) { 11710Sstevel@tonic-gate if ((error == ENOMEM) || (error == EAGAIN)) 11720Sstevel@tonic-gate error = EAGAIN; 11730Sstevel@tonic-gate else 11740Sstevel@tonic-gate error = EINVAL; 11750Sstevel@tonic-gate } else { 11760Sstevel@tonic-gate port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user, 11770Sstevel@tonic-gate aio_port_callback, reqp); 11781885Sraf pkevp->portkev_events = event; 11790Sstevel@tonic-gate reqp->aio_req_portkev = pkevp; 11800Sstevel@tonic-gate reqp->aio_req_port = pntfy->portnfy_port; 11810Sstevel@tonic-gate } 11820Sstevel@tonic-gate return (error); 11830Sstevel@tonic-gate } 11840Sstevel@tonic-gate 11850Sstevel@tonic-gate #ifdef _LP64 11860Sstevel@tonic-gate 11870Sstevel@tonic-gate /* 11880Sstevel@tonic-gate * Asynchronous list IO. A chain of aiocb's are copied in 11890Sstevel@tonic-gate * one at a time. If the aiocb is invalid, it is skipped. 11900Sstevel@tonic-gate * For each aiocb, the appropriate driver entry point is 11910Sstevel@tonic-gate * called. Optimize for the common case where the list 11920Sstevel@tonic-gate * of requests is to the same file descriptor. 11930Sstevel@tonic-gate * 11940Sstevel@tonic-gate * One possible optimization is to define a new driver entry 11950Sstevel@tonic-gate * point that supports a list of IO requests. Whether this 11960Sstevel@tonic-gate * improves performance depends somewhat on the driver's 11970Sstevel@tonic-gate * locking strategy. 
 * Processing a list could adversely impact
 * the driver's interrupt latency.
 */
static int
alio(
	int		mode_arg,
	aiocb_t		**aiocb_arg,
	int		nent,
	struct sigevent	*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
	aiocb_t		*cbp;
	aiocb_t		**ucbp;
	struct sigevent	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;	/* 1 while no request references head */
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	port_notify_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/* copy in the user's array of aiocb pointers */
	ssize = (sizeof (aiocb_t *) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (aiocb_t **)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		/*
		 * SIGEV_THREAD carries the port number/user pointer inline;
		 * SIGEV_PORT passes a user port_notify_t to copy in.
		 */
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			/* preallocate the completion signal for the list */
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value = sigevk.sigev_value;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = *ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation.
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    &cbp->aio_resultp, vp);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/* at least one request now references head */
		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb = (caddr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		/* bitwise OR is intentional: both flags are 0/1 ints */
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			/*
			 * Reuse the list head's event when this request
			 * targets the same port; otherwise allocate one.
			 */
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length I/O completes immediately */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		/* head is non-NULL here: it is allocated whenever LIO_WAIT */
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* no request ever referenced head; free it and its resources */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}

#endif /* _LP64 */

/*
 * Asynchronous list IO.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/O's are completed if a signal is caught
 * or if the list include UFS I/O requests.
If this happens, 15510Sstevel@tonic-gate * libaio will call aliowait() to wait for the I/O's to 15520Sstevel@tonic-gate * complete 15530Sstevel@tonic-gate */ 15540Sstevel@tonic-gate /*ARGSUSED*/ 15550Sstevel@tonic-gate static int 15560Sstevel@tonic-gate aliowait( 15570Sstevel@tonic-gate int mode, 15580Sstevel@tonic-gate void *aiocb, 15590Sstevel@tonic-gate int nent, 15600Sstevel@tonic-gate void *sigev, 15610Sstevel@tonic-gate int run_mode) 15620Sstevel@tonic-gate { 15630Sstevel@tonic-gate aio_lio_t *head; 15640Sstevel@tonic-gate aio_t *aiop; 15650Sstevel@tonic-gate caddr_t cbplist; 15660Sstevel@tonic-gate aiocb_t *cbp, **ucbp; 15670Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15680Sstevel@tonic-gate aiocb32_t *cbp32; 15690Sstevel@tonic-gate caddr32_t *ucbp32; 15700Sstevel@tonic-gate aiocb64_32_t *cbp64; 15710Sstevel@tonic-gate #endif 15720Sstevel@tonic-gate int error = 0; 15730Sstevel@tonic-gate int i; 15740Sstevel@tonic-gate size_t ssize = 0; 15750Sstevel@tonic-gate model_t model = get_udatamodel(); 15760Sstevel@tonic-gate 15770Sstevel@tonic-gate aiop = curproc->p_aio; 15780Sstevel@tonic-gate if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 15790Sstevel@tonic-gate return (EINVAL); 15800Sstevel@tonic-gate 15810Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) 15820Sstevel@tonic-gate ssize = (sizeof (aiocb_t *) * nent); 15830Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15840Sstevel@tonic-gate else 15850Sstevel@tonic-gate ssize = (sizeof (caddr32_t) * nent); 15860Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 15870Sstevel@tonic-gate 15880Sstevel@tonic-gate if (ssize == 0) 15890Sstevel@tonic-gate return (EINVAL); 15900Sstevel@tonic-gate 15910Sstevel@tonic-gate cbplist = kmem_alloc(ssize, KM_SLEEP); 15920Sstevel@tonic-gate 15930Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) 15940Sstevel@tonic-gate ucbp = (aiocb_t **)cbplist; 15950Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 15960Sstevel@tonic-gate else 15970Sstevel@tonic-gate ucbp32 = (caddr32_t *)cbplist; 
15980Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 15990Sstevel@tonic-gate 16000Sstevel@tonic-gate if (copyin(aiocb, cbplist, ssize)) { 16010Sstevel@tonic-gate error = EFAULT; 16020Sstevel@tonic-gate goto done; 16030Sstevel@tonic-gate } 16040Sstevel@tonic-gate 16050Sstevel@tonic-gate /* 16060Sstevel@tonic-gate * To find the list head, we go through the 16070Sstevel@tonic-gate * list of aiocb structs, find the request 16080Sstevel@tonic-gate * its for, then get the list head that reqp 16090Sstevel@tonic-gate * points to 16100Sstevel@tonic-gate */ 16110Sstevel@tonic-gate head = NULL; 16120Sstevel@tonic-gate 16130Sstevel@tonic-gate for (i = 0; i < nent; i++) { 16140Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) { 16150Sstevel@tonic-gate /* 16160Sstevel@tonic-gate * Since we are only checking for a NULL pointer 16170Sstevel@tonic-gate * Following should work on both native data sizes 16180Sstevel@tonic-gate * as well as for largefile aiocb. 16190Sstevel@tonic-gate */ 16200Sstevel@tonic-gate if ((cbp = *ucbp++) == NULL) 16210Sstevel@tonic-gate continue; 16220Sstevel@tonic-gate if (run_mode != AIO_LARGEFILE) 16230Sstevel@tonic-gate if (head = aio_list_get(&cbp->aio_resultp)) 16240Sstevel@tonic-gate break; 16250Sstevel@tonic-gate else { 16260Sstevel@tonic-gate /* 16270Sstevel@tonic-gate * This is a case when largefile call is 16280Sstevel@tonic-gate * made on 32 bit kernel. 
16290Sstevel@tonic-gate * Treat each pointer as pointer to 16300Sstevel@tonic-gate * aiocb64_32 16310Sstevel@tonic-gate */ 16320Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *) 16330Sstevel@tonic-gate &(((aiocb64_32_t *)cbp)->aio_resultp))) 16340Sstevel@tonic-gate break; 16350Sstevel@tonic-gate } 16360Sstevel@tonic-gate } 16370Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 16380Sstevel@tonic-gate else { 16390Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) { 16400Sstevel@tonic-gate if ((cbp64 = (aiocb64_32_t *) 16410Sstevel@tonic-gate (uintptr_t)*ucbp32++) == NULL) 16420Sstevel@tonic-gate continue; 16430Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *) 16440Sstevel@tonic-gate &cbp64->aio_resultp)) 16450Sstevel@tonic-gate break; 16460Sstevel@tonic-gate } else if (run_mode == AIO_32) { 16470Sstevel@tonic-gate if ((cbp32 = (aiocb32_t *) 16480Sstevel@tonic-gate (uintptr_t)*ucbp32++) == NULL) 16490Sstevel@tonic-gate continue; 16500Sstevel@tonic-gate if (head = aio_list_get((aio_result_t *) 16510Sstevel@tonic-gate &cbp32->aio_resultp)) 16520Sstevel@tonic-gate break; 16530Sstevel@tonic-gate } 16540Sstevel@tonic-gate } 16550Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 16560Sstevel@tonic-gate } 16570Sstevel@tonic-gate 16580Sstevel@tonic-gate if (head == NULL) { 16590Sstevel@tonic-gate error = EINVAL; 16600Sstevel@tonic-gate goto done; 16610Sstevel@tonic-gate } 16620Sstevel@tonic-gate 16630Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 16640Sstevel@tonic-gate while (head->lio_refcnt > 0) { 16650Sstevel@tonic-gate if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 16660Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 16670Sstevel@tonic-gate error = EINTR; 16680Sstevel@tonic-gate goto done; 16690Sstevel@tonic-gate } 16700Sstevel@tonic-gate } 16710Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 16720Sstevel@tonic-gate alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode); 16730Sstevel@tonic-gate done: 16740Sstevel@tonic-gate kmem_free(cbplist, 
ssize); 16750Sstevel@tonic-gate return (error); 16760Sstevel@tonic-gate } 16770Sstevel@tonic-gate 16780Sstevel@tonic-gate aio_lio_t * 16790Sstevel@tonic-gate aio_list_get(aio_result_t *resultp) 16800Sstevel@tonic-gate { 16810Sstevel@tonic-gate aio_lio_t *head = NULL; 16820Sstevel@tonic-gate aio_t *aiop; 16830Sstevel@tonic-gate aio_req_t **bucket; 16840Sstevel@tonic-gate aio_req_t *reqp; 16850Sstevel@tonic-gate long index; 16860Sstevel@tonic-gate 16870Sstevel@tonic-gate aiop = curproc->p_aio; 16880Sstevel@tonic-gate if (aiop == NULL) 16890Sstevel@tonic-gate return (NULL); 16900Sstevel@tonic-gate 16910Sstevel@tonic-gate if (resultp) { 16920Sstevel@tonic-gate index = AIO_HASH(resultp); 16930Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 16940Sstevel@tonic-gate for (reqp = *bucket; reqp != NULL; 16950Sstevel@tonic-gate reqp = reqp->aio_hash_next) { 16960Sstevel@tonic-gate if (reqp->aio_req_resultp == resultp) { 16970Sstevel@tonic-gate head = reqp->aio_req_lio; 16980Sstevel@tonic-gate return (head); 16990Sstevel@tonic-gate } 17000Sstevel@tonic-gate } 17010Sstevel@tonic-gate } 17020Sstevel@tonic-gate return (NULL); 17030Sstevel@tonic-gate } 17040Sstevel@tonic-gate 17050Sstevel@tonic-gate 17060Sstevel@tonic-gate static void 17070Sstevel@tonic-gate lio_set_uerror(void *resultp, int error) 17080Sstevel@tonic-gate { 17090Sstevel@tonic-gate /* 17100Sstevel@tonic-gate * the resultp field is a pointer to where the 17110Sstevel@tonic-gate * error should be written out to the user's 17120Sstevel@tonic-gate * aiocb. 
17130Sstevel@tonic-gate * 17140Sstevel@tonic-gate */ 17150Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 17160Sstevel@tonic-gate (void) sulword(&((aio_result_t *)resultp)->aio_return, 17170Sstevel@tonic-gate (ssize_t)-1); 17180Sstevel@tonic-gate (void) suword32(&((aio_result_t *)resultp)->aio_errno, error); 17190Sstevel@tonic-gate } 17200Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 17210Sstevel@tonic-gate else { 17220Sstevel@tonic-gate (void) suword32(&((aio_result32_t *)resultp)->aio_return, 17230Sstevel@tonic-gate (uint_t)-1); 17240Sstevel@tonic-gate (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error); 17250Sstevel@tonic-gate } 17260Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 17270Sstevel@tonic-gate } 17280Sstevel@tonic-gate 17290Sstevel@tonic-gate /* 17300Sstevel@tonic-gate * do cleanup completion for all requests in list. memory for 17310Sstevel@tonic-gate * each request is also freed. 17320Sstevel@tonic-gate */ 17330Sstevel@tonic-gate static void 17340Sstevel@tonic-gate alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode) 17350Sstevel@tonic-gate { 17360Sstevel@tonic-gate int i; 17370Sstevel@tonic-gate aio_req_t *reqp; 17380Sstevel@tonic-gate aio_result_t *resultp; 17391885Sraf aiocb64_32_t *aiocb_64; 17400Sstevel@tonic-gate 17410Sstevel@tonic-gate for (i = 0; i < nent; i++) { 17420Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 17430Sstevel@tonic-gate if (cbp[i] == NULL) 17440Sstevel@tonic-gate continue; 17450Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) { 17460Sstevel@tonic-gate aiocb_64 = (aiocb64_32_t *)cbp[i]; 17471885Sraf resultp = (aio_result_t *) 17481885Sraf &aiocb_64->aio_resultp; 17490Sstevel@tonic-gate } else 17500Sstevel@tonic-gate resultp = &cbp[i]->aio_resultp; 17510Sstevel@tonic-gate } 17520Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 17530Sstevel@tonic-gate else { 17541885Sraf aiocb32_t *aiocb_32; 17551885Sraf caddr32_t *cbp32; 17560Sstevel@tonic-gate 17570Sstevel@tonic-gate cbp32 = 
(caddr32_t *)cbp; 17580Sstevel@tonic-gate if (cbp32[i] == NULL) 17590Sstevel@tonic-gate continue; 17600Sstevel@tonic-gate if (run_mode == AIO_32) { 17610Sstevel@tonic-gate aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i]; 17620Sstevel@tonic-gate resultp = (aio_result_t *)&aiocb_32-> 17630Sstevel@tonic-gate aio_resultp; 17640Sstevel@tonic-gate } else if (run_mode == AIO_LARGEFILE) { 17650Sstevel@tonic-gate aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i]; 17660Sstevel@tonic-gate resultp = (aio_result_t *)&aiocb_64-> 17670Sstevel@tonic-gate aio_resultp; 17680Sstevel@tonic-gate } 17690Sstevel@tonic-gate } 17700Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 17710Sstevel@tonic-gate /* 17720Sstevel@tonic-gate * we need to get the aio_cleanupq_mutex since we call 17730Sstevel@tonic-gate * aio_req_done(). 17740Sstevel@tonic-gate */ 17750Sstevel@tonic-gate mutex_enter(&aiop->aio_cleanupq_mutex); 17760Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 17770Sstevel@tonic-gate reqp = aio_req_done(resultp); 17780Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 17790Sstevel@tonic-gate mutex_exit(&aiop->aio_cleanupq_mutex); 17800Sstevel@tonic-gate if (reqp != NULL) { 17810Sstevel@tonic-gate aphysio_unlock(reqp); 17820Sstevel@tonic-gate aio_copyout_result(reqp); 17830Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 17840Sstevel@tonic-gate aio_req_free(aiop, reqp); 17850Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 17860Sstevel@tonic-gate } 17870Sstevel@tonic-gate } 17880Sstevel@tonic-gate } 17890Sstevel@tonic-gate 17900Sstevel@tonic-gate /* 17911885Sraf * Write out the results for an aio request that is done. 
17920Sstevel@tonic-gate */ 17930Sstevel@tonic-gate static int 17940Sstevel@tonic-gate aioerror(void *cb, int run_mode) 17950Sstevel@tonic-gate { 17960Sstevel@tonic-gate aio_result_t *resultp; 17970Sstevel@tonic-gate aio_t *aiop; 17980Sstevel@tonic-gate aio_req_t *reqp; 17990Sstevel@tonic-gate int retval; 18000Sstevel@tonic-gate 18010Sstevel@tonic-gate aiop = curproc->p_aio; 18020Sstevel@tonic-gate if (aiop == NULL || cb == NULL) 18030Sstevel@tonic-gate return (EINVAL); 18040Sstevel@tonic-gate 18050Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 18060Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18070Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 18080Sstevel@tonic-gate aio_resultp; 18090Sstevel@tonic-gate else 18100Sstevel@tonic-gate resultp = &((aiocb_t *)cb)->aio_resultp; 18110Sstevel@tonic-gate } 18120Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 18130Sstevel@tonic-gate else { 18140Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18150Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb)-> 18160Sstevel@tonic-gate aio_resultp; 18170Sstevel@tonic-gate else if (run_mode == AIO_32) 18180Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb32_t *)cb)-> 18190Sstevel@tonic-gate aio_resultp; 18200Sstevel@tonic-gate } 18210Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 18220Sstevel@tonic-gate /* 18230Sstevel@tonic-gate * we need to get the aio_cleanupq_mutex since we call 18240Sstevel@tonic-gate * aio_req_find(). 
18250Sstevel@tonic-gate */ 18260Sstevel@tonic-gate mutex_enter(&aiop->aio_cleanupq_mutex); 18270Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 18280Sstevel@tonic-gate retval = aio_req_find(resultp, &reqp); 18290Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 18300Sstevel@tonic-gate mutex_exit(&aiop->aio_cleanupq_mutex); 18310Sstevel@tonic-gate if (retval == 0) { 18320Sstevel@tonic-gate aphysio_unlock(reqp); 18330Sstevel@tonic-gate aio_copyout_result(reqp); 18340Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 18350Sstevel@tonic-gate aio_req_free(aiop, reqp); 18360Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 18370Sstevel@tonic-gate return (0); 18380Sstevel@tonic-gate } else if (retval == 1) 18390Sstevel@tonic-gate return (EINPROGRESS); 18400Sstevel@tonic-gate else if (retval == 2) 18410Sstevel@tonic-gate return (EINVAL); 18420Sstevel@tonic-gate return (0); 18430Sstevel@tonic-gate } 18440Sstevel@tonic-gate 18450Sstevel@tonic-gate /* 18460Sstevel@tonic-gate * aio_cancel - if no requests outstanding, 18470Sstevel@tonic-gate * return AIO_ALLDONE 18480Sstevel@tonic-gate * else 18490Sstevel@tonic-gate * return AIO_NOTCANCELED 18500Sstevel@tonic-gate */ 18510Sstevel@tonic-gate static int 18520Sstevel@tonic-gate aio_cancel( 18530Sstevel@tonic-gate int fildes, 18540Sstevel@tonic-gate void *cb, 18550Sstevel@tonic-gate long *rval, 18560Sstevel@tonic-gate int run_mode) 18570Sstevel@tonic-gate { 18580Sstevel@tonic-gate aio_t *aiop; 18590Sstevel@tonic-gate void *resultp; 18600Sstevel@tonic-gate int index; 18610Sstevel@tonic-gate aio_req_t **bucket; 18620Sstevel@tonic-gate aio_req_t *ent; 18630Sstevel@tonic-gate 18640Sstevel@tonic-gate 18650Sstevel@tonic-gate /* 18660Sstevel@tonic-gate * Verify valid file descriptor 18670Sstevel@tonic-gate */ 18680Sstevel@tonic-gate if ((getf(fildes)) == NULL) { 18690Sstevel@tonic-gate return (EBADF); 18700Sstevel@tonic-gate } 18710Sstevel@tonic-gate releasef(fildes); 18720Sstevel@tonic-gate 18730Sstevel@tonic-gate aiop = curproc->p_aio; 
18740Sstevel@tonic-gate if (aiop == NULL) 18750Sstevel@tonic-gate return (EINVAL); 18760Sstevel@tonic-gate 18770Sstevel@tonic-gate if (aiop->aio_outstanding == 0) { 18780Sstevel@tonic-gate *rval = AIO_ALLDONE; 18790Sstevel@tonic-gate return (0); 18800Sstevel@tonic-gate } 18810Sstevel@tonic-gate 18820Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 18830Sstevel@tonic-gate if (cb != NULL) { 18840Sstevel@tonic-gate if (get_udatamodel() == DATAMODEL_NATIVE) { 18850Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18860Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 18870Sstevel@tonic-gate ->aio_resultp; 18880Sstevel@tonic-gate else 18890Sstevel@tonic-gate resultp = &((aiocb_t *)cb)->aio_resultp; 18900Sstevel@tonic-gate } 18910Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 18920Sstevel@tonic-gate else { 18930Sstevel@tonic-gate if (run_mode == AIO_LARGEFILE) 18940Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb64_32_t *)cb) 18950Sstevel@tonic-gate ->aio_resultp; 18960Sstevel@tonic-gate else if (run_mode == AIO_32) 18970Sstevel@tonic-gate resultp = (aio_result_t *)&((aiocb32_t *)cb) 18980Sstevel@tonic-gate ->aio_resultp; 18990Sstevel@tonic-gate } 19000Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 19010Sstevel@tonic-gate index = AIO_HASH(resultp); 19020Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 19030Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 19040Sstevel@tonic-gate if (ent->aio_req_resultp == resultp) { 19050Sstevel@tonic-gate if ((ent->aio_req_flags & AIO_PENDING) == 0) { 19060Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19070Sstevel@tonic-gate *rval = AIO_ALLDONE; 19080Sstevel@tonic-gate return (0); 19090Sstevel@tonic-gate } 19100Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19110Sstevel@tonic-gate *rval = AIO_NOTCANCELED; 19120Sstevel@tonic-gate return (0); 19130Sstevel@tonic-gate } 19140Sstevel@tonic-gate } 19150Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19160Sstevel@tonic-gate *rval = 
AIO_ALLDONE; 19170Sstevel@tonic-gate return (0); 19180Sstevel@tonic-gate } 19190Sstevel@tonic-gate 19200Sstevel@tonic-gate for (index = 0; index < AIO_HASHSZ; index++) { 19210Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 19220Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 19230Sstevel@tonic-gate if (ent->aio_req_fd == fildes) { 19240Sstevel@tonic-gate if ((ent->aio_req_flags & AIO_PENDING) != 0) { 19250Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19260Sstevel@tonic-gate *rval = AIO_NOTCANCELED; 19270Sstevel@tonic-gate return (0); 19280Sstevel@tonic-gate } 19290Sstevel@tonic-gate } 19300Sstevel@tonic-gate } 19310Sstevel@tonic-gate } 19320Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 19330Sstevel@tonic-gate *rval = AIO_ALLDONE; 19340Sstevel@tonic-gate return (0); 19350Sstevel@tonic-gate } 19360Sstevel@tonic-gate 19370Sstevel@tonic-gate /* 19380Sstevel@tonic-gate * solaris version of asynchronous read and write 19390Sstevel@tonic-gate */ 19400Sstevel@tonic-gate static int 19410Sstevel@tonic-gate arw( 19420Sstevel@tonic-gate int opcode, 19430Sstevel@tonic-gate int fdes, 19440Sstevel@tonic-gate char *bufp, 19450Sstevel@tonic-gate int bufsize, 19460Sstevel@tonic-gate offset_t offset, 19470Sstevel@tonic-gate aio_result_t *resultp, 19480Sstevel@tonic-gate int mode) 19490Sstevel@tonic-gate { 19500Sstevel@tonic-gate file_t *fp; 19510Sstevel@tonic-gate int error; 19520Sstevel@tonic-gate struct vnode *vp; 19530Sstevel@tonic-gate aio_req_t *reqp; 19540Sstevel@tonic-gate aio_t *aiop; 19550Sstevel@tonic-gate int (*aio_func)(); 19560Sstevel@tonic-gate #ifdef _LP64 19570Sstevel@tonic-gate aiocb_t aiocb; 19580Sstevel@tonic-gate #else 19590Sstevel@tonic-gate aiocb64_32_t aiocb64; 19600Sstevel@tonic-gate #endif 19610Sstevel@tonic-gate 19620Sstevel@tonic-gate aiop = curproc->p_aio; 19630Sstevel@tonic-gate if (aiop == NULL) 19640Sstevel@tonic-gate return (EINVAL); 19650Sstevel@tonic-gate 19660Sstevel@tonic-gate if ((fp = getf(fdes)) == NULL) 
{ 19670Sstevel@tonic-gate return (EBADF); 19680Sstevel@tonic-gate } 19690Sstevel@tonic-gate 19700Sstevel@tonic-gate /* 19710Sstevel@tonic-gate * check the permission of the partition 19720Sstevel@tonic-gate */ 19730Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) { 19740Sstevel@tonic-gate releasef(fdes); 19750Sstevel@tonic-gate return (EBADF); 19760Sstevel@tonic-gate } 19770Sstevel@tonic-gate 19780Sstevel@tonic-gate vp = fp->f_vnode; 19790Sstevel@tonic-gate aio_func = check_vp(vp, mode); 19800Sstevel@tonic-gate if (aio_func == NULL) { 19810Sstevel@tonic-gate releasef(fdes); 19820Sstevel@tonic-gate return (EBADFD); 19830Sstevel@tonic-gate } 19840Sstevel@tonic-gate #ifdef _LP64 19850Sstevel@tonic-gate aiocb.aio_fildes = fdes; 19860Sstevel@tonic-gate aiocb.aio_buf = bufp; 19870Sstevel@tonic-gate aiocb.aio_nbytes = bufsize; 19880Sstevel@tonic-gate aiocb.aio_offset = offset; 19890Sstevel@tonic-gate aiocb.aio_sigevent.sigev_notify = 0; 19901885Sraf error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 19910Sstevel@tonic-gate #else 19920Sstevel@tonic-gate aiocb64.aio_fildes = fdes; 19930Sstevel@tonic-gate aiocb64.aio_buf = (caddr32_t)bufp; 19940Sstevel@tonic-gate aiocb64.aio_nbytes = bufsize; 19950Sstevel@tonic-gate aiocb64.aio_offset = offset; 19960Sstevel@tonic-gate aiocb64.aio_sigevent.sigev_notify = 0; 19971885Sraf error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 19980Sstevel@tonic-gate #endif 19990Sstevel@tonic-gate if (error) { 20000Sstevel@tonic-gate releasef(fdes); 20010Sstevel@tonic-gate return (error); 20020Sstevel@tonic-gate } 20030Sstevel@tonic-gate 20040Sstevel@tonic-gate /* 20050Sstevel@tonic-gate * enable polling on this request if the opcode has 20060Sstevel@tonic-gate * the AIO poll bit set 20070Sstevel@tonic-gate */ 20080Sstevel@tonic-gate if (opcode & AIO_POLL_BIT) 20090Sstevel@tonic-gate reqp->aio_req_flags |= AIO_POLL; 20100Sstevel@tonic-gate 20110Sstevel@tonic-gate if (bufsize == 0) { 20120Sstevel@tonic-gate clear_active_fd(fdes); 
20130Sstevel@tonic-gate aio_zerolen(reqp); 20140Sstevel@tonic-gate return (0); 20150Sstevel@tonic-gate } 20160Sstevel@tonic-gate /* 20170Sstevel@tonic-gate * send the request to driver. 20180Sstevel@tonic-gate */ 20190Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 20200Sstevel@tonic-gate /* 20210Sstevel@tonic-gate * the fd is stored in the aio_req_t by aio_req_setup(), and 20220Sstevel@tonic-gate * is released by the aio_cleanup_thread() when the IO has 20230Sstevel@tonic-gate * completed. 20240Sstevel@tonic-gate */ 20250Sstevel@tonic-gate if (error) { 20260Sstevel@tonic-gate releasef(fdes); 20270Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 20280Sstevel@tonic-gate aio_req_free(aiop, reqp); 20290Sstevel@tonic-gate aiop->aio_pending--; 20300Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK) 20310Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv); 20320Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 20330Sstevel@tonic-gate return (error); 20340Sstevel@tonic-gate } 20350Sstevel@tonic-gate clear_active_fd(fdes); 20360Sstevel@tonic-gate return (0); 20370Sstevel@tonic-gate } 20380Sstevel@tonic-gate 20390Sstevel@tonic-gate /* 20400Sstevel@tonic-gate * posix version of asynchronous read and write 20410Sstevel@tonic-gate */ 20421885Sraf static int 20430Sstevel@tonic-gate aiorw( 20440Sstevel@tonic-gate int opcode, 20450Sstevel@tonic-gate void *aiocb_arg, 20460Sstevel@tonic-gate int mode, 20470Sstevel@tonic-gate int run_mode) 20480Sstevel@tonic-gate { 20490Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 20500Sstevel@tonic-gate aiocb32_t aiocb32; 20510Sstevel@tonic-gate struct sigevent32 *sigev32; 20520Sstevel@tonic-gate port_notify32_t pntfy32; 20530Sstevel@tonic-gate #endif 20540Sstevel@tonic-gate aiocb64_32_t aiocb64; 20550Sstevel@tonic-gate aiocb_t aiocb; 20560Sstevel@tonic-gate file_t *fp; 20570Sstevel@tonic-gate int error, fd; 20580Sstevel@tonic-gate size_t bufsize; 20590Sstevel@tonic-gate struct vnode *vp; 20600Sstevel@tonic-gate 
aio_req_t *reqp; 20610Sstevel@tonic-gate aio_t *aiop; 20620Sstevel@tonic-gate int (*aio_func)(); 20630Sstevel@tonic-gate aio_result_t *resultp; 20640Sstevel@tonic-gate struct sigevent *sigev; 20650Sstevel@tonic-gate model_t model; 20660Sstevel@tonic-gate int aio_use_port = 0; 20670Sstevel@tonic-gate port_notify_t pntfy; 20680Sstevel@tonic-gate 20690Sstevel@tonic-gate model = get_udatamodel(); 20700Sstevel@tonic-gate aiop = curproc->p_aio; 20710Sstevel@tonic-gate if (aiop == NULL) 20720Sstevel@tonic-gate return (EINVAL); 20730Sstevel@tonic-gate 20740Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) { 20750Sstevel@tonic-gate if (run_mode != AIO_LARGEFILE) { 20760Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t))) 20770Sstevel@tonic-gate return (EFAULT); 20780Sstevel@tonic-gate bufsize = aiocb.aio_nbytes; 20790Sstevel@tonic-gate resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp); 20800Sstevel@tonic-gate if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) { 20810Sstevel@tonic-gate return (EBADF); 20820Sstevel@tonic-gate } 20830Sstevel@tonic-gate sigev = &aiocb.aio_sigevent; 20840Sstevel@tonic-gate } else { 20850Sstevel@tonic-gate /* 20860Sstevel@tonic-gate * We come here only when we make largefile 20870Sstevel@tonic-gate * call on 32 bit kernel using 32 bit library. 
20880Sstevel@tonic-gate */ 20890Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 20900Sstevel@tonic-gate return (EFAULT); 20910Sstevel@tonic-gate bufsize = aiocb64.aio_nbytes; 20920Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 20930Sstevel@tonic-gate ->aio_resultp); 20941885Sraf if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 20950Sstevel@tonic-gate return (EBADF); 20960Sstevel@tonic-gate sigev = (struct sigevent *)&aiocb64.aio_sigevent; 20970Sstevel@tonic-gate } 20980Sstevel@tonic-gate 20990Sstevel@tonic-gate if (sigev->sigev_notify == SIGEV_PORT) { 21000Sstevel@tonic-gate if (copyin((void *)sigev->sigev_value.sival_ptr, 21010Sstevel@tonic-gate &pntfy, sizeof (port_notify_t))) { 21020Sstevel@tonic-gate releasef(fd); 21030Sstevel@tonic-gate return (EFAULT); 21040Sstevel@tonic-gate } 21050Sstevel@tonic-gate aio_use_port = 1; 21061885Sraf } else if (sigev->sigev_notify == SIGEV_THREAD) { 21071885Sraf pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo; 21081885Sraf pntfy.portnfy_user = 21091885Sraf aiocb.aio_sigevent.sigev_value.sival_ptr; 21101885Sraf aio_use_port = 1; 21110Sstevel@tonic-gate } 21120Sstevel@tonic-gate } 21130Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 21140Sstevel@tonic-gate else { 21150Sstevel@tonic-gate if (run_mode == AIO_32) { 21160Sstevel@tonic-gate /* 32 bit system call is being made on 64 bit kernel */ 21170Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t))) 21180Sstevel@tonic-gate return (EFAULT); 21190Sstevel@tonic-gate 21200Sstevel@tonic-gate bufsize = aiocb32.aio_nbytes; 21210Sstevel@tonic-gate aiocb_32ton(&aiocb32, &aiocb); 21220Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)-> 21230Sstevel@tonic-gate aio_resultp); 21240Sstevel@tonic-gate if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) { 21250Sstevel@tonic-gate return (EBADF); 21260Sstevel@tonic-gate } 21270Sstevel@tonic-gate sigev32 = &aiocb32.aio_sigevent; 21280Sstevel@tonic-gate } else 
if (run_mode == AIO_LARGEFILE) { 21290Sstevel@tonic-gate /* 21300Sstevel@tonic-gate * We come here only when we make largefile 21310Sstevel@tonic-gate * call on 64 bit kernel using 32 bit library. 21320Sstevel@tonic-gate */ 21330Sstevel@tonic-gate if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t))) 21340Sstevel@tonic-gate return (EFAULT); 21350Sstevel@tonic-gate bufsize = aiocb64.aio_nbytes; 21360Sstevel@tonic-gate aiocb_LFton(&aiocb64, &aiocb); 21370Sstevel@tonic-gate resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg) 21380Sstevel@tonic-gate ->aio_resultp); 21390Sstevel@tonic-gate if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL) 21400Sstevel@tonic-gate return (EBADF); 21410Sstevel@tonic-gate sigev32 = &aiocb64.aio_sigevent; 21420Sstevel@tonic-gate } 21430Sstevel@tonic-gate 21440Sstevel@tonic-gate if (sigev32->sigev_notify == SIGEV_PORT) { 21450Sstevel@tonic-gate if (copyin( 21460Sstevel@tonic-gate (void *)(uintptr_t)sigev32->sigev_value.sival_ptr, 21470Sstevel@tonic-gate &pntfy32, sizeof (port_notify32_t))) { 21480Sstevel@tonic-gate releasef(fd); 21490Sstevel@tonic-gate return (EFAULT); 21500Sstevel@tonic-gate } 21510Sstevel@tonic-gate pntfy.portnfy_port = pntfy32.portnfy_port; 21521885Sraf pntfy.portnfy_user = (void *)(uintptr_t) 21531885Sraf pntfy32.portnfy_user; 21541885Sraf aio_use_port = 1; 21551885Sraf } else if (sigev32->sigev_notify == SIGEV_THREAD) { 21561885Sraf pntfy.portnfy_port = sigev32->sigev_signo; 21571885Sraf pntfy.portnfy_user = (void *)(uintptr_t) 21581885Sraf sigev32->sigev_value.sival_ptr; 21590Sstevel@tonic-gate aio_use_port = 1; 21600Sstevel@tonic-gate } 21610Sstevel@tonic-gate } 21620Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 21630Sstevel@tonic-gate 21640Sstevel@tonic-gate /* 21650Sstevel@tonic-gate * check the permission of the partition 21660Sstevel@tonic-gate */ 21670Sstevel@tonic-gate 21680Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) { 21690Sstevel@tonic-gate releasef(fd); 21700Sstevel@tonic-gate return (EBADF); 
21710Sstevel@tonic-gate } 21720Sstevel@tonic-gate 21730Sstevel@tonic-gate vp = fp->f_vnode; 21740Sstevel@tonic-gate aio_func = check_vp(vp, mode); 21750Sstevel@tonic-gate if (aio_func == NULL) { 21760Sstevel@tonic-gate releasef(fd); 21770Sstevel@tonic-gate return (EBADFD); 21780Sstevel@tonic-gate } 21791885Sraf if (run_mode == AIO_LARGEFILE) 21801885Sraf error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp); 21810Sstevel@tonic-gate else 21821885Sraf error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp); 21830Sstevel@tonic-gate 21840Sstevel@tonic-gate if (error) { 21850Sstevel@tonic-gate releasef(fd); 21860Sstevel@tonic-gate return (error); 21870Sstevel@tonic-gate } 21880Sstevel@tonic-gate /* 21890Sstevel@tonic-gate * enable polling on this request if the opcode has 21900Sstevel@tonic-gate * the AIO poll bit set 21910Sstevel@tonic-gate */ 21920Sstevel@tonic-gate if (opcode & AIO_POLL_BIT) 21930Sstevel@tonic-gate reqp->aio_req_flags |= AIO_POLL; 21940Sstevel@tonic-gate 21950Sstevel@tonic-gate if (model == DATAMODEL_NATIVE) 21960Sstevel@tonic-gate reqp->aio_req_iocb.iocb = aiocb_arg; 21970Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 21980Sstevel@tonic-gate else 21990Sstevel@tonic-gate reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg; 22000Sstevel@tonic-gate #endif 22010Sstevel@tonic-gate 22021885Sraf if (aio_use_port) { 22031885Sraf int event = (run_mode == AIO_LARGEFILE)? 22041885Sraf ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) : 22051885Sraf ((mode == FREAD)? AIOAREAD : AIOAWRITE); 22061885Sraf error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event); 22071885Sraf } 22080Sstevel@tonic-gate 22090Sstevel@tonic-gate /* 22100Sstevel@tonic-gate * send the request to driver. 
22110Sstevel@tonic-gate */ 22120Sstevel@tonic-gate if (error == 0) { 22130Sstevel@tonic-gate if (bufsize == 0) { 22140Sstevel@tonic-gate clear_active_fd(fd); 22150Sstevel@tonic-gate aio_zerolen(reqp); 22160Sstevel@tonic-gate return (0); 22170Sstevel@tonic-gate } 22180Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED()); 22190Sstevel@tonic-gate } 22200Sstevel@tonic-gate 22210Sstevel@tonic-gate /* 22220Sstevel@tonic-gate * the fd is stored in the aio_req_t by aio_req_setup(), and 22230Sstevel@tonic-gate * is released by the aio_cleanup_thread() when the IO has 22240Sstevel@tonic-gate * completed. 22250Sstevel@tonic-gate */ 22260Sstevel@tonic-gate if (error) { 22270Sstevel@tonic-gate releasef(fd); 22280Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 22291885Sraf aio_deq(&aiop->aio_portpending, reqp); 22300Sstevel@tonic-gate aio_req_free(aiop, reqp); 22310Sstevel@tonic-gate aiop->aio_pending--; 22320Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK) 22330Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv); 22340Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 22350Sstevel@tonic-gate return (error); 22360Sstevel@tonic-gate } 22370Sstevel@tonic-gate clear_active_fd(fd); 22380Sstevel@tonic-gate return (0); 22390Sstevel@tonic-gate } 22400Sstevel@tonic-gate 22410Sstevel@tonic-gate 22420Sstevel@tonic-gate /* 22430Sstevel@tonic-gate * set error for a list IO entry that failed. 22440Sstevel@tonic-gate */ 22450Sstevel@tonic-gate static void 22460Sstevel@tonic-gate lio_set_error(aio_req_t *reqp) 22470Sstevel@tonic-gate { 22480Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 22490Sstevel@tonic-gate 22500Sstevel@tonic-gate if (aiop == NULL) 22510Sstevel@tonic-gate return; 22520Sstevel@tonic-gate 22530Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 22541885Sraf aio_deq(&aiop->aio_portpending, reqp); 22550Sstevel@tonic-gate aiop->aio_pending--; 22560Sstevel@tonic-gate /* request failed, AIO_PHYSIODONE set to aviod physio cleanup. 
*/ 22570Sstevel@tonic-gate reqp->aio_req_flags |= AIO_PHYSIODONE; 22580Sstevel@tonic-gate /* 22590Sstevel@tonic-gate * Need to free the request now as its never 22600Sstevel@tonic-gate * going to get on the done queue 22610Sstevel@tonic-gate * 22620Sstevel@tonic-gate * Note: aio_outstanding is decremented in 22630Sstevel@tonic-gate * aio_req_free() 22640Sstevel@tonic-gate */ 22650Sstevel@tonic-gate aio_req_free(aiop, reqp); 22660Sstevel@tonic-gate if (aiop->aio_flags & AIO_REQ_BLOCK) 22670Sstevel@tonic-gate cv_signal(&aiop->aio_cleanupcv); 22680Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 22690Sstevel@tonic-gate } 22700Sstevel@tonic-gate 22710Sstevel@tonic-gate /* 22720Sstevel@tonic-gate * check if a specified request is done, and remove it from 22730Sstevel@tonic-gate * the done queue. otherwise remove anybody from the done queue 22740Sstevel@tonic-gate * if NULL is specified. 22750Sstevel@tonic-gate */ 22760Sstevel@tonic-gate static aio_req_t * 22770Sstevel@tonic-gate aio_req_done(void *resultp) 22780Sstevel@tonic-gate { 22790Sstevel@tonic-gate aio_req_t **bucket; 22800Sstevel@tonic-gate aio_req_t *ent; 22810Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 22820Sstevel@tonic-gate long index; 22830Sstevel@tonic-gate 22840Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 22850Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 22860Sstevel@tonic-gate 22870Sstevel@tonic-gate if (resultp) { 22880Sstevel@tonic-gate index = AIO_HASH(resultp); 22890Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 22900Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 22910Sstevel@tonic-gate if (ent->aio_req_resultp == (aio_result_t *)resultp) { 22920Sstevel@tonic-gate if (ent->aio_req_flags & AIO_DONEQ) { 22930Sstevel@tonic-gate return (aio_req_remove(ent)); 22940Sstevel@tonic-gate } 22950Sstevel@tonic-gate return (NULL); 22960Sstevel@tonic-gate } 22970Sstevel@tonic-gate } 22980Sstevel@tonic-gate /* no match, resultp is invalid 
*/ 22990Sstevel@tonic-gate return (NULL); 23000Sstevel@tonic-gate } 23010Sstevel@tonic-gate return (aio_req_remove(NULL)); 23020Sstevel@tonic-gate } 23030Sstevel@tonic-gate 23040Sstevel@tonic-gate /* 23050Sstevel@tonic-gate * determine if a user-level resultp pointer is associated with an 23060Sstevel@tonic-gate * active IO request. Zero is returned when the request is done, 23070Sstevel@tonic-gate * and the request is removed from the done queue. Only when the 23080Sstevel@tonic-gate * return value is zero, is the "reqp" pointer valid. One is returned 23090Sstevel@tonic-gate * when the request is inprogress. Two is returned when the request 23100Sstevel@tonic-gate * is invalid. 23110Sstevel@tonic-gate */ 23120Sstevel@tonic-gate static int 23130Sstevel@tonic-gate aio_req_find(aio_result_t *resultp, aio_req_t **reqp) 23140Sstevel@tonic-gate { 23150Sstevel@tonic-gate aio_req_t **bucket; 23160Sstevel@tonic-gate aio_req_t *ent; 23170Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 23180Sstevel@tonic-gate long index; 23190Sstevel@tonic-gate 23200Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex)); 23210Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 23220Sstevel@tonic-gate 23230Sstevel@tonic-gate index = AIO_HASH(resultp); 23240Sstevel@tonic-gate bucket = &aiop->aio_hash[index]; 23250Sstevel@tonic-gate for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) { 23260Sstevel@tonic-gate if (ent->aio_req_resultp == resultp) { 23270Sstevel@tonic-gate if (ent->aio_req_flags & AIO_DONEQ) { 23280Sstevel@tonic-gate *reqp = aio_req_remove(ent); 23290Sstevel@tonic-gate return (0); 23300Sstevel@tonic-gate } 23310Sstevel@tonic-gate return (1); 23320Sstevel@tonic-gate } 23330Sstevel@tonic-gate } 23340Sstevel@tonic-gate /* no match, resultp is invalid */ 23350Sstevel@tonic-gate return (2); 23360Sstevel@tonic-gate } 23370Sstevel@tonic-gate 23380Sstevel@tonic-gate /* 23390Sstevel@tonic-gate * remove a request from the done queue. 
23400Sstevel@tonic-gate */ 23410Sstevel@tonic-gate static aio_req_t * 23420Sstevel@tonic-gate aio_req_remove(aio_req_t *reqp) 23430Sstevel@tonic-gate { 23440Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 23450Sstevel@tonic-gate 23460Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 23470Sstevel@tonic-gate 23481885Sraf if (reqp != NULL) { 23490Sstevel@tonic-gate ASSERT(reqp->aio_req_flags & AIO_DONEQ); 23500Sstevel@tonic-gate if (reqp->aio_req_next == reqp) { 23510Sstevel@tonic-gate /* only one request on queue */ 23520Sstevel@tonic-gate if (reqp == aiop->aio_doneq) { 23530Sstevel@tonic-gate aiop->aio_doneq = NULL; 23540Sstevel@tonic-gate } else { 23550Sstevel@tonic-gate ASSERT(reqp == aiop->aio_cleanupq); 23560Sstevel@tonic-gate aiop->aio_cleanupq = NULL; 23570Sstevel@tonic-gate } 23580Sstevel@tonic-gate } else { 23590Sstevel@tonic-gate reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev; 23600Sstevel@tonic-gate reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 23610Sstevel@tonic-gate /* 23620Sstevel@tonic-gate * The request can be either on the aio_doneq or the 23630Sstevel@tonic-gate * aio_cleanupq 23640Sstevel@tonic-gate */ 23650Sstevel@tonic-gate if (reqp == aiop->aio_doneq) 23660Sstevel@tonic-gate aiop->aio_doneq = reqp->aio_req_next; 23670Sstevel@tonic-gate 23680Sstevel@tonic-gate if (reqp == aiop->aio_cleanupq) 23690Sstevel@tonic-gate aiop->aio_cleanupq = reqp->aio_req_next; 23700Sstevel@tonic-gate } 23710Sstevel@tonic-gate reqp->aio_req_flags &= ~AIO_DONEQ; 23721885Sraf reqp->aio_req_next = NULL; 23731885Sraf reqp->aio_req_prev = NULL; 23741885Sraf } else if ((reqp = aiop->aio_doneq) != NULL) { 23751885Sraf ASSERT(reqp->aio_req_flags & AIO_DONEQ); 23761885Sraf if (reqp == reqp->aio_req_next) { 23770Sstevel@tonic-gate /* only one request on queue */ 23780Sstevel@tonic-gate aiop->aio_doneq = NULL; 23790Sstevel@tonic-gate } else { 23801885Sraf reqp->aio_req_prev->aio_req_next = reqp->aio_req_next; 23811885Sraf reqp->aio_req_next->aio_req_prev = 
reqp->aio_req_prev; 23821885Sraf aiop->aio_doneq = reqp->aio_req_next; 23830Sstevel@tonic-gate } 23841885Sraf reqp->aio_req_flags &= ~AIO_DONEQ; 23851885Sraf reqp->aio_req_next = NULL; 23861885Sraf reqp->aio_req_prev = NULL; 23870Sstevel@tonic-gate } 23881885Sraf if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN)) 23891885Sraf cv_broadcast(&aiop->aio_waitcv); 23901885Sraf return (reqp); 23910Sstevel@tonic-gate } 23920Sstevel@tonic-gate 23930Sstevel@tonic-gate static int 23940Sstevel@tonic-gate aio_req_setup( 23950Sstevel@tonic-gate aio_req_t **reqpp, 23960Sstevel@tonic-gate aio_t *aiop, 23970Sstevel@tonic-gate aiocb_t *arg, 23980Sstevel@tonic-gate aio_result_t *resultp, 23990Sstevel@tonic-gate vnode_t *vp) 24000Sstevel@tonic-gate { 24011885Sraf sigqueue_t *sqp = NULL; 24020Sstevel@tonic-gate aio_req_t *reqp; 24030Sstevel@tonic-gate struct uio *uio; 24040Sstevel@tonic-gate struct sigevent *sigev; 24050Sstevel@tonic-gate int error; 24060Sstevel@tonic-gate 24070Sstevel@tonic-gate sigev = &arg->aio_sigevent; 24081885Sraf if (sigev->sigev_notify == SIGEV_SIGNAL && 24091885Sraf sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) { 24100Sstevel@tonic-gate sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 24110Sstevel@tonic-gate if (sqp == NULL) 24120Sstevel@tonic-gate return (EAGAIN); 24130Sstevel@tonic-gate sqp->sq_func = NULL; 24140Sstevel@tonic-gate sqp->sq_next = NULL; 24150Sstevel@tonic-gate sqp->sq_info.si_code = SI_ASYNCIO; 24160Sstevel@tonic-gate sqp->sq_info.si_pid = curproc->p_pid; 24170Sstevel@tonic-gate sqp->sq_info.si_ctid = PRCTID(curproc); 24180Sstevel@tonic-gate sqp->sq_info.si_zoneid = getzoneid(); 24190Sstevel@tonic-gate sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 24200Sstevel@tonic-gate sqp->sq_info.si_signo = sigev->sigev_signo; 24210Sstevel@tonic-gate sqp->sq_info.si_value = sigev->sigev_value; 24221885Sraf } 24230Sstevel@tonic-gate 24240Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 24250Sstevel@tonic-gate 24260Sstevel@tonic-gate 
if (aiop->aio_flags & AIO_REQ_BLOCK) { 24270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 24280Sstevel@tonic-gate if (sqp) 24290Sstevel@tonic-gate kmem_free(sqp, sizeof (sigqueue_t)); 24300Sstevel@tonic-gate return (EIO); 24310Sstevel@tonic-gate } 24320Sstevel@tonic-gate /* 24330Sstevel@tonic-gate * get an aio_reqp from the free list or allocate one 24340Sstevel@tonic-gate * from dynamic memory. 24350Sstevel@tonic-gate */ 24360Sstevel@tonic-gate if (error = aio_req_alloc(&reqp, resultp)) { 24370Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 24380Sstevel@tonic-gate if (sqp) 24390Sstevel@tonic-gate kmem_free(sqp, sizeof (sigqueue_t)); 24400Sstevel@tonic-gate return (error); 24410Sstevel@tonic-gate } 24420Sstevel@tonic-gate aiop->aio_pending++; 24430Sstevel@tonic-gate aiop->aio_outstanding++; 24440Sstevel@tonic-gate reqp->aio_req_flags = AIO_PENDING; 24451885Sraf if (sigev->sigev_notify == SIGEV_THREAD || 24461885Sraf sigev->sigev_notify == SIGEV_PORT) 24471885Sraf aio_enq(&aiop->aio_portpending, reqp, 0); 24480Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 24490Sstevel@tonic-gate /* 24500Sstevel@tonic-gate * initialize aio request. 24510Sstevel@tonic-gate */ 24520Sstevel@tonic-gate reqp->aio_req_fd = arg->aio_fildes; 24530Sstevel@tonic-gate reqp->aio_req_sigqp = sqp; 24540Sstevel@tonic-gate reqp->aio_req_iocb.iocb = NULL; 24551885Sraf reqp->aio_req_lio = NULL; 24560Sstevel@tonic-gate reqp->aio_req_buf.b_file = vp; 24570Sstevel@tonic-gate uio = reqp->aio_req.aio_uio; 24580Sstevel@tonic-gate uio->uio_iovcnt = 1; 24590Sstevel@tonic-gate uio->uio_iov->iov_base = (caddr_t)arg->aio_buf; 24600Sstevel@tonic-gate uio->uio_iov->iov_len = arg->aio_nbytes; 24610Sstevel@tonic-gate uio->uio_loffset = arg->aio_offset; 24620Sstevel@tonic-gate *reqpp = reqp; 24630Sstevel@tonic-gate return (0); 24640Sstevel@tonic-gate } 24650Sstevel@tonic-gate 24660Sstevel@tonic-gate /* 24670Sstevel@tonic-gate * Allocate p_aio struct. 
24680Sstevel@tonic-gate */ 24690Sstevel@tonic-gate static aio_t * 24700Sstevel@tonic-gate aio_aiop_alloc(void) 24710Sstevel@tonic-gate { 24720Sstevel@tonic-gate aio_t *aiop; 24730Sstevel@tonic-gate 24740Sstevel@tonic-gate ASSERT(MUTEX_HELD(&curproc->p_lock)); 24750Sstevel@tonic-gate 24760Sstevel@tonic-gate aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP); 24770Sstevel@tonic-gate if (aiop) { 24780Sstevel@tonic-gate mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL); 24790Sstevel@tonic-gate mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT, 24800Sstevel@tonic-gate NULL); 24810Sstevel@tonic-gate mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL); 24820Sstevel@tonic-gate } 24830Sstevel@tonic-gate return (aiop); 24840Sstevel@tonic-gate } 24850Sstevel@tonic-gate 24860Sstevel@tonic-gate /* 24870Sstevel@tonic-gate * Allocate an aio_req struct. 24880Sstevel@tonic-gate */ 24890Sstevel@tonic-gate static int 24900Sstevel@tonic-gate aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp) 24910Sstevel@tonic-gate { 24920Sstevel@tonic-gate aio_req_t *reqp; 24930Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 24940Sstevel@tonic-gate 24950Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 24960Sstevel@tonic-gate 24970Sstevel@tonic-gate if ((reqp = aiop->aio_free) != NULL) { 24980Sstevel@tonic-gate aiop->aio_free = reqp->aio_req_next; 24991885Sraf bzero(reqp, sizeof (*reqp)); 25000Sstevel@tonic-gate } else { 25010Sstevel@tonic-gate /* 25020Sstevel@tonic-gate * Check whether memory is getting tight. 25030Sstevel@tonic-gate * This is a temporary mechanism to avoid memory 25040Sstevel@tonic-gate * exhaustion by a single process until we come up 25050Sstevel@tonic-gate * with a per process solution such as setrlimit(). 
25060Sstevel@tonic-gate */ 25070Sstevel@tonic-gate if (freemem < desfree) 25080Sstevel@tonic-gate return (EAGAIN); 25090Sstevel@tonic-gate reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP); 25100Sstevel@tonic-gate if (reqp == NULL) 25110Sstevel@tonic-gate return (EAGAIN); 25120Sstevel@tonic-gate } 25131885Sraf reqp->aio_req.aio_uio = &reqp->aio_req_uio; 25141885Sraf reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov; 25151885Sraf reqp->aio_req.aio_private = reqp; 25160Sstevel@tonic-gate reqp->aio_req_buf.b_offset = -1; 25170Sstevel@tonic-gate reqp->aio_req_resultp = resultp; 25180Sstevel@tonic-gate if (aio_hash_insert(reqp, aiop)) { 25190Sstevel@tonic-gate reqp->aio_req_next = aiop->aio_free; 25200Sstevel@tonic-gate aiop->aio_free = reqp; 25210Sstevel@tonic-gate return (EINVAL); 25220Sstevel@tonic-gate } 25230Sstevel@tonic-gate *nreqp = reqp; 25240Sstevel@tonic-gate return (0); 25250Sstevel@tonic-gate } 25260Sstevel@tonic-gate 25270Sstevel@tonic-gate /* 25280Sstevel@tonic-gate * Allocate an aio_lio_t struct. 25290Sstevel@tonic-gate */ 25300Sstevel@tonic-gate static int 25310Sstevel@tonic-gate aio_lio_alloc(aio_lio_t **head) 25320Sstevel@tonic-gate { 25330Sstevel@tonic-gate aio_lio_t *liop; 25340Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 25350Sstevel@tonic-gate 25360Sstevel@tonic-gate ASSERT(MUTEX_HELD(&aiop->aio_mutex)); 25370Sstevel@tonic-gate 25380Sstevel@tonic-gate if ((liop = aiop->aio_lio_free) != NULL) { 25390Sstevel@tonic-gate aiop->aio_lio_free = liop->lio_next; 25400Sstevel@tonic-gate } else { 25410Sstevel@tonic-gate /* 25420Sstevel@tonic-gate * Check whether memory is getting tight. 25430Sstevel@tonic-gate * This is a temporary mechanism to avoid memory 25440Sstevel@tonic-gate * exhaustion by a single process until we come up 25450Sstevel@tonic-gate * with a per process solution such as setrlimit(). 
25460Sstevel@tonic-gate */ 25470Sstevel@tonic-gate if (freemem < desfree) 25480Sstevel@tonic-gate return (EAGAIN); 25490Sstevel@tonic-gate 25500Sstevel@tonic-gate liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP); 25510Sstevel@tonic-gate if (liop == NULL) 25520Sstevel@tonic-gate return (EAGAIN); 25530Sstevel@tonic-gate } 25540Sstevel@tonic-gate *head = liop; 25550Sstevel@tonic-gate return (0); 25560Sstevel@tonic-gate } 25570Sstevel@tonic-gate 25580Sstevel@tonic-gate /* 25590Sstevel@tonic-gate * this is a special per-process thread that is only activated if 25600Sstevel@tonic-gate * the process is unmapping a segment with outstanding aio. normally, 25610Sstevel@tonic-gate * the process will have completed the aio before unmapping the 25620Sstevel@tonic-gate * segment. If the process does unmap a segment with outstanding aio, 25630Sstevel@tonic-gate * this special thread will guarentee that the locked pages due to 25640Sstevel@tonic-gate * aphysio() are released, thereby permitting the segment to be 2565304Spraks * unmapped. In addition to this, the cleanup thread is woken up 2566304Spraks * during DR operations to release the locked pages. 
25670Sstevel@tonic-gate */ 25680Sstevel@tonic-gate 25690Sstevel@tonic-gate static int 25700Sstevel@tonic-gate aio_cleanup_thread(aio_t *aiop) 25710Sstevel@tonic-gate { 25720Sstevel@tonic-gate proc_t *p = curproc; 25730Sstevel@tonic-gate struct as *as = p->p_as; 25740Sstevel@tonic-gate int poked = 0; 25750Sstevel@tonic-gate kcondvar_t *cvp; 25760Sstevel@tonic-gate int exit_flag = 0; 2577304Spraks int rqclnup = 0; 25780Sstevel@tonic-gate 25790Sstevel@tonic-gate sigfillset(&curthread->t_hold); 25800Sstevel@tonic-gate sigdiffset(&curthread->t_hold, &cantmask); 25810Sstevel@tonic-gate for (;;) { 25820Sstevel@tonic-gate /* 25830Sstevel@tonic-gate * if a segment is being unmapped, and the current 25840Sstevel@tonic-gate * process's done queue is not empty, then every request 25850Sstevel@tonic-gate * on the doneq with locked resources should be forced 25860Sstevel@tonic-gate * to release their locks. By moving the doneq request 25870Sstevel@tonic-gate * to the cleanupq, aio_cleanup() will process the cleanupq, 25880Sstevel@tonic-gate * and place requests back onto the doneq. All requests 25890Sstevel@tonic-gate * processed by aio_cleanup() will have their physical 25900Sstevel@tonic-gate * resources unlocked. 
25910Sstevel@tonic-gate */ 25920Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 25930Sstevel@tonic-gate if ((aiop->aio_flags & AIO_CLEANUP) == 0) { 25940Sstevel@tonic-gate aiop->aio_flags |= AIO_CLEANUP; 25950Sstevel@tonic-gate mutex_enter(&as->a_contents); 2596304Spraks if (aiop->aio_rqclnup) { 2597304Spraks aiop->aio_rqclnup = 0; 2598304Spraks rqclnup = 1; 2599304Spraks } 2600304Spraks 2601304Spraks if ((rqclnup || AS_ISUNMAPWAIT(as)) && 26021885Sraf aiop->aio_doneq) { 26030Sstevel@tonic-gate aio_req_t *doneqhead = aiop->aio_doneq; 26040Sstevel@tonic-gate mutex_exit(&as->a_contents); 26050Sstevel@tonic-gate aiop->aio_doneq = NULL; 26060Sstevel@tonic-gate aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ); 26070Sstevel@tonic-gate } else { 26080Sstevel@tonic-gate mutex_exit(&as->a_contents); 26090Sstevel@tonic-gate } 26100Sstevel@tonic-gate } 26110Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 26120Sstevel@tonic-gate aio_cleanup(AIO_CLEANUP_THREAD); 26130Sstevel@tonic-gate /* 26140Sstevel@tonic-gate * thread should block on the cleanupcv while 26150Sstevel@tonic-gate * AIO_CLEANUP is set. 26160Sstevel@tonic-gate */ 26170Sstevel@tonic-gate cvp = &aiop->aio_cleanupcv; 26180Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 26190Sstevel@tonic-gate 26200Sstevel@tonic-gate if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL || 26210Sstevel@tonic-gate aiop->aio_notifyq != NULL || 26220Sstevel@tonic-gate aiop->aio_portcleanupq != NULL) { 26230Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 26240Sstevel@tonic-gate continue; 26250Sstevel@tonic-gate } 26260Sstevel@tonic-gate mutex_enter(&as->a_contents); 26270Sstevel@tonic-gate 26280Sstevel@tonic-gate /* 26290Sstevel@tonic-gate * AIO_CLEANUP determines when the cleanup thread 2630304Spraks * should be active. This flag is set when 2631304Spraks * the cleanup thread is awakened by as_unmap() or 2632304Spraks * due to DR operations. 
26330Sstevel@tonic-gate * The flag is cleared when the blocking as_unmap() 26340Sstevel@tonic-gate * that originally awakened us is allowed to 26350Sstevel@tonic-gate * complete. as_unmap() blocks when trying to 26360Sstevel@tonic-gate * unmap a segment that has SOFTLOCKed pages. when 26370Sstevel@tonic-gate * the segment's pages are all SOFTUNLOCKed, 2638304Spraks * as->a_flags & AS_UNMAPWAIT should be zero. 2639304Spraks * 2640304Spraks * In case of cleanup request by DR, the flag is cleared 2641304Spraks * once all the pending aio requests have been processed. 2642304Spraks * 2643304Spraks * The flag shouldn't be cleared right away if the 2644304Spraks * cleanup thread was interrupted because the process 2645304Spraks * is doing forkall(). This happens when cv_wait_sig() 2646304Spraks * returns zero, because it was awakened by a pokelwps(). 2647304Spraks * If the process is not exiting, it must be doing forkall(). 26480Sstevel@tonic-gate */ 26490Sstevel@tonic-gate if ((poked == 0) && 2650304Spraks ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) || 2651304Spraks (aiop->aio_pending == 0))) { 26520Sstevel@tonic-gate aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT); 26530Sstevel@tonic-gate cvp = &as->a_cv; 2654304Spraks rqclnup = 0; 26550Sstevel@tonic-gate } 26560Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 26570Sstevel@tonic-gate if (poked) { 26580Sstevel@tonic-gate /* 26590Sstevel@tonic-gate * If the process is exiting/killed, don't return 26600Sstevel@tonic-gate * immediately without waiting for pending I/O's 26610Sstevel@tonic-gate * and releasing the page locks. 26620Sstevel@tonic-gate */ 26630Sstevel@tonic-gate if (p->p_flag & (SEXITLWPS|SKILLED)) { 26640Sstevel@tonic-gate /* 26650Sstevel@tonic-gate * If exit_flag is set, then it is 26660Sstevel@tonic-gate * safe to exit because we have released 26670Sstevel@tonic-gate * page locks of completed I/O's. 
26680Sstevel@tonic-gate */ 26690Sstevel@tonic-gate if (exit_flag) 26700Sstevel@tonic-gate break; 26710Sstevel@tonic-gate 26720Sstevel@tonic-gate mutex_exit(&as->a_contents); 26730Sstevel@tonic-gate 26740Sstevel@tonic-gate /* 26750Sstevel@tonic-gate * Wait for all the pending aio to complete. 26760Sstevel@tonic-gate */ 26770Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 26780Sstevel@tonic-gate aiop->aio_flags |= AIO_REQ_BLOCK; 26790Sstevel@tonic-gate while (aiop->aio_pending != 0) 26800Sstevel@tonic-gate cv_wait(&aiop->aio_cleanupcv, 26810Sstevel@tonic-gate &aiop->aio_mutex); 26820Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 26830Sstevel@tonic-gate exit_flag = 1; 26840Sstevel@tonic-gate continue; 26850Sstevel@tonic-gate } else if (p->p_flag & 26860Sstevel@tonic-gate (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) { 26870Sstevel@tonic-gate /* 26880Sstevel@tonic-gate * hold LWP until it 26890Sstevel@tonic-gate * is continued. 26900Sstevel@tonic-gate */ 26910Sstevel@tonic-gate mutex_exit(&as->a_contents); 26920Sstevel@tonic-gate mutex_enter(&p->p_lock); 26930Sstevel@tonic-gate stop(PR_SUSPENDED, SUSPEND_NORMAL); 26940Sstevel@tonic-gate mutex_exit(&p->p_lock); 26950Sstevel@tonic-gate poked = 0; 26960Sstevel@tonic-gate continue; 26970Sstevel@tonic-gate } 26980Sstevel@tonic-gate } else { 26990Sstevel@tonic-gate /* 27000Sstevel@tonic-gate * When started this thread will sleep on as->a_cv. 27010Sstevel@tonic-gate * as_unmap will awake this thread if the 27020Sstevel@tonic-gate * segment has SOFTLOCKed pages (poked = 0). 27030Sstevel@tonic-gate * 1. pokelwps() awakes this thread => 27040Sstevel@tonic-gate * break the loop to check SEXITLWPS, SHOLDFORK, etc 27050Sstevel@tonic-gate * 2. 
as_unmap awakes this thread => 27060Sstevel@tonic-gate * to break the loop it is necessary that 27070Sstevel@tonic-gate * - AS_UNMAPWAIT is set (as_unmap is waiting for 27080Sstevel@tonic-gate * memory to be unlocked) 27090Sstevel@tonic-gate * - AIO_CLEANUP is not set 27100Sstevel@tonic-gate * (if AIO_CLEANUP is set we have to wait for 27110Sstevel@tonic-gate * pending requests. aio_done will send a signal 27120Sstevel@tonic-gate * for every request which completes to continue 27130Sstevel@tonic-gate * unmapping the corresponding address range) 2714304Spraks * 3. A cleanup request will wake this thread up, ex. 2715304Spraks * by the DR operations. The aio_rqclnup flag will 2716304Spraks * be set. 27170Sstevel@tonic-gate */ 27180Sstevel@tonic-gate while (poked == 0) { 2719304Spraks /* 2720304Spraks * we need to handle cleanup requests 2721304Spraks * that come in after we had just cleaned up, 2722304Spraks * so that we do cleanup of any new aio 2723304Spraks * requests that got completed and have 2724304Spraks * locked resources. 
2725304Spraks */ 2726304Spraks if ((aiop->aio_rqclnup || 2727304Spraks (AS_ISUNMAPWAIT(as) != 0)) && 2728304Spraks (aiop->aio_flags & AIO_CLEANUP) == 0) 27290Sstevel@tonic-gate break; 27300Sstevel@tonic-gate poked = !cv_wait_sig(cvp, &as->a_contents); 27310Sstevel@tonic-gate if (AS_ISUNMAPWAIT(as) == 0) 27320Sstevel@tonic-gate cv_signal(cvp); 27330Sstevel@tonic-gate if (aiop->aio_outstanding != 0) 27340Sstevel@tonic-gate break; 27350Sstevel@tonic-gate } 27360Sstevel@tonic-gate } 27370Sstevel@tonic-gate mutex_exit(&as->a_contents); 27380Sstevel@tonic-gate } 27390Sstevel@tonic-gate exit: 27400Sstevel@tonic-gate mutex_exit(&as->a_contents); 27410Sstevel@tonic-gate ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED))); 27420Sstevel@tonic-gate aston(curthread); /* make thread do post_syscall */ 27430Sstevel@tonic-gate return (0); 27440Sstevel@tonic-gate } 27450Sstevel@tonic-gate 27460Sstevel@tonic-gate /* 27470Sstevel@tonic-gate * save a reference to a user's outstanding aio in a hash list. 27480Sstevel@tonic-gate */ 27490Sstevel@tonic-gate static int 27500Sstevel@tonic-gate aio_hash_insert( 27510Sstevel@tonic-gate aio_req_t *aio_reqp, 27520Sstevel@tonic-gate aio_t *aiop) 27530Sstevel@tonic-gate { 27540Sstevel@tonic-gate long index; 27550Sstevel@tonic-gate aio_result_t *resultp = aio_reqp->aio_req_resultp; 27560Sstevel@tonic-gate aio_req_t *current; 27570Sstevel@tonic-gate aio_req_t **nextp; 27580Sstevel@tonic-gate 27590Sstevel@tonic-gate index = AIO_HASH(resultp); 27600Sstevel@tonic-gate nextp = &aiop->aio_hash[index]; 27610Sstevel@tonic-gate while ((current = *nextp) != NULL) { 27620Sstevel@tonic-gate if (current->aio_req_resultp == resultp) 27630Sstevel@tonic-gate return (DUPLICATE); 27640Sstevel@tonic-gate nextp = ¤t->aio_hash_next; 27650Sstevel@tonic-gate } 27660Sstevel@tonic-gate *nextp = aio_reqp; 27670Sstevel@tonic-gate aio_reqp->aio_hash_next = NULL; 27680Sstevel@tonic-gate return (0); 27690Sstevel@tonic-gate } 27700Sstevel@tonic-gate 27710Sstevel@tonic-gate static 
int 27720Sstevel@tonic-gate (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *, 27730Sstevel@tonic-gate cred_t *) 27740Sstevel@tonic-gate { 27750Sstevel@tonic-gate struct snode *sp; 27760Sstevel@tonic-gate dev_t dev; 27770Sstevel@tonic-gate struct cb_ops *cb; 27780Sstevel@tonic-gate major_t major; 27790Sstevel@tonic-gate int (*aio_func)(); 27800Sstevel@tonic-gate 27810Sstevel@tonic-gate dev = vp->v_rdev; 27820Sstevel@tonic-gate major = getmajor(dev); 27830Sstevel@tonic-gate 27840Sstevel@tonic-gate /* 27850Sstevel@tonic-gate * return NULL for requests to files and STREAMs so 27860Sstevel@tonic-gate * that libaio takes care of them. 27870Sstevel@tonic-gate */ 27880Sstevel@tonic-gate if (vp->v_type == VCHR) { 27890Sstevel@tonic-gate /* no stream device for kaio */ 27900Sstevel@tonic-gate if (STREAMSTAB(major)) { 27910Sstevel@tonic-gate return (NULL); 27920Sstevel@tonic-gate } 27930Sstevel@tonic-gate } else { 27940Sstevel@tonic-gate return (NULL); 27950Sstevel@tonic-gate } 27960Sstevel@tonic-gate 27970Sstevel@tonic-gate /* 27980Sstevel@tonic-gate * Check old drivers which do not have async I/O entry points. 27990Sstevel@tonic-gate */ 28000Sstevel@tonic-gate if (devopsp[major]->devo_rev < 3) 28010Sstevel@tonic-gate return (NULL); 28020Sstevel@tonic-gate 28030Sstevel@tonic-gate cb = devopsp[major]->devo_cb_ops; 28040Sstevel@tonic-gate 28050Sstevel@tonic-gate if (cb->cb_rev < 1) 28060Sstevel@tonic-gate return (NULL); 28070Sstevel@tonic-gate 28080Sstevel@tonic-gate /* 28090Sstevel@tonic-gate * Check whether this device is a block device. 28100Sstevel@tonic-gate * Kaio is not supported for devices like tty. 28110Sstevel@tonic-gate */ 28120Sstevel@tonic-gate if (cb->cb_strategy == nodev || cb->cb_strategy == NULL) 28130Sstevel@tonic-gate return (NULL); 28140Sstevel@tonic-gate 28150Sstevel@tonic-gate /* 28160Sstevel@tonic-gate * Clustering: If vnode is a PXFS vnode, then the device may be remote. 28170Sstevel@tonic-gate * We cannot call the driver directly. 
Instead return the 28180Sstevel@tonic-gate * PXFS functions. 28190Sstevel@tonic-gate */ 28200Sstevel@tonic-gate 28210Sstevel@tonic-gate if (IS_PXFSVP(vp)) { 28220Sstevel@tonic-gate if (mode & FREAD) 28230Sstevel@tonic-gate return (clpxfs_aio_read); 28240Sstevel@tonic-gate else 28250Sstevel@tonic-gate return (clpxfs_aio_write); 28260Sstevel@tonic-gate } 28270Sstevel@tonic-gate if (mode & FREAD) 28280Sstevel@tonic-gate aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read; 28290Sstevel@tonic-gate else 28300Sstevel@tonic-gate aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write; 28310Sstevel@tonic-gate 28320Sstevel@tonic-gate /* 28330Sstevel@tonic-gate * Do we need this ? 28340Sstevel@tonic-gate * nodev returns ENXIO anyway. 28350Sstevel@tonic-gate */ 28360Sstevel@tonic-gate if (aio_func == nodev) 28370Sstevel@tonic-gate return (NULL); 28380Sstevel@tonic-gate 28390Sstevel@tonic-gate sp = VTOS(vp); 28400Sstevel@tonic-gate smark(sp, SACC); 28410Sstevel@tonic-gate return (aio_func); 28420Sstevel@tonic-gate } 28430Sstevel@tonic-gate 28440Sstevel@tonic-gate /* 28450Sstevel@tonic-gate * Clustering: We want check_vp to return a function prototyped 28460Sstevel@tonic-gate * correctly that will be common to both PXFS and regular case. 28470Sstevel@tonic-gate * We define this intermediate function that will do the right 28480Sstevel@tonic-gate * thing for driver cases. 
28490Sstevel@tonic-gate */ 28500Sstevel@tonic-gate 28510Sstevel@tonic-gate static int 28520Sstevel@tonic-gate driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 28530Sstevel@tonic-gate { 28540Sstevel@tonic-gate dev_t dev; 28550Sstevel@tonic-gate struct cb_ops *cb; 28560Sstevel@tonic-gate 28570Sstevel@tonic-gate ASSERT(vp->v_type == VCHR); 28580Sstevel@tonic-gate ASSERT(!IS_PXFSVP(vp)); 28590Sstevel@tonic-gate dev = VTOS(vp)->s_dev; 28600Sstevel@tonic-gate ASSERT(STREAMSTAB(getmajor(dev)) == NULL); 28610Sstevel@tonic-gate 28620Sstevel@tonic-gate cb = devopsp[getmajor(dev)]->devo_cb_ops; 28630Sstevel@tonic-gate 28640Sstevel@tonic-gate ASSERT(cb->cb_awrite != nodev); 28650Sstevel@tonic-gate return ((*cb->cb_awrite)(dev, aio, cred_p)); 28660Sstevel@tonic-gate } 28670Sstevel@tonic-gate 28680Sstevel@tonic-gate /* 28690Sstevel@tonic-gate * Clustering: We want check_vp to return a function prototyped 28700Sstevel@tonic-gate * correctly that will be common to both PXFS and regular case. 28710Sstevel@tonic-gate * We define this intermediate function that will do the right 28720Sstevel@tonic-gate * thing for driver cases. 
28730Sstevel@tonic-gate */ 28740Sstevel@tonic-gate 28750Sstevel@tonic-gate static int 28760Sstevel@tonic-gate driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p) 28770Sstevel@tonic-gate { 28780Sstevel@tonic-gate dev_t dev; 28790Sstevel@tonic-gate struct cb_ops *cb; 28800Sstevel@tonic-gate 28810Sstevel@tonic-gate ASSERT(vp->v_type == VCHR); 28820Sstevel@tonic-gate ASSERT(!IS_PXFSVP(vp)); 28830Sstevel@tonic-gate dev = VTOS(vp)->s_dev; 28840Sstevel@tonic-gate ASSERT(!STREAMSTAB(getmajor(dev))); 28850Sstevel@tonic-gate 28860Sstevel@tonic-gate cb = devopsp[getmajor(dev)]->devo_cb_ops; 28870Sstevel@tonic-gate 28880Sstevel@tonic-gate ASSERT(cb->cb_aread != nodev); 28890Sstevel@tonic-gate return ((*cb->cb_aread)(dev, aio, cred_p)); 28900Sstevel@tonic-gate } 28910Sstevel@tonic-gate 28920Sstevel@tonic-gate /* 28930Sstevel@tonic-gate * This routine is called when a largefile call is made by a 32bit 28940Sstevel@tonic-gate * process on a ILP32 or LP64 kernel. All 64bit processes are large 28950Sstevel@tonic-gate * file by definition and will call alio() instead. 
 */
static int
alioLF(
	int		mode_arg,	/* LIO_WAIT or LIO_NOWAIT */
	void		*aiocb_arg,	/* user array of nent aiocb64_32 ptrs */
	int		nent,		/* number of list entries */
	void		*sigev)		/* optional list-completion sigevent32 */
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
#ifdef _LP64
	aiocb_t		aiocb_n;
#endif
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	port_notify32_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/* this entry point serves 32-bit callers only */
	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	/* copy in the user's array of aiocb pointers */
	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			/*
			 * SIGEV_THREAD packs the port/user cookie directly
			 * in the sigevent; SIGEV_PORT points at a
			 * port_notify structure in user space.
			 */
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		/* pre-allocate the list-completion event */
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		/* no request has joined the head yet; 'done' must free it */
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			/* build the siginfo delivered on list completion */
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				/* entry will never complete; drop its count */
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 * (LIO_READ/LIO_WRITE double as FREAD/FWRITE flag bits here)
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				/* cache the lookup for the next entry */
				prev_fp = fp;
				prev_mode = mode;
			}
		}

#ifdef _LP64
		/* widen the 32-bit largefile aiocb for the native path */
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		/* at least one request now references head; don't free it */
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = *ucbp;

		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				/* same port as the list head: share its event */
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				/* zero-length I/O completes immediately */
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		/* block until every queued request has completed */
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify,
			    &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		/* nothing was queued against head; release its resources */
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}

#ifdef _SYSCALL32_IMPL
/*
 * Widen a 32-bit largefile aiocb (aiocb64_32_t) into the native
 * 64-bit aiocb_t, field by field.
 */
static void
aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif

/*
 * This function is used only for largefile calls made by
 * 32 bit applications.
 *
 * Allocates and initializes an aio_req_t for one aiocb64_32 request;
 * on success *reqpp holds the new request (caller still must finish
 * field initialization and queue it).  Returns 0 or an errno
 * (EAGAIN on allocation failure, EIO if AIO is being blocked).
 */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int		error;

	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		/* per-request completion signal: prebuild the siginfo */
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		/* new requests are being refused (e.g. during cleanup) */
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		/* port/thread notification: track on the portpending list */
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
33450Sstevel@tonic-gate */ 33460Sstevel@tonic-gate reqp->aio_req_fd = arg->aio_fildes; 33470Sstevel@tonic-gate reqp->aio_req_sigqp = sqp; 33480Sstevel@tonic-gate reqp->aio_req_iocb.iocb = NULL; 33491885Sraf reqp->aio_req_lio = NULL; 33500Sstevel@tonic-gate reqp->aio_req_buf.b_file = vp; 33510Sstevel@tonic-gate uio = reqp->aio_req.aio_uio; 33520Sstevel@tonic-gate uio->uio_iovcnt = 1; 33530Sstevel@tonic-gate uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf; 33540Sstevel@tonic-gate uio->uio_iov->iov_len = arg->aio_nbytes; 33550Sstevel@tonic-gate uio->uio_loffset = arg->aio_offset; 33560Sstevel@tonic-gate *reqpp = reqp; 33570Sstevel@tonic-gate return (0); 33580Sstevel@tonic-gate } 33590Sstevel@tonic-gate 33600Sstevel@tonic-gate /* 33610Sstevel@tonic-gate * This routine is called when a non largefile call is made by a 32bit 33620Sstevel@tonic-gate * process on a ILP32 or LP64 kernel. 33630Sstevel@tonic-gate */ 33640Sstevel@tonic-gate static int 33650Sstevel@tonic-gate alio32( 33660Sstevel@tonic-gate int mode_arg, 33670Sstevel@tonic-gate void *aiocb_arg, 33680Sstevel@tonic-gate int nent, 33691885Sraf void *sigev) 33700Sstevel@tonic-gate { 33710Sstevel@tonic-gate file_t *fp; 33720Sstevel@tonic-gate file_t *prev_fp = NULL; 33730Sstevel@tonic-gate int prev_mode = -1; 33740Sstevel@tonic-gate struct vnode *vp; 33750Sstevel@tonic-gate aio_lio_t *head; 33760Sstevel@tonic-gate aio_req_t *reqp; 33770Sstevel@tonic-gate aio_t *aiop; 33781885Sraf caddr_t cbplist; 33790Sstevel@tonic-gate aiocb_t cb; 33800Sstevel@tonic-gate aiocb_t *aiocb = &cb; 33810Sstevel@tonic-gate #ifdef _LP64 33820Sstevel@tonic-gate aiocb32_t *cbp; 33830Sstevel@tonic-gate caddr32_t *ucbp; 33840Sstevel@tonic-gate aiocb32_t cb32; 33850Sstevel@tonic-gate aiocb32_t *aiocb32 = &cb32; 33861885Sraf struct sigevent32 sigevk; 33870Sstevel@tonic-gate #else 33880Sstevel@tonic-gate aiocb_t *cbp, **ucbp; 33891885Sraf struct sigevent sigevk; 33900Sstevel@tonic-gate #endif 33910Sstevel@tonic-gate sigqueue_t *sqp; 
33920Sstevel@tonic-gate int (*aio_func)(); 33930Sstevel@tonic-gate int mode; 33941885Sraf int error = 0; 33951885Sraf int aio_errors = 0; 33960Sstevel@tonic-gate int i; 33970Sstevel@tonic-gate size_t ssize; 33980Sstevel@tonic-gate int deadhead = 0; 33990Sstevel@tonic-gate int aio_notsupported = 0; 34001885Sraf int lio_head_port; 34011885Sraf int aio_port; 34021885Sraf int aio_thread; 34030Sstevel@tonic-gate port_kevent_t *pkevtp = NULL; 34040Sstevel@tonic-gate #ifdef _LP64 34050Sstevel@tonic-gate port_notify32_t pnotify; 34060Sstevel@tonic-gate #else 34070Sstevel@tonic-gate port_notify_t pnotify; 34080Sstevel@tonic-gate #endif 34091885Sraf int event; 34101885Sraf 34110Sstevel@tonic-gate aiop = curproc->p_aio; 34120Sstevel@tonic-gate if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX) 34130Sstevel@tonic-gate return (EINVAL); 34140Sstevel@tonic-gate 34150Sstevel@tonic-gate #ifdef _LP64 34160Sstevel@tonic-gate ssize = (sizeof (caddr32_t) * nent); 34170Sstevel@tonic-gate #else 34180Sstevel@tonic-gate ssize = (sizeof (aiocb_t *) * nent); 34190Sstevel@tonic-gate #endif 34200Sstevel@tonic-gate cbplist = kmem_alloc(ssize, KM_SLEEP); 34210Sstevel@tonic-gate ucbp = (void *)cbplist; 34220Sstevel@tonic-gate 34231885Sraf if (copyin(aiocb_arg, cbplist, ssize) || 34241885Sraf (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) { 34250Sstevel@tonic-gate kmem_free(cbplist, ssize); 34260Sstevel@tonic-gate return (EFAULT); 34270Sstevel@tonic-gate } 34280Sstevel@tonic-gate 34291885Sraf /* Event Ports */ 34301885Sraf if (sigev && 34311885Sraf (sigevk.sigev_notify == SIGEV_THREAD || 34321885Sraf sigevk.sigev_notify == SIGEV_PORT)) { 34331885Sraf if (sigevk.sigev_notify == SIGEV_THREAD) { 34341885Sraf pnotify.portnfy_port = sigevk.sigev_signo; 34351885Sraf pnotify.portnfy_user = sigevk.sigev_value.sival_ptr; 34361885Sraf } else if (copyin( 34371885Sraf (void *)(uintptr_t)sigevk.sigev_value.sival_ptr, 34381885Sraf &pnotify, sizeof (pnotify))) { 34390Sstevel@tonic-gate 
kmem_free(cbplist, ssize); 34400Sstevel@tonic-gate return (EFAULT); 34410Sstevel@tonic-gate } 34421885Sraf error = port_alloc_event(pnotify.portnfy_port, 34431885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp); 34441885Sraf if (error) { 34451885Sraf if (error == ENOMEM || error == EAGAIN) 34461885Sraf error = EAGAIN; 34471885Sraf else 34481885Sraf error = EINVAL; 34491885Sraf kmem_free(cbplist, ssize); 34501885Sraf return (error); 34511885Sraf } 34521885Sraf lio_head_port = pnotify.portnfy_port; 34530Sstevel@tonic-gate } 34540Sstevel@tonic-gate 34550Sstevel@tonic-gate /* 34560Sstevel@tonic-gate * a list head should be allocated if notification is 34570Sstevel@tonic-gate * enabled for this list. 34580Sstevel@tonic-gate */ 34590Sstevel@tonic-gate head = NULL; 34600Sstevel@tonic-gate 34611885Sraf if (mode_arg == LIO_WAIT || sigev) { 34620Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 34630Sstevel@tonic-gate error = aio_lio_alloc(&head); 34640Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 34650Sstevel@tonic-gate if (error) 34660Sstevel@tonic-gate goto done; 34670Sstevel@tonic-gate deadhead = 1; 34680Sstevel@tonic-gate head->lio_nent = nent; 34690Sstevel@tonic-gate head->lio_refcnt = nent; 34701885Sraf head->lio_port = -1; 34711885Sraf head->lio_portkev = NULL; 34721885Sraf if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL && 34731885Sraf sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) { 34740Sstevel@tonic-gate sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP); 34750Sstevel@tonic-gate if (sqp == NULL) { 34760Sstevel@tonic-gate error = EAGAIN; 34770Sstevel@tonic-gate goto done; 34780Sstevel@tonic-gate } 34790Sstevel@tonic-gate sqp->sq_func = NULL; 34800Sstevel@tonic-gate sqp->sq_next = NULL; 34810Sstevel@tonic-gate sqp->sq_info.si_code = SI_ASYNCIO; 34820Sstevel@tonic-gate sqp->sq_info.si_pid = curproc->p_pid; 34830Sstevel@tonic-gate sqp->sq_info.si_ctid = PRCTID(curproc); 34840Sstevel@tonic-gate sqp->sq_info.si_zoneid = getzoneid(); 34850Sstevel@tonic-gate 
sqp->sq_info.si_uid = crgetuid(curproc->p_cred); 34861885Sraf sqp->sq_info.si_signo = sigevk.sigev_signo; 34870Sstevel@tonic-gate sqp->sq_info.si_value.sival_int = 34881885Sraf sigevk.sigev_value.sival_int; 34890Sstevel@tonic-gate head->lio_sigqp = sqp; 34900Sstevel@tonic-gate } else { 34910Sstevel@tonic-gate head->lio_sigqp = NULL; 34920Sstevel@tonic-gate } 34931885Sraf if (pkevtp) { 34941885Sraf /* 34951885Sraf * Prepare data to send when list of aiocb's has 34961885Sraf * completed. 34971885Sraf */ 34981885Sraf port_init_event(pkevtp, (uintptr_t)sigev, 34991885Sraf (void *)(uintptr_t)pnotify.portnfy_user, 35001885Sraf NULL, head); 35011885Sraf pkevtp->portkev_events = AIOLIO; 35021885Sraf head->lio_portkev = pkevtp; 35031885Sraf head->lio_port = pnotify.portnfy_port; 35041885Sraf } 35050Sstevel@tonic-gate } 35060Sstevel@tonic-gate 35070Sstevel@tonic-gate for (i = 0; i < nent; i++, ucbp++) { 35080Sstevel@tonic-gate 35090Sstevel@tonic-gate /* skip entry if it can't be copied. */ 35100Sstevel@tonic-gate #ifdef _LP64 35110Sstevel@tonic-gate cbp = (aiocb32_t *)(uintptr_t)*ucbp; 35121885Sraf if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32))) 35130Sstevel@tonic-gate #else 35140Sstevel@tonic-gate cbp = (aiocb_t *)*ucbp; 35151885Sraf if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) 35160Sstevel@tonic-gate #endif 35171885Sraf { 35180Sstevel@tonic-gate if (head) { 35190Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 35200Sstevel@tonic-gate head->lio_nent--; 35210Sstevel@tonic-gate head->lio_refcnt--; 35220Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 35230Sstevel@tonic-gate } 35240Sstevel@tonic-gate continue; 35250Sstevel@tonic-gate } 35260Sstevel@tonic-gate #ifdef _LP64 35270Sstevel@tonic-gate /* 35280Sstevel@tonic-gate * copy 32 bit structure into 64 bit structure 35290Sstevel@tonic-gate */ 35300Sstevel@tonic-gate aiocb_32ton(aiocb32, aiocb); 35310Sstevel@tonic-gate #endif /* _LP64 */ 35320Sstevel@tonic-gate 35330Sstevel@tonic-gate /* skip if opcode 
for aiocb is LIO_NOP */ 35340Sstevel@tonic-gate mode = aiocb->aio_lio_opcode; 35350Sstevel@tonic-gate if (mode == LIO_NOP) { 35360Sstevel@tonic-gate cbp = NULL; 35370Sstevel@tonic-gate if (head) { 35380Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 35390Sstevel@tonic-gate head->lio_nent--; 35400Sstevel@tonic-gate head->lio_refcnt--; 35410Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 35420Sstevel@tonic-gate } 35430Sstevel@tonic-gate continue; 35440Sstevel@tonic-gate } 35450Sstevel@tonic-gate 35460Sstevel@tonic-gate /* increment file descriptor's ref count. */ 35470Sstevel@tonic-gate if ((fp = getf(aiocb->aio_fildes)) == NULL) { 35480Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF); 35490Sstevel@tonic-gate if (head) { 35500Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 35510Sstevel@tonic-gate head->lio_nent--; 35520Sstevel@tonic-gate head->lio_refcnt--; 35530Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 35540Sstevel@tonic-gate } 35550Sstevel@tonic-gate aio_errors++; 35560Sstevel@tonic-gate continue; 35570Sstevel@tonic-gate } 35580Sstevel@tonic-gate 35590Sstevel@tonic-gate /* 35600Sstevel@tonic-gate * check the permission of the partition 35610Sstevel@tonic-gate */ 35620Sstevel@tonic-gate if ((fp->f_flag & mode) == 0) { 35630Sstevel@tonic-gate releasef(aiocb->aio_fildes); 35640Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, EBADF); 35650Sstevel@tonic-gate if (head) { 35660Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 35670Sstevel@tonic-gate head->lio_nent--; 35680Sstevel@tonic-gate head->lio_refcnt--; 35690Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 35700Sstevel@tonic-gate } 35710Sstevel@tonic-gate aio_errors++; 35720Sstevel@tonic-gate continue; 35730Sstevel@tonic-gate } 35740Sstevel@tonic-gate 35750Sstevel@tonic-gate /* 35760Sstevel@tonic-gate * common case where requests are to the same fd 35770Sstevel@tonic-gate * for the same r/w operation 35780Sstevel@tonic-gate * for UFS, need to set EBADFD 35790Sstevel@tonic-gate */ 
35801885Sraf vp = fp->f_vnode; 35811885Sraf if (fp != prev_fp || mode != prev_mode) { 35820Sstevel@tonic-gate aio_func = check_vp(vp, mode); 35830Sstevel@tonic-gate if (aio_func == NULL) { 35840Sstevel@tonic-gate prev_fp = NULL; 35850Sstevel@tonic-gate releasef(aiocb->aio_fildes); 35861885Sraf lio_set_uerror(&cbp->aio_resultp, EBADFD); 35870Sstevel@tonic-gate aio_notsupported++; 35880Sstevel@tonic-gate if (head) { 35890Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 35900Sstevel@tonic-gate head->lio_nent--; 35910Sstevel@tonic-gate head->lio_refcnt--; 35920Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 35930Sstevel@tonic-gate } 35940Sstevel@tonic-gate continue; 35950Sstevel@tonic-gate } else { 35960Sstevel@tonic-gate prev_fp = fp; 35970Sstevel@tonic-gate prev_mode = mode; 35980Sstevel@tonic-gate } 35990Sstevel@tonic-gate } 36001885Sraf 36011885Sraf error = aio_req_setup(&reqp, aiop, aiocb, 36021885Sraf (aio_result_t *)&cbp->aio_resultp, vp); 36031885Sraf if (error) { 36040Sstevel@tonic-gate releasef(aiocb->aio_fildes); 36050Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, error); 36060Sstevel@tonic-gate if (head) { 36070Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 36080Sstevel@tonic-gate head->lio_nent--; 36090Sstevel@tonic-gate head->lio_refcnt--; 36100Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 36110Sstevel@tonic-gate } 36120Sstevel@tonic-gate aio_errors++; 36130Sstevel@tonic-gate continue; 36140Sstevel@tonic-gate } 36150Sstevel@tonic-gate 36160Sstevel@tonic-gate reqp->aio_req_lio = head; 36170Sstevel@tonic-gate deadhead = 0; 36180Sstevel@tonic-gate 36190Sstevel@tonic-gate /* 36200Sstevel@tonic-gate * Set the errno field now before sending the request to 36210Sstevel@tonic-gate * the driver to avoid a race condition 36220Sstevel@tonic-gate */ 36230Sstevel@tonic-gate (void) suword32(&cbp->aio_resultp.aio_errno, 36240Sstevel@tonic-gate EINPROGRESS); 36250Sstevel@tonic-gate 36261885Sraf reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp; 
36271885Sraf 36281885Sraf event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE; 36291885Sraf aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT); 36301885Sraf aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD); 36311885Sraf if (aio_port | aio_thread) { 36321885Sraf port_kevent_t *lpkevp; 36331885Sraf /* 36341885Sraf * Prepare data to send with each aiocb completed. 36351885Sraf */ 36360Sstevel@tonic-gate #ifdef _LP64 36371885Sraf if (aio_port) { 36381885Sraf void *paddr = (void *)(uintptr_t) 36391885Sraf aiocb32->aio_sigevent.sigev_value.sival_ptr; 36401885Sraf if (copyin(paddr, &pnotify, sizeof (pnotify))) 36411885Sraf error = EFAULT; 36421885Sraf } else { /* aio_thread */ 36431885Sraf pnotify.portnfy_port = 36441885Sraf aiocb32->aio_sigevent.sigev_signo; 36451885Sraf pnotify.portnfy_user = 36461885Sraf aiocb32->aio_sigevent.sigev_value.sival_ptr; 36471885Sraf } 36480Sstevel@tonic-gate #else 36491885Sraf if (aio_port) { 36501885Sraf void *paddr = 36511885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr; 36521885Sraf if (copyin(paddr, &pnotify, sizeof (pnotify))) 36531885Sraf error = EFAULT; 36541885Sraf } else { /* aio_thread */ 36551885Sraf pnotify.portnfy_port = 36561885Sraf aiocb->aio_sigevent.sigev_signo; 36571885Sraf pnotify.portnfy_user = 36581885Sraf aiocb->aio_sigevent.sigev_value.sival_ptr; 36591885Sraf } 36600Sstevel@tonic-gate #endif 36611885Sraf if (error) 36621885Sraf /* EMPTY */; 36631885Sraf else if (pkevtp != NULL && 36641885Sraf pnotify.portnfy_port == lio_head_port) 36651885Sraf error = port_dup_event(pkevtp, &lpkevp, 36661885Sraf PORT_ALLOC_DEFAULT); 36671885Sraf else 36681885Sraf error = port_alloc_event(pnotify.portnfy_port, 36691885Sraf PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, 36701885Sraf &lpkevp); 36711885Sraf if (error == 0) { 36721885Sraf port_init_event(lpkevp, (uintptr_t)cbp, 36731885Sraf (void *)(uintptr_t)pnotify.portnfy_user, 36741885Sraf aio_port_callback, reqp); 36751885Sraf lpkevp->portkev_events = event; 36761885Sraf 
reqp->aio_req_portkev = lpkevp; 36771885Sraf reqp->aio_req_port = pnotify.portnfy_port; 36781885Sraf } 36790Sstevel@tonic-gate } 36800Sstevel@tonic-gate 36810Sstevel@tonic-gate /* 36820Sstevel@tonic-gate * send the request to driver. 36830Sstevel@tonic-gate */ 36840Sstevel@tonic-gate if (error == 0) { 36850Sstevel@tonic-gate if (aiocb->aio_nbytes == 0) { 36860Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes); 36870Sstevel@tonic-gate aio_zerolen(reqp); 36880Sstevel@tonic-gate continue; 36890Sstevel@tonic-gate } 36900Sstevel@tonic-gate error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, 36910Sstevel@tonic-gate CRED()); 36920Sstevel@tonic-gate } 36930Sstevel@tonic-gate 36940Sstevel@tonic-gate /* 36950Sstevel@tonic-gate * the fd's ref count is not decremented until the IO has 36960Sstevel@tonic-gate * completed unless there was an error. 36970Sstevel@tonic-gate */ 36980Sstevel@tonic-gate if (error) { 36990Sstevel@tonic-gate releasef(aiocb->aio_fildes); 37000Sstevel@tonic-gate lio_set_uerror(&cbp->aio_resultp, error); 37010Sstevel@tonic-gate if (head) { 37020Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 37030Sstevel@tonic-gate head->lio_nent--; 37040Sstevel@tonic-gate head->lio_refcnt--; 37050Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 37060Sstevel@tonic-gate } 37070Sstevel@tonic-gate if (error == ENOTSUP) 37080Sstevel@tonic-gate aio_notsupported++; 37090Sstevel@tonic-gate else 37100Sstevel@tonic-gate aio_errors++; 37110Sstevel@tonic-gate lio_set_error(reqp); 37120Sstevel@tonic-gate } else { 37130Sstevel@tonic-gate clear_active_fd(aiocb->aio_fildes); 37140Sstevel@tonic-gate } 37150Sstevel@tonic-gate } 37160Sstevel@tonic-gate 37170Sstevel@tonic-gate if (aio_notsupported) { 37180Sstevel@tonic-gate error = ENOTSUP; 37190Sstevel@tonic-gate } else if (aio_errors) { 37200Sstevel@tonic-gate /* 37210Sstevel@tonic-gate * return EIO if any request failed 37220Sstevel@tonic-gate */ 37230Sstevel@tonic-gate error = EIO; 37240Sstevel@tonic-gate } 37250Sstevel@tonic-gate 
37260Sstevel@tonic-gate if (mode_arg == LIO_WAIT) { 37270Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 37280Sstevel@tonic-gate while (head->lio_refcnt > 0) { 37290Sstevel@tonic-gate if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) { 37300Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 37310Sstevel@tonic-gate error = EINTR; 37320Sstevel@tonic-gate goto done; 37330Sstevel@tonic-gate } 37340Sstevel@tonic-gate } 37350Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 37360Sstevel@tonic-gate alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32); 37370Sstevel@tonic-gate } 37380Sstevel@tonic-gate 37390Sstevel@tonic-gate done: 37400Sstevel@tonic-gate kmem_free(cbplist, ssize); 37410Sstevel@tonic-gate if (deadhead) { 37420Sstevel@tonic-gate if (head->lio_sigqp) 37430Sstevel@tonic-gate kmem_free(head->lio_sigqp, sizeof (sigqueue_t)); 37441885Sraf if (head->lio_portkev) 37451885Sraf port_free_event(head->lio_portkev); 37460Sstevel@tonic-gate kmem_free(head, sizeof (aio_lio_t)); 37470Sstevel@tonic-gate } 37480Sstevel@tonic-gate return (error); 37490Sstevel@tonic-gate } 37500Sstevel@tonic-gate 37510Sstevel@tonic-gate 37520Sstevel@tonic-gate #ifdef _SYSCALL32_IMPL 37530Sstevel@tonic-gate void 37540Sstevel@tonic-gate aiocb_32ton(aiocb32_t *src, aiocb_t *dest) 37550Sstevel@tonic-gate { 37560Sstevel@tonic-gate dest->aio_fildes = src->aio_fildes; 37570Sstevel@tonic-gate dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf; 37580Sstevel@tonic-gate dest->aio_nbytes = (size_t)src->aio_nbytes; 37590Sstevel@tonic-gate dest->aio_offset = (off_t)src->aio_offset; 37600Sstevel@tonic-gate dest->aio_reqprio = src->aio_reqprio; 37610Sstevel@tonic-gate dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify; 37620Sstevel@tonic-gate dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo; 37630Sstevel@tonic-gate 37640Sstevel@tonic-gate /* 37650Sstevel@tonic-gate * See comment in sigqueue32() on handling of 32-bit 37660Sstevel@tonic-gate * sigvals in a 64-bit kernel. 
37670Sstevel@tonic-gate */ 37680Sstevel@tonic-gate dest->aio_sigevent.sigev_value.sival_int = 37690Sstevel@tonic-gate (int)src->aio_sigevent.sigev_value.sival_int; 37700Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval)) 37710Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_function; 37720Sstevel@tonic-gate dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *) 37730Sstevel@tonic-gate (uintptr_t)src->aio_sigevent.sigev_notify_attributes; 37740Sstevel@tonic-gate dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2; 37750Sstevel@tonic-gate dest->aio_lio_opcode = src->aio_lio_opcode; 37760Sstevel@tonic-gate dest->aio_state = src->aio_state; 37770Sstevel@tonic-gate dest->aio__pad[0] = src->aio__pad[0]; 37780Sstevel@tonic-gate } 37790Sstevel@tonic-gate #endif /* _SYSCALL32_IMPL */ 37800Sstevel@tonic-gate 37810Sstevel@tonic-gate /* 37820Sstevel@tonic-gate * aio_port_callback() is called just before the event is retrieved from the 37830Sstevel@tonic-gate * port. The task of this callback function is to finish the work of the 37840Sstevel@tonic-gate * transaction for the application, it means : 37850Sstevel@tonic-gate * - copyout transaction data to the application 37860Sstevel@tonic-gate * (this thread is running in the right process context) 37870Sstevel@tonic-gate * - keep trace of the transaction (update of counters). 37880Sstevel@tonic-gate * - free allocated buffers 37890Sstevel@tonic-gate * The aiocb pointer is the object element of the port_kevent_t structure. 
37900Sstevel@tonic-gate * 37910Sstevel@tonic-gate * flag : 37920Sstevel@tonic-gate * PORT_CALLBACK_DEFAULT : do copyout and free resources 37930Sstevel@tonic-gate * PORT_CALLBACK_CLOSE : don't do copyout, free resources 37940Sstevel@tonic-gate */ 37950Sstevel@tonic-gate 37960Sstevel@tonic-gate /*ARGSUSED*/ 37970Sstevel@tonic-gate int 37980Sstevel@tonic-gate aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp) 37990Sstevel@tonic-gate { 38000Sstevel@tonic-gate aio_t *aiop = curproc->p_aio; 38010Sstevel@tonic-gate aio_req_t *reqp = arg; 38020Sstevel@tonic-gate struct iovec *iov; 38030Sstevel@tonic-gate struct buf *bp; 38040Sstevel@tonic-gate void *resultp; 38050Sstevel@tonic-gate 38060Sstevel@tonic-gate if (pid != curproc->p_pid) { 38070Sstevel@tonic-gate /* wrong proc !!, can not deliver data here ... */ 38080Sstevel@tonic-gate return (EACCES); 38090Sstevel@tonic-gate } 38100Sstevel@tonic-gate 38110Sstevel@tonic-gate mutex_enter(&aiop->aio_portq_mutex); 38120Sstevel@tonic-gate reqp->aio_req_portkev = NULL; 38130Sstevel@tonic-gate aio_req_remove_portq(aiop, reqp); /* remove request from portq */ 38140Sstevel@tonic-gate mutex_exit(&aiop->aio_portq_mutex); 38150Sstevel@tonic-gate aphysio_unlock(reqp); /* unlock used pages */ 38160Sstevel@tonic-gate mutex_enter(&aiop->aio_mutex); 38170Sstevel@tonic-gate if (reqp->aio_req_flags & AIO_COPYOUTDONE) { 38180Sstevel@tonic-gate aio_req_free_port(aiop, reqp); /* back to free list */ 38190Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 38200Sstevel@tonic-gate return (0); 38210Sstevel@tonic-gate } 38220Sstevel@tonic-gate 38230Sstevel@tonic-gate iov = reqp->aio_req_uio.uio_iov; 38240Sstevel@tonic-gate bp = &reqp->aio_req_buf; 38250Sstevel@tonic-gate resultp = (void *)reqp->aio_req_resultp; 38260Sstevel@tonic-gate aio_req_free_port(aiop, reqp); /* request struct back to free list */ 38270Sstevel@tonic-gate mutex_exit(&aiop->aio_mutex); 38280Sstevel@tonic-gate if (flag == PORT_CALLBACK_DEFAULT) 
38290Sstevel@tonic-gate aio_copyout_result_port(iov, bp, resultp); 38300Sstevel@tonic-gate return (0); 38310Sstevel@tonic-gate } 3832