xref: /onnv-gate/usr/src/uts/common/os/aio_subr.c (revision 9973:7911cfe2424f)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/epm.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>
#include <sys/conf.h>
#include <sys/sdt.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */

int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int	error;
	size_t	c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, dev,
	    tnf_offset, blkno, btodt(uio->uio_loffset),
	    tnf_size, size, uio->uio_iov->iov_len,
	    tnf_bioflags, rw, rw);

	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: clustering software can set the b_iodone, b_forw
	 * and b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = (int (*)()) aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}

/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
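
/*
 * Illustrative sketch (not part of this file): a character driver
 * typically reaches aphysio() from its aread(9E)/awrite(9E) entry
 * points, much as it reaches physio(9F) from read(9E)/write(9E).
 * The xx names below are hypothetical placeholders for a driver's own
 * strategy(9E) and minphys routines; anocancel must be passed because
 * aphysio() does not support cancellation (see the panic above).
 *
 *	static int
 *	xxaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
 *	{
 *		return (aphysio(xxstrategy, anocancel, dev, B_READ,
 *		    xxminphys, aio));
 *	}
 *
 *	static int
 *	xxawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
 *	{
 *		return (aphysio(xxstrategy, anocancel, dev, B_WRITE,
 *		    xxminphys, aio));
 *	}
 */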

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as a contract private interface.
 */

void
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head = NULL;
	aio_t *aiop;
	sigqueue_t *sigev = NULL;
	sigqueue_t *lio_sigev = NULL;
	port_kevent_t *pkevp = NULL;
	port_kevent_t *lio_pkevp = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();
	int use_port = 0;
	int reqp_flags = 0;

	p = bp->b_proc;
	as = p->p_as;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, bp->b_edev,
	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/* decrement fd's ref count by one, now that aio request is done. */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;
	reqp_flags = reqp->aio_req_flags;
	if ((pkevp = reqp->aio_req_portkev) != NULL) {
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(pkevp);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return;
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			mutex_enter(&as->a_contents);
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return;
		}

		aio_deq(&aiop->aio_portpending, reqp);
		aio_enq(&aiop->aio_portq, reqp, 0);

		use_port = 1;
	} else {
		/*
		 * when the AIO_CLEANUP flag is enabled for this
		 * process, or when the AIO_POLL bit is set for
		 * this request, special handling is required.
		 * otherwise the request is put onto the doneq.
		 */
		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
		pollqflag = (reqp->aio_req_flags & AIO_POLL);
		if (cleanupqflag | pollqflag) {

			if (cleanupqflag)
				mutex_enter(&as->a_contents);

			/*
			 * requests with their AIO_POLL bit set are put
			 * on the pollq, requests with sigevent structures
			 * or with listio heads are put on the notifyq, and
			 * the remaining requests don't require any special
			 * cleanup handling, so they're put onto the default
			 * cleanupq.
			 */
			if (pollqflag)
				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
			else
				aio_enq(&aiop->aio_cleanupq, reqp,
				    AIO_CLEANUPQ);

			if (cleanupqflag) {
				cv_signal(&aiop->aio_cleanupcv);
				mutex_exit(&as->a_contents);
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
			} else {
				ASSERT(pollqflag);
				/* block aio_cleanup_exit until we're done */
				aiop->aio_flags |= AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
				/*
				 * let the cleanup processing happen from an
				 * AST; set an AST on all threads in this
				 * process.
				 */
				mutex_enter(&p->p_lock);
				set_proc_ast(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&aiop->aio_mutex);
				/* wakeup anybody waiting in aiowait() */
				cv_broadcast(&aiop->aio_waitcv);

				/* wakeup aio_cleanup_exit if needed */
				if (aiop->aio_flags & AIO_CLEANUP)
					cv_signal(&aiop->aio_cleanupcv);
				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
			}
			return;
		}

		/*
		 * save req's sigevent pointer, and check its
		 * value after releasing aio_mutex lock.
		 */
		sigev = reqp->aio_req_sigqp;
		reqp->aio_req_sigqp = NULL;

		/* put request on done queue. */
		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
	} /* portkevent */

	/*
	 * when list IO notification is enabled, a notification or
	 * signal is sent only when all entries in the list are done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
			cv_signal(&head->lio_notify);
			if (head->lio_port >= 0 &&
			    (lio_pkevp = head->lio_portkev) != NULL)
				head->lio_port = -1;
		}
	}

	/*
	 * if AIO_WAITN is set, send the signal only when the required
	 * number of I/Os has finished or when all I/Os are done.
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_portq_mutex);

	/*
	 * Could the cleanup thread be waiting for AIO with locked
	 * resources to finish?
	 * Ideally in that case the cleanup thread should block on
	 * cleanupcv, but there is a window where it could miss seeing
	 * a new aio request that sneaked in.
	 */
	mutex_enter(&as->a_contents);
	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
		cv_broadcast(&as->a_cv);
	mutex_exit(&as->a_contents);

	if (sigev)
		aio_sigev_send(p, sigev);
	else if (!use_port && head == NULL) {
		/*
		 * Send a SIGIO signal when the process has a handler enabled.
		 */
		if ((func = PTOU(p)->u_signal[SIGIO - 1]) !=
		    SIG_DFL && (func != SIG_IGN)) {
			psignal(p, SIGIO);
			mutex_enter(&aiop->aio_mutex);
			reqp->aio_req_flags |= AIO_SIGNALLED;
			mutex_exit(&aiop->aio_mutex);
		}
	}
	if (pkevp)
		port_send_event(pkevp);
	if (lio_sigev)
		aio_sigev_send(p, lio_sigev);
	if (lio_pkevp)
		port_send_event(lio_pkevp);
}

/*
 * send a queued signal to the specified process. the sigev
 * argument must be non-NULL (asserted below); callers check
 * for a NULL sigqueue before calling.
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}

/*
 * special case handling for zero-length requests. the aio request
 * short-circuits the normal completion path since all that's required
 * to complete this request is to copyout a zero to the aio request's
 * return value.
 */
void
aio_zerolen(aio_req_t *reqp)
{

	struct buf *bp = &reqp->aio_req_buf;

	reqp->aio_req_flags |= AIO_ZEROLEN;

	bp->b_forw = (struct buf *)reqp;
	bp->b_proc = curproc;

	bp->b_resid = 0;
	bp->b_flags = 0;

	aio_done(bp);
}

/*
 * unlock pages previously locked by as_pagelock
 */
void
aphysio_unlock(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	int flags;

	if (reqp->aio_req_flags & AIO_PHYSIODONE)
		return;

	reqp->aio_req_flags |= AIO_PHYSIODONE;

	if (reqp->aio_req_flags & AIO_ZEROLEN)
		return;

	bp = &reqp->aio_req_buf;
	iov = reqp->aio_req_uio.uio_iov;
	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
		as_pageunlock(bp->b_proc->p_as,
		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
		    iov->iov_base, iov->iov_len, flags);
		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	bp->b_flags |= B_DONE;
}

/*
 * deletes a request's id from the hash table of outstanding I/Os.
 */
static void
aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
{
	long index;
	aio_result_t *resultp = reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = (aiop->aio_hash + index);
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp) {
			*nextp = current->aio_hash_next;
			return;
		}
		nextp = &current->aio_hash_next;
	}
}

/*
 * Put a list head struct onto its free list.
 */
static void
aio_lio_free(aio_t *aiop, aio_lio_t *head)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (head->lio_sigqp != NULL)
		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
	head->lio_next = aiop->aio_lio_free;
	aiop->aio_lio_free = head;
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free(aio_t *aiop, aio_req_t *reqp)
{
	aio_lio_t *liop;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp->aio_req_portkev) {
		port_free_event(reqp->aio_req_portkev);
		reqp->aio_req_portkev = NULL;
	}

	if ((liop = reqp->aio_req_lio) != NULL) {
		if (--liop->lio_nent == 0)
			aio_lio_free(aiop, liop);
		reqp->aio_req_lio = NULL;
	}
	if (reqp->aio_req_sigqp != NULL) {
		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
		reqp->aio_req_sigqp = NULL;
	}
	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	if (aiop->aio_outstanding == 0)
		cv_broadcast(&aiop->aio_waitcv);
	aio_hash_delete(aiop, reqp);
}

/*
 * Put a reqp onto the freelist (event port variant of aio_req_free()).
 */
void
aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	aio_hash_delete(aiop, reqp);
}


/*
 * Verify the integrity of a queue.
 */
#if defined(DEBUG)
static void
aio_verify_queue(aio_req_t *head,
	aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *reqp;
	int found = 0;
	int present = 0;

	if ((reqp = head) != NULL) {
		do {
			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
			if (entry_present == reqp)
				found++;
			if (entry_missing == reqp)
				present++;
		} while ((reqp = reqp->aio_req_next) != head);
	}
	ASSERT(entry_present == NULL || found == 1);
	ASSERT(entry_missing == NULL || present == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif

/*
 * Put a request onto the tail of a queue.
 */
void
aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
{
	aio_req_t *head;
	aio_req_t *prev;

	aio_verify_queue(*qhead, NULL, reqp);

	if ((head = *qhead) == NULL) {
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
		*qhead = reqp;
	} else {
		reqp->aio_req_next = head;
		reqp->aio_req_prev = prev = head->aio_req_prev;
		prev->aio_req_next = reqp;
		head->aio_req_prev = reqp;
	}
	reqp->aio_req_flags |= qflg_new;
}

/*
 * Remove a request from its queue.
 */
void
aio_deq(aio_req_t **qhead, aio_req_t *reqp)
{
	aio_verify_queue(*qhead, reqp, NULL);

	if (reqp->aio_req_next == reqp) {
		*qhead = NULL;
	} else {
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
		if (*qhead == reqp)
			*qhead = reqp->aio_req_next;
	}
	reqp->aio_req_next = NULL;
	reqp->aio_req_prev = NULL;
}
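
/*
 * A worked example of the two queue primitives above, starting from an
 * empty head q == NULL (sketch only; r1 and r2 are arbitrary requests):
 *
 *	aio_enq(&q, r1, 0);	q -> r1, r1 linked to itself
 *	aio_enq(&q, r2, 0);	q -> r1 <-> r2, circular
 *	aio_deq(&q, r1);	q -> r2, r2 linked to itself
 *	aio_deq(&q, r2);	q == NULL again
 *
 * Every queue in this file (doneq, pollq, notifyq, cleanupq, portq,
 * portpending) is such a circular doubly-linked list, which is why the
 * traversals below all use the do { ... } while ((reqp = next) != head)
 * idiom rather than a NULL-terminated walk.
 */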

/*
 * concatenate a specified queue with the cleanupq. the specified
 * queue is put onto the tail of the cleanupq. all elements on the
 * specified queue have their qflg bit cleared and AIO_CLEANUPQ set.
 */
/*ARGSUSED*/
void
aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
{
	aio_req_t *cleanupqhead, *q2tail;
	aio_req_t *reqp = q2;

	do {
		ASSERT(reqp->aio_req_flags & qflg);
		reqp->aio_req_flags &= ~qflg;
		reqp->aio_req_flags |= AIO_CLEANUPQ;
	} while ((reqp = reqp->aio_req_next) != q2);

	cleanupqhead = aiop->aio_cleanupq;
	if (cleanupqhead == NULL)
		aiop->aio_cleanupq = q2;
	else {
		cleanupqhead->aio_req_prev->aio_req_next = q2;
		q2tail = q2->aio_req_prev;
		q2tail->aio_req_next = cleanupqhead;
		q2->aio_req_prev = cleanupqhead->aio_req_prev;
		cleanupqhead->aio_req_prev = q2tail;
	}
}
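
/*
 * Splice sketch: with aio_cleanupq == (c1 <-> c2) and q2 == (r1 <-> r2),
 * both circular, the pointer surgery above yields the single circular
 * list (c1 <-> c2 <-> r1 <-> r2), with aio_cleanupq still pointing at c1.
 */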

/*
 * cleanup aio requests that are on the per-process poll queue.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if cleanupq, pollq, and
	 * notifyq are all empty. someone else must have
	 * emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
	if (cleanupqhead)
		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}


/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t	*reqp;
	aio_req_t	*next;
	aio_req_t	*headp;
	aio_lio_t	*liop;

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		/*
		 * It is not allowed to hold locks during aphysio_unlock().
		 * The aio_done() interrupt function will try to acquire
		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
		 * the portq list from the aiop for the duration of the
		 * aphysio_unlock() loop below.
		 */
		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);
		if ((reqp = headp) != NULL) {
			do {
				next = reqp->aio_req_next;
				aphysio_unlock(reqp);
				if (exitflag) {
					mutex_enter(&aiop->aio_mutex);
					aio_req_free(aiop, reqp);
					mutex_exit(&aiop->aio_mutex);
				}
			} while ((reqp = next) != headp);
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the port queue */
			aio_req_t *newq;

			mutex_enter(&aiop->aio_portq_mutex);
			if ((newq = aiop->aio_portq) != NULL) {
				aio_req_t *headprev = headp->aio_req_prev;
				aio_req_t *newqprev = newq->aio_req_prev;

				headp->aio_req_prev = newqprev;
				newq->aio_req_prev = headprev;
				headprev->aio_req_next = newq;
				newqprev->aio_req_next = headp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	if ((reqp = cleanupq) == NULL)
		return;
	do {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(reqp->aio_req_portkev);
			if ((liop = reqp->aio_req_lio) != NULL) {
				int send_event = 0;

				mutex_enter(&aiop->aio_mutex);
				ASSERT(liop->lio_refcnt > 0);
				if (--liop->lio_refcnt == 0) {
					if (liop->lio_port >= 0 &&
					    liop->lio_portkev) {
						liop->lio_port = -1;
						send_event = 1;
					}
				}
				mutex_exit(&aiop->aio_mutex);
				if (send_event)
					port_send_event(liop->lio_portkev);
			}
		}
	} while ((reqp = next) != cleanupq);
}

/*
 * Do cleanup for every element of the cleanupq.
 */
static int
aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	int signalled = 0;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));

	/*
	 * Since aio_req_done() or aio_req_find() use the HASH list to find
	 * the required requests, they could potentially take away elements
	 * if they are already done (AIO_DONEQ is set).
	 * The aio_cleanupq_mutex protects the queue for the duration of the
	 * loop from aio_req_done() and aio_req_find().
	 */
	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
		ASSERT(reqp->aio_req_portkev == NULL);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		if (exitflg)
			aio_req_free(aiop, reqp);
		else
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
		if (!exitflg && reqp->aio_req_flags & AIO_SIGNALLED)
			signalled++;
		mutex_exit(&aiop->aio_mutex);
	} while ((reqp = next) != qhead);
	return (signalled);
}

/*
 * do cleanup for every element of the notify queue.
 */
static int
aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	aio_lio_t *liohead;
	sigqueue_t *sigev, *lio_sigev;
	int signalled = 0;

	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			sigev = reqp->aio_req_sigqp;
			reqp->aio_req_sigqp = NULL;
			/*
			 * reset lio_sigev for each request so that a
			 * sigqueue already sent for an earlier list head
			 * cannot be queued a second time.
			 */
			lio_sigev = NULL;
			if ((liohead = reqp->aio_req_lio) != NULL) {
				ASSERT(liohead->lio_refcnt > 0);
				if (--liohead->lio_refcnt == 0) {
					cv_signal(&liohead->lio_notify);
					lio_sigev = liohead->lio_sigqp;
					liohead->lio_sigqp = NULL;
				}
			}
			mutex_exit(&aiop->aio_mutex);
			if (sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    sigev);
			}
			if (lio_sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    lio_sigev);
			}
		}
	} while ((reqp = next) != qhead);

	return (signalled);
}

/*
 * Do cleanup for every element of the poll queue.
 */
static void
aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	/*
	 * As no other threads should be accessing the queue at this point,
	 * it isn't necessary to hold aio_mutex while we traverse its elements.
	 */
	if ((reqp = qhead) == NULL)
		return;
	do {
		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			mutex_exit(&aiop->aio_mutex);
		}
	} while ((reqp = next) != qhead);
}

/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		aiop->aio_doneq = NULL;
		reqp = head;
		do {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		} while ((reqp = next) != head);
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}

/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
	struct buf	*bp;
	struct iovec	*iov;
	void		*resultp;
	int		error;
	size_t		retval;

	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
		return;

	reqp->aio_req_flags |= AIO_COPYOUTDONE;

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	/* "resultp" points to user-level result_t buffer */
	resultp = (void *)reqp->aio_req_resultp;
	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
		retval = (size_t)-1;
	} else {
		error = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}


void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
	int errno;
	size_t retval;

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			errno = bp->b_error;
		else
			errno = EIO;
		retval = (size_t)-1;
	} else {
		errno = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}
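
/*
 * For reference, the user-level buffer that both copyout routines above
 * fill in has this shape (sketched from <sys/aio.h> as an aid to reading
 * the sulword()/suword32() calls; consult the header for the
 * authoritative definition):
 *
 *	typedef struct aio_result_t {
 *		ssize_t	aio_return;	(read/write return value, or -1)
 *		int	aio_errno;	(errno, or 0 on success)
 *	} aio_result_t;
 *
 * aio_result32_t is the equivalent ILP32 layout used when a 64-bit
 * kernel services a 32-bit process (_SYSCALL32_IMPL).
 */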

/*
 * This function is used to remove a request from the port queue of
 * completed requests (aio_portq).
 */

void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	while (aiop->aio_portq == NULL) {
		/*
		 * aio_portq is set to NULL when aio_cleanup_portq()
		 * is working with the event queue.
		 * The aio_cleanup_thread() uses aio_cleanup_portq()
		 * to unlock all AIO buffers with completed transactions.
		 * Wait here until aio_cleanup_portq() restores the
		 * list of completed transactions in aio_portq.
		 */
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
	}
	aio_deq(&aiop->aio_portq, reqp);
}

/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	aio_t		*aiop;
	aio_req_t 	*reqp;
	aio_req_t 	*next;
	aio_req_t	*headp;
	int		counter;

	if (arg == NULL)
		aiop = curproc->p_aio;
	else
		aiop = (aio_t *)arg;

	/*
	 * The PORT_SOURCE_AIO source is always associated with every
	 * newly created port by default.
	 * If no asynchronous I/O transactions were associated with the port
	 * then the aiop pointer will still be set to NULL.
	 */
	if (aiop == NULL)
		return;

	/*
	 * Within a process event ports can be used to collect events other
	 * than PORT_SOURCE_AIO events. At the same time the process can submit
	 * asynchronous I/O transactions which are not associated with the
	 * current port.
	 * The current process oriented model of AIO uses a single queue for
	 * pending events. On close the pending queue (queue of asynchronous
	 * I/O transactions using event port notification) must be scanned
	 * to detect and handle pending I/Os using the current port.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	counter = 0;
	if ((headp = aiop->aio_portpending) != NULL) {
		reqp = headp;
		do {
			if (reqp->aio_req_portkev &&
			    reqp->aio_req_port == port) {
				reqp->aio_req_flags |= AIO_CLOSE_PORT;
				counter++;
			}
		} while ((reqp = reqp->aio_req_next) != headp);
	}
	if (counter == 0) {
		/* no AIOs pending */
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		return;
	}
	aiop->aio_portpendcnt += counter;
	mutex_exit(&aiop->aio_mutex);
	while (aiop->aio_portpendcnt)
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

	/*
	 * all pending AIOs are completed.
	 * check port doneq
	 */
	headp = NULL;
	if ((reqp = aiop->aio_portq) != NULL) {
		do {
			next = reqp->aio_req_next;
			if (reqp->aio_req_port == port) {
				/* dequeue request and discard event */
				aio_req_remove_portq(aiop, reqp);
				port_free_event(reqp->aio_req_portkev);
				/* put request in temporary queue */
				reqp->aio_req_next = headp;
				headp = reqp;
			}
		} while ((reqp = next) != aiop->aio_portq);
	}
	mutex_exit(&aiop->aio_portq_mutex);

	/* headp points to the list of requests to be discarded */
	for (reqp = headp; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free_port(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}

	if (aiop->aio_flags & AIO_CLEANUP)
		cv_broadcast(&aiop->aio_waitcv);
}

/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation,
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
	struct aio *aiop = procp->p_aio;
	struct as *as = procp->p_as;
	int ret = 0;

	ASSERT(MUTEX_HELD(&procp->p_lock));

	mutex_enter(&as->a_contents);

	if (aiop != NULL) {
		aiop->aio_rqclnup = 1;
		cv_broadcast(&as->a_cv);
		ret = 1;
	}
	mutex_exit(&as->a_contents);
	return (ret);
}
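
/*
 * Hand-off sketch (the caller side lives in the DR delete_memory_thread,
 * not in this file, so the details below are assumptions): with
 * procp->p_lock held, the DR thread calls
 * aio_cleanup_dr_delete_memory(procp); the per-process
 * aio_cleanup_thread() in aio.c sleeps on as->a_cv under as->a_contents
 * and rechecks aiop->aio_rqclnup, so the cv_broadcast() above wakes it
 * to run aio_cleanup() and drop the page locks that would otherwise
 * block the memory delete.
 */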