xref: /onnv-gate/usr/src/uts/common/os/move.c (revision 7660:1f3541e68e06)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
56707Sbrutus  * Common Development and Distribution License (the "License").
66707Sbrutus  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
226707Sbrutus  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
270Sstevel@tonic-gate /*	  All Rights Reserved  	*/
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
310Sstevel@tonic-gate  * The Regents of the University of California
320Sstevel@tonic-gate  * All Rights Reserved
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
350Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
360Sstevel@tonic-gate  * contributors.
370Sstevel@tonic-gate  */
380Sstevel@tonic-gate 
390Sstevel@tonic-gate #include <sys/types.h>
400Sstevel@tonic-gate #include <sys/sysmacros.h>
410Sstevel@tonic-gate #include <sys/param.h>
420Sstevel@tonic-gate #include <sys/systm.h>
430Sstevel@tonic-gate #include <sys/uio.h>
440Sstevel@tonic-gate #include <sys/errno.h>
456707Sbrutus #include <sys/vmsystm.h>
466707Sbrutus #include <sys/cmn_err.h>
476707Sbrutus #include <vm/as.h>
486707Sbrutus #include <vm/page.h>
496707Sbrutus 
506707Sbrutus #include <sys/dcopy.h>
516707Sbrutus 
/*
 * Tunable controlling how uioafini() waits for the last posted copy:
 *	< 0	never block, keep polling
 *	  0	always block
 *	> 0	poll until the poll counter reaches this value, then block
 * uioamove() also requests a completion interrupt whenever this is >= 0.
 */
int64_t uioa_maxpoll = -1;

/* Slots of uioa_t.uioa_hwst[] holding the dcopy channel and last command */
#define	UIO_DCOPY_CHANNEL	0
#define	UIO_DCOPY_CMD		1
550Sstevel@tonic-gate 
560Sstevel@tonic-gate /*
570Sstevel@tonic-gate  * Move "n" bytes at byte address "p"; "rw" indicates the direction
580Sstevel@tonic-gate  * of the move, and the I/O parameters are provided in "uio", which is
590Sstevel@tonic-gate  * update to reflect the data which was moved.  Returns 0 on success or
600Sstevel@tonic-gate  * a non-zero errno on failure.
610Sstevel@tonic-gate  */
620Sstevel@tonic-gate int
630Sstevel@tonic-gate uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
640Sstevel@tonic-gate {
650Sstevel@tonic-gate 	struct iovec *iov;
660Sstevel@tonic-gate 	ulong_t cnt;
670Sstevel@tonic-gate 	int error;
680Sstevel@tonic-gate 
690Sstevel@tonic-gate 	while (n && uio->uio_resid) {
700Sstevel@tonic-gate 		iov = uio->uio_iov;
710Sstevel@tonic-gate 		cnt = MIN(iov->iov_len, n);
720Sstevel@tonic-gate 		if (cnt == 0l) {
730Sstevel@tonic-gate 			uio->uio_iov++;
740Sstevel@tonic-gate 			uio->uio_iovcnt--;
750Sstevel@tonic-gate 			continue;
760Sstevel@tonic-gate 		}
770Sstevel@tonic-gate 		switch (uio->uio_segflg) {
780Sstevel@tonic-gate 
790Sstevel@tonic-gate 		case UIO_USERSPACE:
800Sstevel@tonic-gate 		case UIO_USERISPACE:
810Sstevel@tonic-gate 			if (rw == UIO_READ) {
820Sstevel@tonic-gate 				error = xcopyout_nta(p, iov->iov_base, cnt,
830Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
840Sstevel@tonic-gate 			} else {
850Sstevel@tonic-gate 				error = xcopyin_nta(iov->iov_base, p, cnt,
860Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
870Sstevel@tonic-gate 			}
880Sstevel@tonic-gate 
890Sstevel@tonic-gate 			if (error)
900Sstevel@tonic-gate 				return (error);
910Sstevel@tonic-gate 			break;
920Sstevel@tonic-gate 
930Sstevel@tonic-gate 		case UIO_SYSSPACE:
940Sstevel@tonic-gate 			if (rw == UIO_READ)
950Sstevel@tonic-gate 				error = kcopy_nta(p, iov->iov_base, cnt,
960Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
970Sstevel@tonic-gate 			else
980Sstevel@tonic-gate 				error = kcopy_nta(iov->iov_base, p, cnt,
990Sstevel@tonic-gate 				    (uio->uio_extflg & UIO_COPY_CACHED));
1000Sstevel@tonic-gate 			if (error)
1010Sstevel@tonic-gate 				return (error);
1020Sstevel@tonic-gate 			break;
1030Sstevel@tonic-gate 		}
1040Sstevel@tonic-gate 		iov->iov_base += cnt;
1050Sstevel@tonic-gate 		iov->iov_len -= cnt;
1060Sstevel@tonic-gate 		uio->uio_resid -= cnt;
1070Sstevel@tonic-gate 		uio->uio_loffset += cnt;
1080Sstevel@tonic-gate 		p = (caddr_t)p + cnt;
1090Sstevel@tonic-gate 		n -= cnt;
1100Sstevel@tonic-gate 	}
1110Sstevel@tonic-gate 	return (0);
1120Sstevel@tonic-gate }
1130Sstevel@tonic-gate 
1140Sstevel@tonic-gate /*
1150Sstevel@tonic-gate  * transfer a character value into the address space
1160Sstevel@tonic-gate  * delineated by a uio and update fields within the
1170Sstevel@tonic-gate  * uio for next character. Return 0 for success, EFAULT
1180Sstevel@tonic-gate  * for error.
1190Sstevel@tonic-gate  */
1200Sstevel@tonic-gate int
1210Sstevel@tonic-gate ureadc(int val, struct uio *uiop)
1220Sstevel@tonic-gate {
1230Sstevel@tonic-gate 	struct iovec *iovp;
1240Sstevel@tonic-gate 	unsigned char c;
1250Sstevel@tonic-gate 
1260Sstevel@tonic-gate 	/*
1270Sstevel@tonic-gate 	 * first determine if uio is valid.  uiop should be
1280Sstevel@tonic-gate 	 * non-NULL and the resid count > 0.
1290Sstevel@tonic-gate 	 */
1300Sstevel@tonic-gate 	if (!(uiop && uiop->uio_resid > 0))
1310Sstevel@tonic-gate 		return (EFAULT);
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate 	/*
1340Sstevel@tonic-gate 	 * scan through iovecs until one is found that is non-empty.
1350Sstevel@tonic-gate 	 * Return EFAULT if none found.
1360Sstevel@tonic-gate 	 */
1370Sstevel@tonic-gate 	while (uiop->uio_iovcnt > 0) {
1380Sstevel@tonic-gate 		iovp = uiop->uio_iov;
1390Sstevel@tonic-gate 		if (iovp->iov_len <= 0) {
1400Sstevel@tonic-gate 			uiop->uio_iovcnt--;
1410Sstevel@tonic-gate 			uiop->uio_iov++;
1420Sstevel@tonic-gate 		} else
1430Sstevel@tonic-gate 			break;
1440Sstevel@tonic-gate 	}
1450Sstevel@tonic-gate 
1460Sstevel@tonic-gate 	if (uiop->uio_iovcnt <= 0)
1470Sstevel@tonic-gate 		return (EFAULT);
1480Sstevel@tonic-gate 
1490Sstevel@tonic-gate 	/*
1500Sstevel@tonic-gate 	 * Transfer character to uio space.
1510Sstevel@tonic-gate 	 */
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate 	c = (unsigned char) (val & 0xFF);
1540Sstevel@tonic-gate 
1550Sstevel@tonic-gate 	switch (uiop->uio_segflg) {
1560Sstevel@tonic-gate 
1570Sstevel@tonic-gate 	case UIO_USERISPACE:
1580Sstevel@tonic-gate 	case UIO_USERSPACE:
1590Sstevel@tonic-gate 		if (copyout(&c, iovp->iov_base, sizeof (unsigned char)))
1600Sstevel@tonic-gate 			return (EFAULT);
1610Sstevel@tonic-gate 		break;
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 	case UIO_SYSSPACE: /* can do direct copy since kernel-kernel */
1640Sstevel@tonic-gate 		*iovp->iov_base = c;
1650Sstevel@tonic-gate 		break;
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate 	default:
1680Sstevel@tonic-gate 		return (EFAULT); /* invalid segflg value */
1690Sstevel@tonic-gate 	}
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate 	/*
1720Sstevel@tonic-gate 	 * bump up/down iovec and uio members to reflect transfer.
1730Sstevel@tonic-gate 	 */
1740Sstevel@tonic-gate 	iovp->iov_base++;
1750Sstevel@tonic-gate 	iovp->iov_len--;
1760Sstevel@tonic-gate 	uiop->uio_resid--;
1770Sstevel@tonic-gate 	uiop->uio_loffset++;
1780Sstevel@tonic-gate 	return (0); /* success */
1790Sstevel@tonic-gate }
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate /*
1820Sstevel@tonic-gate  * return a character value from the address space
1830Sstevel@tonic-gate  * delineated by a uio and update fields within the
1840Sstevel@tonic-gate  * uio for next character. Return the character for success,
1850Sstevel@tonic-gate  * -1 for error.
1860Sstevel@tonic-gate  */
1870Sstevel@tonic-gate int
1880Sstevel@tonic-gate uwritec(struct uio *uiop)
1890Sstevel@tonic-gate {
1900Sstevel@tonic-gate 	struct iovec *iovp;
1910Sstevel@tonic-gate 	unsigned char c;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	/*
1940Sstevel@tonic-gate 	 * verify we were passed a valid uio structure.
1950Sstevel@tonic-gate 	 * (1) non-NULL uiop, (2) positive resid count
1960Sstevel@tonic-gate 	 * (3) there is an iovec with positive length
1970Sstevel@tonic-gate 	 */
1980Sstevel@tonic-gate 
1990Sstevel@tonic-gate 	if (!(uiop && uiop->uio_resid > 0))
2000Sstevel@tonic-gate 		return (-1);
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	while (uiop->uio_iovcnt > 0) {
2030Sstevel@tonic-gate 		iovp = uiop->uio_iov;
2040Sstevel@tonic-gate 		if (iovp->iov_len <= 0) {
2050Sstevel@tonic-gate 			uiop->uio_iovcnt--;
2060Sstevel@tonic-gate 			uiop->uio_iov++;
2070Sstevel@tonic-gate 		} else
2080Sstevel@tonic-gate 			break;
2090Sstevel@tonic-gate 	}
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate 	if (uiop->uio_iovcnt <= 0)
2120Sstevel@tonic-gate 		return (-1);
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	/*
2150Sstevel@tonic-gate 	 * Get the character from the uio address space.
2160Sstevel@tonic-gate 	 */
2170Sstevel@tonic-gate 	switch (uiop->uio_segflg) {
2180Sstevel@tonic-gate 
2190Sstevel@tonic-gate 	case UIO_USERISPACE:
2200Sstevel@tonic-gate 	case UIO_USERSPACE:
2210Sstevel@tonic-gate 		if (copyin(iovp->iov_base, &c, sizeof (unsigned char)))
2220Sstevel@tonic-gate 			return (-1);
2230Sstevel@tonic-gate 		break;
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate 	case UIO_SYSSPACE:
2260Sstevel@tonic-gate 		c = *iovp->iov_base;
2270Sstevel@tonic-gate 		break;
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate 	default:
2300Sstevel@tonic-gate 		return (-1); /* invalid segflg */
2310Sstevel@tonic-gate 	}
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	/*
2340Sstevel@tonic-gate 	 * Adjust fields of iovec and uio appropriately.
2350Sstevel@tonic-gate 	 */
2360Sstevel@tonic-gate 	iovp->iov_base++;
2370Sstevel@tonic-gate 	iovp->iov_len--;
2380Sstevel@tonic-gate 	uiop->uio_resid--;
2390Sstevel@tonic-gate 	uiop->uio_loffset++;
2400Sstevel@tonic-gate 	return ((int)c & 0xFF); /* success */
2410Sstevel@tonic-gate }
2420Sstevel@tonic-gate 
2430Sstevel@tonic-gate /*
2440Sstevel@tonic-gate  * Drop the next n chars out of *uiop.
2450Sstevel@tonic-gate  */
2460Sstevel@tonic-gate void
2470Sstevel@tonic-gate uioskip(uio_t *uiop, size_t n)
2480Sstevel@tonic-gate {
2490Sstevel@tonic-gate 	if (n > uiop->uio_resid)
2500Sstevel@tonic-gate 		return;
2510Sstevel@tonic-gate 	while (n != 0) {
2520Sstevel@tonic-gate 		register iovec_t	*iovp = uiop->uio_iov;
2530Sstevel@tonic-gate 		register size_t		niovb = MIN(iovp->iov_len, n);
2540Sstevel@tonic-gate 
2550Sstevel@tonic-gate 		if (niovb == 0) {
2560Sstevel@tonic-gate 			uiop->uio_iov++;
2570Sstevel@tonic-gate 			uiop->uio_iovcnt--;
2580Sstevel@tonic-gate 			continue;
2590Sstevel@tonic-gate 		}
2600Sstevel@tonic-gate 		iovp->iov_base += niovb;
2610Sstevel@tonic-gate 		uiop->uio_loffset += niovb;
2620Sstevel@tonic-gate 		iovp->iov_len -= niovb;
2630Sstevel@tonic-gate 		uiop->uio_resid -= niovb;
2640Sstevel@tonic-gate 		n -= niovb;
2650Sstevel@tonic-gate 	}
2660Sstevel@tonic-gate }
2670Sstevel@tonic-gate 
2680Sstevel@tonic-gate /*
2690Sstevel@tonic-gate  * Dup the suio into the duio and diovec of size diov_cnt. If diov
2700Sstevel@tonic-gate  * is too small to dup suio then an error will be returned, else 0.
2710Sstevel@tonic-gate  */
2720Sstevel@tonic-gate int
2730Sstevel@tonic-gate uiodup(uio_t *suio, uio_t *duio, iovec_t *diov, int diov_cnt)
2740Sstevel@tonic-gate {
2750Sstevel@tonic-gate 	int ix;
2760Sstevel@tonic-gate 	iovec_t *siov = suio->uio_iov;
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate 	*duio = *suio;
2790Sstevel@tonic-gate 	for (ix = 0; ix < suio->uio_iovcnt; ix++) {
2800Sstevel@tonic-gate 		diov[ix] = siov[ix];
2810Sstevel@tonic-gate 		if (ix >= diov_cnt)
2820Sstevel@tonic-gate 			return (1);
2830Sstevel@tonic-gate 	}
2840Sstevel@tonic-gate 	duio->uio_iov = diov;
2850Sstevel@tonic-gate 	return (0);
2860Sstevel@tonic-gate }
2876707Sbrutus 
2886707Sbrutus /*
2896707Sbrutus  * Shadow state for checking if a platform has hardware asynchronous
2906707Sbrutus  * copy capability and minimum copy size, e.g. Intel's I/OAT dma engine,
2916707Sbrutus  *
2926707Sbrutus  * Dcopy does a call-back to uioa_dcopy_enable() when a dma device calls
2936707Sbrutus  * into dcopy to register and uioa_dcopy_disable() when the device calls
2946707Sbrutus  * into dcopy to unregister.
2956707Sbrutus  */
2966707Sbrutus uioasync_t uioasync = {B_FALSE, 1024};
2976707Sbrutus 
2986707Sbrutus void
2996707Sbrutus uioa_dcopy_enable()
3006707Sbrutus {
3016707Sbrutus 	uioasync.enabled = B_TRUE;
3026707Sbrutus }
3036707Sbrutus 
3046707Sbrutus void
3056707Sbrutus uioa_dcopy_disable()
3066707Sbrutus {
3076707Sbrutus 	uioasync.enabled = B_FALSE;
3086707Sbrutus }
3096707Sbrutus 
3106707Sbrutus /*
3116707Sbrutus  * Schedule an asynchronous move of "n" bytes at byte address "p",
3126707Sbrutus  * "rw" indicates the direction of the move, I/O parameters and
3136707Sbrutus  * async state are provided in "uioa" which is update to reflect
3146707Sbrutus  * the data which is to be moved.
3156707Sbrutus  *
3166707Sbrutus  * Returns 0 on success or a non-zero errno on failure.
3176707Sbrutus  *
3186707Sbrutus  * Note, while the uioasync APIs are general purpose in design
3196707Sbrutus  * the current implementation is Intel I/OAT specific.
3206707Sbrutus  */
3216707Sbrutus int
3226707Sbrutus uioamove(void *p, size_t n, enum uio_rw rw, uioa_t *uioa)
3236707Sbrutus {
3246707Sbrutus 	int		soff, doff;
3256707Sbrutus 	uint64_t	pa;
3266707Sbrutus 	int		cnt;
3276707Sbrutus 	iovec_t		*iov;
3286707Sbrutus 	dcopy_handle_t	channel;
3296707Sbrutus 	dcopy_cmd_t	cmd;
3306707Sbrutus 	int		ret = 0;
3316707Sbrutus 	int		dcopy_flags;
3326707Sbrutus 
3336707Sbrutus 	if (!(uioa->uioa_state & UIOA_ENABLED)) {
3346707Sbrutus 		/* The uioa_t isn't enabled */
3356707Sbrutus 		return (ENXIO);
3366707Sbrutus 	}
3376707Sbrutus 
3386707Sbrutus 	if (uioa->uio_segflg != UIO_USERSPACE || rw != UIO_READ) {
3396707Sbrutus 		/* Only support to user-land from kernel */
3406707Sbrutus 		return (ENOTSUP);
3416707Sbrutus 	}
3426707Sbrutus 
3436707Sbrutus 
3446707Sbrutus 	channel = uioa->uioa_hwst[UIO_DCOPY_CHANNEL];
3456707Sbrutus 	cmd = uioa->uioa_hwst[UIO_DCOPY_CMD];
3466707Sbrutus 	dcopy_flags = DCOPY_NOSLEEP;
3476707Sbrutus 
3486707Sbrutus 	/*
3496707Sbrutus 	 * While source bytes and destination bytes.
3506707Sbrutus 	 */
3516707Sbrutus 	while (n > 0 && uioa->uio_resid > 0) {
3526707Sbrutus 		iov = uioa->uio_iov;
3536707Sbrutus 		if (iov->iov_len == 0l) {
3546707Sbrutus 			uioa->uio_iov++;
3556707Sbrutus 			uioa->uio_iovcnt--;
3566707Sbrutus 			uioa->uioa_lcur++;
3576707Sbrutus 			uioa->uioa_lppp = uioa->uioa_lcur->uioa_ppp;
3586707Sbrutus 			continue;
3596707Sbrutus 		}
3606707Sbrutus 		/*
3616707Sbrutus 		 * While source bytes schedule an async
3626707Sbrutus 		 * dma for destination page by page.
3636707Sbrutus 		 */
3646707Sbrutus 		while (n > 0) {
3656707Sbrutus 			/* Addr offset in page src/dst */
3666707Sbrutus 			soff = (uintptr_t)p & PAGEOFFSET;
3676707Sbrutus 			doff = (uintptr_t)iov->iov_base & PAGEOFFSET;
3686707Sbrutus 			/* Min copy count src and dst and page sized */
3696707Sbrutus 			cnt = MIN(n, iov->iov_len);
3706707Sbrutus 			cnt = MIN(cnt, PAGESIZE - soff);
3716707Sbrutus 			cnt = MIN(cnt, PAGESIZE - doff);
3726707Sbrutus 			/* XXX if next page(s) contiguous could use multipage */
3736707Sbrutus 
3746707Sbrutus 			/*
3756707Sbrutus 			 * if we have an old command, we want to link all
3766707Sbrutus 			 * other commands to the next command we alloced so
3776707Sbrutus 			 * we only need to track the last command but can
3786707Sbrutus 			 * still free them all.
3796707Sbrutus 			 */
3806707Sbrutus 			if (cmd != NULL) {
3816707Sbrutus 				dcopy_flags |= DCOPY_ALLOC_LINK;
3826707Sbrutus 			}
3836707Sbrutus 			ret = dcopy_cmd_alloc(channel, dcopy_flags, &cmd);
3846707Sbrutus 			if (ret != DCOPY_SUCCESS) {
3856707Sbrutus 				/* Error of some sort */
3866707Sbrutus 				return (EIO);
3876707Sbrutus 			}
3886707Sbrutus 			uioa->uioa_hwst[UIO_DCOPY_CMD] = cmd;
3896707Sbrutus 
3906707Sbrutus 			ASSERT(cmd->dp_version == DCOPY_CMD_V0);
3916707Sbrutus 			if (uioa_maxpoll >= 0) {
3926707Sbrutus 				/* Blocking (>0 may be) used in uioafini() */
3936707Sbrutus 				cmd->dp_flags = DCOPY_CMD_INTR;
3946707Sbrutus 			} else {
3956707Sbrutus 				/* Non blocking uioafini() so no intr */
3966707Sbrutus 				cmd->dp_flags = DCOPY_CMD_NOFLAGS;
3976707Sbrutus 			}
3986707Sbrutus 			cmd->dp_cmd = DCOPY_CMD_COPY;
3996707Sbrutus 			pa = ptob((uint64_t)hat_getpfnum(kas.a_hat, p));
4006707Sbrutus 			cmd->dp.copy.cc_source = pa + soff;
4016707Sbrutus 			if (uioa->uioa_lcur->uioa_pfncnt == 0) {
4026707Sbrutus 				/* Have a (page_t **) */
4036707Sbrutus 				pa = ptob((uint64_t)(
4046707Sbrutus 				    *(page_t **)uioa->uioa_lppp)->p_pagenum);
4056707Sbrutus 			} else {
4066707Sbrutus 				/* Have a (pfn_t *) */
4076707Sbrutus 				pa = ptob((uint64_t)(
4086707Sbrutus 				    *(pfn_t *)uioa->uioa_lppp));
4096707Sbrutus 			}
4106707Sbrutus 			cmd->dp.copy.cc_dest = pa + doff;
4116707Sbrutus 			cmd->dp.copy.cc_size = cnt;
4126707Sbrutus 			ret = dcopy_cmd_post(cmd);
4136707Sbrutus 			if (ret != DCOPY_SUCCESS) {
4146707Sbrutus 				/* Error of some sort */
4156707Sbrutus 				return (EIO);
4166707Sbrutus 			}
4176707Sbrutus 			ret = 0;
4186707Sbrutus 
4196707Sbrutus 			/* If UIOA_POLL not set, set it */
4206707Sbrutus 			if (!(uioa->uioa_state & UIOA_POLL))
4216707Sbrutus 				uioa->uioa_state |= UIOA_POLL;
4226707Sbrutus 
4236707Sbrutus 			/* Update iov, uio, and local pointers/counters */
4246707Sbrutus 			iov->iov_base += cnt;
4256707Sbrutus 			iov->iov_len -= cnt;
4266707Sbrutus 			uioa->uio_resid -= cnt;
427*7660SEric.Yu@Sun.COM 			uioa->uioa_mbytes += cnt;
4286707Sbrutus 			uioa->uio_loffset += cnt;
4296707Sbrutus 			p = (caddr_t)p + cnt;
4306707Sbrutus 			n -= cnt;
4316707Sbrutus 
4326707Sbrutus 			/* End of iovec? */
4336707Sbrutus 			if (iov->iov_len == 0) {
4346707Sbrutus 				/* Yup, next iovec */
4356707Sbrutus 				break;
4366707Sbrutus 			}
4376707Sbrutus 
4386707Sbrutus 			/* Next dst addr page? */
4396707Sbrutus 			if (doff + cnt == PAGESIZE) {
4406707Sbrutus 				/* Yup, next page_t */
4416707Sbrutus 				uioa->uioa_lppp++;
4426707Sbrutus 			}
4436707Sbrutus 		}
4446707Sbrutus 	}
4456707Sbrutus 
4466707Sbrutus 	return (ret);
4476707Sbrutus }
4486707Sbrutus 
4496707Sbrutus /*
4506707Sbrutus  * Initialize a uioa_t for a given uio_t for the current user context,
4516707Sbrutus  * copy the common uio_t to the uioa_t, walk the shared iovec_t and
4526707Sbrutus  * lock down the user-land page(s) containing iovec_t data, then mapin
4536707Sbrutus  * user-land pages using segkpm.
4546707Sbrutus  */
4556707Sbrutus int
4566707Sbrutus uioainit(uio_t *uiop, uioa_t *uioap)
4576707Sbrutus {
4586707Sbrutus 	caddr_t	addr;
4596707Sbrutus 	page_t		**pages;
4606707Sbrutus 	int		off;
4616707Sbrutus 	int		len;
4626707Sbrutus 	proc_t		*procp = ttoproc(curthread);
4636707Sbrutus 	struct as	*as = procp->p_as;
4646707Sbrutus 	iovec_t		*iov = uiop->uio_iov;
4656707Sbrutus 	int32_t		iovcnt = uiop->uio_iovcnt;
4666707Sbrutus 	uioa_page_t	*locked = uioap->uioa_locked;
4676707Sbrutus 	dcopy_handle_t	channel;
4686707Sbrutus 	int		error;
4696707Sbrutus 
4706707Sbrutus 	if (! (uioap->uioa_state & UIOA_ALLOC)) {
4716707Sbrutus 		/* Can only init() a freshly allocated uioa_t */
4726707Sbrutus 		return (EINVAL);
4736707Sbrutus 	}
4746707Sbrutus 
4756707Sbrutus 	error = dcopy_alloc(DCOPY_NOSLEEP, &channel);
4766707Sbrutus 	if (error == DCOPY_NORESOURCES) {
4776707Sbrutus 		/* Turn off uioa */
4786707Sbrutus 		uioasync.enabled = B_FALSE;
4796707Sbrutus 		return (ENODEV);
4806707Sbrutus 	}
4816707Sbrutus 	if (error != DCOPY_SUCCESS) {
4826707Sbrutus 		/* Alloc failed */
4836707Sbrutus 		return (EIO);
4846707Sbrutus 	}
4856707Sbrutus 
4866707Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = channel;
4876707Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
4886707Sbrutus 
4896707Sbrutus 	/* Indicate uioa_t (will be) initialized */
4906707Sbrutus 	uioap->uioa_state = UIOA_INIT;
4916707Sbrutus 
492*7660SEric.Yu@Sun.COM 	uioap->uioa_mbytes = 0;
493*7660SEric.Yu@Sun.COM 
494*7660SEric.Yu@Sun.COM 	uioap->uioa_mbytes = 0;
495*7660SEric.Yu@Sun.COM 
4966707Sbrutus 	/* uio_t/uioa_t uio_t common struct copy */
4976707Sbrutus 	*((uio_t *)uioap) = *uiop;
4986707Sbrutus 
4996707Sbrutus 	/* initialize *uiop->uio_iov */
5006707Sbrutus 	if (iovcnt > UIOA_IOV_MAX) {
5016707Sbrutus 		/* Too big? */
5026707Sbrutus 		return (E2BIG);
5036707Sbrutus 	}
5046707Sbrutus 	uioap->uio_iov = iov;
5056707Sbrutus 	uioap->uio_iovcnt = iovcnt;
5066707Sbrutus 
5076707Sbrutus 	/* Mark the uioap as such */
5086707Sbrutus 	uioap->uio_extflg |= UIO_ASYNC;
5096707Sbrutus 
5106707Sbrutus 	/*
5116707Sbrutus 	 * For each iovec_t, lock-down the page(s) backing the iovec_t
5126707Sbrutus 	 * and save the page_t list for phys addr use in uioamove().
5136707Sbrutus 	 */
5146707Sbrutus 	iov = uiop->uio_iov;
5156707Sbrutus 	iovcnt = uiop->uio_iovcnt;
5166707Sbrutus 	while (iovcnt > 0) {
5176707Sbrutus 		addr = iov->iov_base;
5186707Sbrutus 		off = (uintptr_t)addr & PAGEOFFSET;
5196707Sbrutus 		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
5206707Sbrutus 		len = iov->iov_len + off;
5216707Sbrutus 
5226707Sbrutus 		/* Lock down page(s) for the iov span */
5236707Sbrutus 		if ((error = as_pagelock(as, &pages,
5246707Sbrutus 		    iov->iov_base, iov->iov_len, S_WRITE)) != 0) {
5256707Sbrutus 			/* Error */
5266707Sbrutus 			goto cleanup;
5276707Sbrutus 		}
5286707Sbrutus 
5296707Sbrutus 		if (pages == NULL) {
5306707Sbrutus 			/*
5316707Sbrutus 			 * Need page_t list, really only need
5326707Sbrutus 			 * a pfn list so build one.
5336707Sbrutus 			 */
5346707Sbrutus 			pfn_t   *pfnp;
5356707Sbrutus 			int	pcnt = len >> PAGESHIFT;
5366707Sbrutus 
5376707Sbrutus 			if (off)
5386707Sbrutus 				pcnt++;
5396707Sbrutus 			if ((pfnp = kmem_alloc(pcnt * sizeof (pfnp),
5406707Sbrutus 			    KM_NOSLEEP)) == NULL) {
5416707Sbrutus 				error = ENOMEM;
5426707Sbrutus 				goto cleanup;
5436707Sbrutus 			}
5446707Sbrutus 			locked->uioa_ppp = (void **)pfnp;
5456707Sbrutus 			locked->uioa_pfncnt = pcnt;
5466707Sbrutus 			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
5476707Sbrutus 			while (pcnt-- > 0) {
5486707Sbrutus 				*pfnp++ = hat_getpfnum(as->a_hat, addr);
5496707Sbrutus 				addr += PAGESIZE;
5506707Sbrutus 			}
5516707Sbrutus 			AS_LOCK_EXIT(as, &as->a_lock);
5526707Sbrutus 		} else {
5536707Sbrutus 			/* Have a page_t list, save it */
5546707Sbrutus 			locked->uioa_ppp = (void **)pages;
5556707Sbrutus 			locked->uioa_pfncnt = 0;
5566707Sbrutus 		}
5576707Sbrutus 		/* Save for as_pageunlock() in uioafini() */
5586707Sbrutus 		locked->uioa_base = iov->iov_base;
5596707Sbrutus 		locked->uioa_len = iov->iov_len;
5606707Sbrutus 		locked++;
5616707Sbrutus 
5626707Sbrutus 		/* Next iovec_t */
5636707Sbrutus 		iov++;
5646707Sbrutus 		iovcnt--;
5656707Sbrutus 	}
5666707Sbrutus 	/* Initialize curret pointer into uioa_locked[] and it's uioa_ppp */
5676707Sbrutus 	uioap->uioa_lcur = uioap->uioa_locked;
5686707Sbrutus 	uioap->uioa_lppp = uioap->uioa_lcur->uioa_ppp;
5696707Sbrutus 	return (0);
5706707Sbrutus 
5716707Sbrutus cleanup:
5726707Sbrutus 	/* Unlock any previously locked page_t(s) */
5736707Sbrutus 	while (locked > uioap->uioa_locked) {
5746707Sbrutus 		locked--;
5756707Sbrutus 		as_pageunlock(as, (page_t **)locked->uioa_ppp,
5766707Sbrutus 		    locked->uioa_base, locked->uioa_len, S_WRITE);
5776707Sbrutus 	}
5786707Sbrutus 
5796707Sbrutus 	/* Last indicate uioa_t still in alloc state */
5806707Sbrutus 	uioap->uioa_state = UIOA_ALLOC;
581*7660SEric.Yu@Sun.COM 	uioap->uioa_mbytes = 0;
5826707Sbrutus 
5836707Sbrutus 	return (error);
5846707Sbrutus }
5856707Sbrutus 
5866707Sbrutus /*
5876707Sbrutus  * Finish processing of a uioa_t by cleanup any pending "uioap" actions.
5886707Sbrutus  */
5896707Sbrutus int
5906707Sbrutus uioafini(uio_t *uiop, uioa_t *uioap)
5916707Sbrutus {
5926707Sbrutus 	int32_t		iovcnt = uiop->uio_iovcnt;
5936707Sbrutus 	uioa_page_t	*locked = uioap->uioa_locked;
5946707Sbrutus 	struct as	*as = ttoproc(curthread)->p_as;
5956707Sbrutus 	dcopy_handle_t	channel;
5966707Sbrutus 	dcopy_cmd_t	cmd;
5976707Sbrutus 	int		ret = 0;
5986707Sbrutus 
5996707Sbrutus 	ASSERT(uioap->uio_extflg & UIO_ASYNC);
6006707Sbrutus 
6016707Sbrutus 	if (!(uioap->uioa_state & (UIOA_ENABLED|UIOA_FINI))) {
6026707Sbrutus 		/* Must be an active uioa_t */
6036707Sbrutus 		return (EINVAL);
6046707Sbrutus 	}
6056707Sbrutus 
6066707Sbrutus 	channel = uioap->uioa_hwst[UIO_DCOPY_CHANNEL];
6076707Sbrutus 	cmd = uioap->uioa_hwst[UIO_DCOPY_CMD];
6086707Sbrutus 
6096707Sbrutus 	/* XXX - why do we get cmd == NULL sometimes? */
6106707Sbrutus 	if (cmd != NULL) {
6116707Sbrutus 		if (uioap->uioa_state & UIOA_POLL) {
6126707Sbrutus 			/* Wait for last dcopy() to finish */
6136707Sbrutus 			int64_t poll = 1;
6146707Sbrutus 			int poll_flag = DCOPY_POLL_NOFLAGS;
6156707Sbrutus 
6166707Sbrutus 			do {
6176707Sbrutus 				if (uioa_maxpoll == 0 ||
6186707Sbrutus 				    (uioa_maxpoll > 0 &&
6196707Sbrutus 				    poll >= uioa_maxpoll)) {
6206707Sbrutus 					/* Always block or after maxpoll */
6216707Sbrutus 					poll_flag = DCOPY_POLL_BLOCK;
6226707Sbrutus 				} else {
6236707Sbrutus 					/* No block, poll */
6246707Sbrutus 					poll++;
6256707Sbrutus 				}
6266707Sbrutus 				ret = dcopy_cmd_poll(cmd, poll_flag);
6276707Sbrutus 			} while (ret == DCOPY_PENDING);
6286707Sbrutus 
6296707Sbrutus 			if (ret == DCOPY_COMPLETED) {
6306707Sbrutus 				/* Poll/block succeeded */
6316707Sbrutus 				ret = 0;
6326707Sbrutus 			} else {
6336707Sbrutus 				/* Poll/block failed */
6346707Sbrutus 				ret = EIO;
6356707Sbrutus 			}
6366707Sbrutus 		}
6376707Sbrutus 		dcopy_cmd_free(&cmd);
6386707Sbrutus 	}
6396707Sbrutus 
6406707Sbrutus 	dcopy_free(&channel);
6416707Sbrutus 
6426707Sbrutus 	/* Unlock all page(s) iovec_t by iovec_t */
6436707Sbrutus 	while (iovcnt-- > 0) {
6446707Sbrutus 		page_t **pages;
6456707Sbrutus 
6466707Sbrutus 		if (locked->uioa_pfncnt == 0) {
6476707Sbrutus 			/* A as_pagelock() returned (page_t **) */
6486707Sbrutus 			pages = (page_t **)locked->uioa_ppp;
6496707Sbrutus 		} else {
6506707Sbrutus 			/* Our pfn_t array */
6516707Sbrutus 			pages = NULL;
6526707Sbrutus 			kmem_free(locked->uioa_ppp, locked->uioa_pfncnt *
6536707Sbrutus 			    sizeof (pfn_t *));
6546707Sbrutus 		}
6556707Sbrutus 		as_pageunlock(as, pages, locked->uioa_base, locked->uioa_len,
6566707Sbrutus 		    S_WRITE);
6576707Sbrutus 
6586707Sbrutus 		locked++;
6596707Sbrutus 	}
6606707Sbrutus 	/* uioa_t->uio_t common struct copy */
6616707Sbrutus 	*uiop = *((uio_t *)uioap);
6626707Sbrutus 
6636707Sbrutus 	/*
6646707Sbrutus 	 * Last, reset uioa state to alloc.
6656707Sbrutus 	 *
6666707Sbrutus 	 * Note, we only initialize the state here, all other members
6676707Sbrutus 	 * will be initialized in a subsequent uioainit().
6686707Sbrutus 	 */
6696707Sbrutus 	uioap->uioa_state = UIOA_ALLOC;
670*7660SEric.Yu@Sun.COM 	uioap->uioa_mbytes = 0;
6716707Sbrutus 
6726707Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CMD] = NULL;
6736707Sbrutus 	uioap->uioa_hwst[UIO_DCOPY_CHANNEL] = NULL;
6746707Sbrutus 
6756707Sbrutus 	return (ret);
6766707Sbrutus }
677