xref: /netbsd-src/sys/rump/librump/rumpvfs/rumpblk.c (revision 8bc54e5be648e06e7c6b48f7611f8bccfda032d4)
1*8bc54e5bSmsaitoh /*	$NetBSD: rumpblk.c,v 1.64 2016/07/07 06:55:44 msaitoh Exp $	*/
207334a46Spooka 
307334a46Spooka /*
407334a46Spooka  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
507334a46Spooka  *
607334a46Spooka  * Development of this software was supported by the
707334a46Spooka  * Finnish Cultural Foundation.
807334a46Spooka  *
907334a46Spooka  * Redistribution and use in source and binary forms, with or without
1007334a46Spooka  * modification, are permitted provided that the following conditions
1107334a46Spooka  * are met:
1207334a46Spooka  * 1. Redistributions of source code must retain the above copyright
1307334a46Spooka  *    notice, this list of conditions and the following disclaimer.
1407334a46Spooka  * 2. Redistributions in binary form must reproduce the above copyright
1507334a46Spooka  *    notice, this list of conditions and the following disclaimer in the
1607334a46Spooka  *    documentation and/or other materials provided with the distribution.
1707334a46Spooka  *
1807334a46Spooka  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
1907334a46Spooka  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
2007334a46Spooka  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
2107334a46Spooka  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2207334a46Spooka  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2307334a46Spooka  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2407334a46Spooka  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2507334a46Spooka  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2607334a46Spooka  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2707334a46Spooka  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2807334a46Spooka  * SUCH DAMAGE.
2907334a46Spooka  */
3007334a46Spooka 
3107334a46Spooka /*
3207334a46Spooka  * Block device emulation.  Presents a block device interface and
3307334a46Spooka  * uses rumpuser system calls to satisfy I/O requests.
343099e2e6Spooka  *
353099e2e6Spooka  * We provide fault injection.  The driver can be made to fail
363099e2e6Spooka  * I/O occasionally.
3707334a46Spooka  */
3807334a46Spooka 
3907334a46Spooka #include <sys/cdefs.h>
40*8bc54e5bSmsaitoh __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.64 2016/07/07 06:55:44 msaitoh Exp $");
4107334a46Spooka 
4207334a46Spooka #include <sys/param.h>
4307334a46Spooka #include <sys/buf.h>
4407334a46Spooka #include <sys/conf.h>
453099e2e6Spooka #include <sys/condvar.h>
4607334a46Spooka #include <sys/disklabel.h>
4715db645aSpooka #include <sys/evcnt.h>
4807334a46Spooka #include <sys/fcntl.h>
4907334a46Spooka #include <sys/kmem.h>
5007334a46Spooka #include <sys/malloc.h>
513099e2e6Spooka #include <sys/queue.h>
5207334a46Spooka #include <sys/stat.h>
533afd44cfStls #include <sys/cprng.h>
5407334a46Spooka 
556bb51422Spooka #include <rump-sys/kern.h>
566bb51422Spooka #include <rump-sys/vfs.h>
5707334a46Spooka 
586bb51422Spooka #include <rump/rumpuser.h>
5907334a46Spooka 
/*
 * Debug tracing.  DPRINTF takes a parenthesized printf argument list,
 * e.g. DPRINTF(("x = %d\n", x)).  Change the condition below to 1 to
 * route the output to printf; otherwise the macro expands to nothing
 * and its arguments are not evaluated.
 */
#if 0
#define DPRINTF(x) printf x
#else
#define DPRINTF(x)
#endif
653099e2e6Spooka 
6607334a46Spooka #define RUMPBLK_SIZE 16
6707334a46Spooka static struct rblkdev {
6807334a46Spooka 	char *rblk_path;
6907334a46Spooka 	int rblk_fd;
701e892b4aSpooka 	int rblk_mode;
71fa838a2eSpooka 
72a373547bSpooka 	uint64_t rblk_size;
7361d1563cSpooka 	uint64_t rblk_hostoffset;
749cb9168cSpooka 	uint64_t rblk_hostsize;
7561d1563cSpooka 	int rblk_ftype;
763099e2e6Spooka 
7702a11f09Spooka 	struct disklabel rblk_label;
7807334a46Spooka } minors[RUMPBLK_SIZE];
7907334a46Spooka 
80e5d3ecb4Spooka static struct evcnt ev_io_total;
81e5d3ecb4Spooka static struct evcnt ev_io_async;
82e5d3ecb4Spooka 
83e5d3ecb4Spooka static struct evcnt ev_bwrite_total;
84e5d3ecb4Spooka static struct evcnt ev_bwrite_async;
85e5d3ecb4Spooka static struct evcnt ev_bread_total;
8615db645aSpooka 
/* driver entry point prototypes */
dev_type_open(rumpblk_open);
dev_type_close(rumpblk_close);
dev_type_read(rumpblk_read);
dev_type_write(rumpblk_write);
dev_type_ioctl(rumpblk_ioctl);
dev_type_strategy(rumpblk_strategy);
dev_type_strategy(rumpblk_strategy_fail);
dev_type_dump(rumpblk_dump);
dev_type_size(rumpblk_size);
9607334a46Spooka 
9707334a46Spooka static const struct bdevsw rumpblk_bdevsw = {
98a68f9396Sdholland 	.d_open = rumpblk_open,
99a68f9396Sdholland 	.d_close = rumpblk_close,
100a68f9396Sdholland 	.d_strategy = rumpblk_strategy,
101a68f9396Sdholland 	.d_ioctl = rumpblk_ioctl,
102a68f9396Sdholland 	.d_dump = nodump,
103a68f9396Sdholland 	.d_psize = nosize,
1048c70ef39Sdholland 	.d_discard = nodiscard,
105a68f9396Sdholland 	.d_flag = D_DISK
10607334a46Spooka };
10707334a46Spooka 
108fa73a97fSpooka static const struct bdevsw rumpblk_bdevsw_fail = {
109a68f9396Sdholland 	.d_open = rumpblk_open,
110a68f9396Sdholland 	.d_close = rumpblk_close,
111a68f9396Sdholland 	.d_strategy = rumpblk_strategy_fail,
112a68f9396Sdholland 	.d_ioctl = rumpblk_ioctl,
113a68f9396Sdholland 	.d_dump = nodump,
114a68f9396Sdholland 	.d_psize = nosize,
1158c70ef39Sdholland 	.d_discard = nodiscard,
116a68f9396Sdholland 	.d_flag = D_DISK
117fa73a97fSpooka };
118fa73a97fSpooka 
11907334a46Spooka static const struct cdevsw rumpblk_cdevsw = {
120a68f9396Sdholland 	.d_open = rumpblk_open,
121a68f9396Sdholland 	.d_close = rumpblk_close,
122a68f9396Sdholland 	.d_read = rumpblk_read,
123a68f9396Sdholland 	.d_write = rumpblk_write,
124a68f9396Sdholland 	.d_ioctl = rumpblk_ioctl,
125a68f9396Sdholland 	.d_stop = nostop,
126a68f9396Sdholland 	.d_tty = notty,
127a68f9396Sdholland 	.d_poll = nopoll,
128a68f9396Sdholland 	.d_mmap = nommap,
129a68f9396Sdholland 	.d_kqfilter = nokqfilter,
130f9228f42Sdholland 	.d_discard = nodiscard,
131a68f9396Sdholland 	.d_flag = D_DISK
13207334a46Spooka };
13307334a46Spooka 
1341e892b4aSpooka static int backend_open(struct rblkdev *, const char *);
1351e892b4aSpooka static int backend_close(struct rblkdev *);
1361e892b4aSpooka 
137fa73a97fSpooka /* fail every n out of BLKFAIL_MAX */
138fa73a97fSpooka #define BLKFAIL_MAX 10000
139fa73a97fSpooka static int blkfail;
140fa73a97fSpooka static unsigned randstate;
141eddab4e9Spooka static kmutex_t rumpblk_lock;
1421fe7a9e1Spooka static int sectshift = DEV_BSHIFT;
14307334a46Spooka 
14402a11f09Spooka static void
makedefaultlabel(struct disklabel * lp,off_t size,int part)14502a11f09Spooka makedefaultlabel(struct disklabel *lp, off_t size, int part)
14602a11f09Spooka {
14702a11f09Spooka 	int i;
14802a11f09Spooka 
14902a11f09Spooka 	memset(lp, 0, sizeof(*lp));
15002a11f09Spooka 
15102a11f09Spooka 	lp->d_secperunit = size;
1521fe7a9e1Spooka 	lp->d_secsize = 1 << sectshift;
1531fe7a9e1Spooka 	lp->d_nsectors = size >> sectshift;
15402a11f09Spooka 	lp->d_ntracks = 1;
15502a11f09Spooka 	lp->d_ncylinders = 1;
15602a11f09Spooka 	lp->d_secpercyl = lp->d_nsectors;
15702a11f09Spooka 
15802a11f09Spooka 	/* oh dear oh dear */
15902a11f09Spooka 	strncpy(lp->d_typename, "rumpd", sizeof(lp->d_typename));
16002a11f09Spooka 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
16102a11f09Spooka 
162c182898bSchristos 	lp->d_type = DKTYPE_RUMPD;
16302a11f09Spooka 	lp->d_rpm = 11;
16402a11f09Spooka 	lp->d_interleave = 1;
16502a11f09Spooka 	lp->d_flags = 0;
16602a11f09Spooka 
16702a11f09Spooka 	/* XXX: RAW_PART handling? */
16802a11f09Spooka 	for (i = 0; i < part; i++) {
16902a11f09Spooka 		lp->d_partitions[i].p_fstype = FS_UNUSED;
17002a11f09Spooka 	}
1711fe7a9e1Spooka 	lp->d_partitions[part].p_size = size >> sectshift;
17202a11f09Spooka 	lp->d_npartitions = part+1;
17302a11f09Spooka 	/* XXX: file system type? */
17402a11f09Spooka 
17502a11f09Spooka 	lp->d_magic = DISKMAGIC;
17602a11f09Spooka 	lp->d_magic2 = DISKMAGIC;
17702a11f09Spooka 	lp->d_checksum = 0; /* XXX */
17802a11f09Spooka }
17902a11f09Spooka 
18007334a46Spooka int
rumpblk_init(void)181df7f595eScegger rumpblk_init(void)
18207334a46Spooka {
183fa73a97fSpooka 	char buf[64];
184f9ae378aSpooka 	devmajor_t rumpblkmaj = RUMPBLK_DEVMAJOR;
185dde8f9baSpooka 	unsigned tmp;
186c36d22e2Spooka 	int i;
18707334a46Spooka 
188eddab4e9Spooka 	mutex_init(&rumpblk_lock, MUTEX_DEFAULT, IPL_NONE);
189eddab4e9Spooka 
190810cff61Spooka 	if (rumpuser_getparam("RUMP_BLKFAIL", buf, sizeof(buf)) == 0) {
191fa73a97fSpooka 		blkfail = strtoul(buf, NULL, 10);
192fa73a97fSpooka 		/* fail everything */
193fa73a97fSpooka 		if (blkfail > BLKFAIL_MAX)
194fa73a97fSpooka 			blkfail = BLKFAIL_MAX;
195810cff61Spooka 		if (rumpuser_getparam("RUMP_BLKFAIL_SEED",
196810cff61Spooka 		    buf, sizeof(buf)) == 0) {
197fa73a97fSpooka 			randstate = strtoul(buf, NULL, 10);
198fa73a97fSpooka 		} else {
1993afd44cfStls 			randstate = cprng_fast32();
200fa73a97fSpooka 		}
201580ab7a0Spooka 		printf("rumpblk: FAULT INJECTION ACTIVE! fail %d/%d. "
202580ab7a0Spooka 		    "seed %u\n", blkfail, BLKFAIL_MAX, randstate);
203fa73a97fSpooka 	} else {
204fa73a97fSpooka 		blkfail = 0;
205fa73a97fSpooka 	}
206fa73a97fSpooka 
207810cff61Spooka 	if (rumpuser_getparam("RUMP_BLKSECTSHIFT", buf, sizeof(buf)) == 0) {
2081fe7a9e1Spooka 		printf("rumpblk: ");
2091fe7a9e1Spooka 		tmp = strtoul(buf, NULL, 10);
2101fe7a9e1Spooka 		if (tmp >= DEV_BSHIFT)
2111fe7a9e1Spooka 			sectshift = tmp;
2121fe7a9e1Spooka 		else
2131fe7a9e1Spooka 			printf("RUMP_BLKSECTSHIFT must be least %d (now %d), ",
2141fe7a9e1Spooka 			   DEV_BSHIFT, tmp);
2151fe7a9e1Spooka 		printf("using %d for sector shift (size %d)\n",
2161fe7a9e1Spooka 		    sectshift, 1<<sectshift);
2171fe7a9e1Spooka 	}
218dde8f9baSpooka 
2193099e2e6Spooka 	memset(minors, 0, sizeof(minors));
2203099e2e6Spooka 	for (i = 0; i < RUMPBLK_SIZE; i++) {
221ff4f0c61Spooka 		minors[i].rblk_fd = -1;
2223099e2e6Spooka 	}
2233099e2e6Spooka 
224e5d3ecb4Spooka 	evcnt_attach_dynamic(&ev_io_total, EVCNT_TYPE_MISC, NULL,
225dd870513Spooka 	    "rumpblk", "I/O reqs");
226e5d3ecb4Spooka 	evcnt_attach_dynamic(&ev_io_async, EVCNT_TYPE_MISC, NULL,
227dd870513Spooka 	    "rumpblk", "async I/O");
228e5d3ecb4Spooka 
229e5d3ecb4Spooka 	evcnt_attach_dynamic(&ev_bread_total, EVCNT_TYPE_MISC, NULL,
230dd870513Spooka 	    "rumpblk", "bytes read");
231e5d3ecb4Spooka 	evcnt_attach_dynamic(&ev_bwrite_total, EVCNT_TYPE_MISC, NULL,
232dd870513Spooka 	    "rumpblk", "bytes written");
233e5d3ecb4Spooka 	evcnt_attach_dynamic(&ev_bwrite_async, EVCNT_TYPE_MISC, NULL,
234dd870513Spooka 	    "rumpblk", "bytes written async");
235e5d3ecb4Spooka 
236fa73a97fSpooka 	if (blkfail) {
237f9ae378aSpooka 		return devsw_attach("rumpblk",
238f9ae378aSpooka 		    &rumpblk_bdevsw_fail, &rumpblkmaj,
239f9ae378aSpooka 		    &rumpblk_cdevsw, &rumpblkmaj);
240fa73a97fSpooka 	} else {
241f9ae378aSpooka 		return devsw_attach("rumpblk",
242f9ae378aSpooka 		    &rumpblk_bdevsw, &rumpblkmaj,
243f9ae378aSpooka 		    &rumpblk_cdevsw, &rumpblkmaj);
24407334a46Spooka 	}
245fa73a97fSpooka }
24607334a46Spooka 
24707334a46Spooka int
rumpblk_register(const char * path,devminor_t * dmin,uint64_t offset,uint64_t size)24861d1563cSpooka rumpblk_register(const char *path, devminor_t *dmin,
24961d1563cSpooka 	uint64_t offset, uint64_t size)
25007334a46Spooka {
25161d1563cSpooka 	struct rblkdev *rblk;
252eddab4e9Spooka 	uint64_t flen;
25307334a46Spooka 	size_t len;
2541de15b17Spooka 	int ftype, error, i;
25507334a46Spooka 
25661d1563cSpooka 	/* devices might not report correct size unless they're open */
25732a34307Spooka 	if ((error = rumpuser_getfileinfo(path, &flen, &ftype)) != 0)
25861d1563cSpooka 		return error;
25961d1563cSpooka 
260eddab4e9Spooka 	/* verify host file is of supported type */
261eddab4e9Spooka 	if (!(ftype == RUMPUSER_FT_REG
262eddab4e9Spooka 	   || ftype == RUMPUSER_FT_BLK
263eddab4e9Spooka 	   || ftype == RUMPUSER_FT_CHR))
264eddab4e9Spooka 		return EINVAL;
265eddab4e9Spooka 
266eddab4e9Spooka 	mutex_enter(&rumpblk_lock);
267eddab4e9Spooka 	for (i = 0; i < RUMPBLK_SIZE; i++) {
268eddab4e9Spooka 		if (minors[i].rblk_path&&strcmp(minors[i].rblk_path, path)==0) {
269eddab4e9Spooka 			mutex_exit(&rumpblk_lock);
270eddab4e9Spooka 			*dmin = i;
271eddab4e9Spooka 			return 0;
272eddab4e9Spooka 		}
273eddab4e9Spooka 	}
27407334a46Spooka 
27507334a46Spooka 	for (i = 0; i < RUMPBLK_SIZE; i++)
27607334a46Spooka 		if (minors[i].rblk_path == NULL)
27707334a46Spooka 			break;
278eddab4e9Spooka 	if (i == RUMPBLK_SIZE) {
279eddab4e9Spooka 		mutex_exit(&rumpblk_lock);
280eddab4e9Spooka 		return EBUSY;
281eddab4e9Spooka 	}
28207334a46Spooka 
28361d1563cSpooka 	rblk = &minors[i];
2841e892b4aSpooka 	rblk->rblk_path = __UNCONST("taken");
2851e892b4aSpooka 	mutex_exit(&rumpblk_lock);
2861e892b4aSpooka 
28707334a46Spooka 	len = strlen(path);
28861d1563cSpooka 	rblk->rblk_path = malloc(len + 1, M_TEMP, M_WAITOK);
28961d1563cSpooka 	strcpy(rblk->rblk_path, path);
29061d1563cSpooka 	rblk->rblk_hostoffset = offset;
291c92b36b8Spooka 	if (size != RUMPBLK_SIZENOTSET) {
29261d1563cSpooka 		KASSERT(size + offset <= flen);
29361d1563cSpooka 		rblk->rblk_size = size;
29461d1563cSpooka 	} else {
29561d1563cSpooka 		KASSERT(offset < flen);
29661d1563cSpooka 		rblk->rblk_size = flen - offset;
29761d1563cSpooka 	}
2989cb9168cSpooka 	rblk->rblk_hostsize = flen;
29961d1563cSpooka 	rblk->rblk_ftype = ftype;
30002a11f09Spooka 	makedefaultlabel(&rblk->rblk_label, rblk->rblk_size, i);
3011e892b4aSpooka 
3021e892b4aSpooka 	if ((error = backend_open(rblk, path)) != 0) {
3031e892b4aSpooka 		memset(&rblk->rblk_label, 0, sizeof(rblk->rblk_label));
3041e892b4aSpooka 		free(rblk->rblk_path, M_TEMP);
3051e892b4aSpooka 		rblk->rblk_path = NULL;
3061e892b4aSpooka 		return error;
3071e892b4aSpooka 	}
308eddab4e9Spooka 
309eddab4e9Spooka 	*dmin = i;
310eddab4e9Spooka 	return 0;
31107334a46Spooka }
31207334a46Spooka 
31328e67240Spooka /*
31428e67240Spooka  * Unregister rumpblk.  It's the callers responsibility to make
31528e67240Spooka  * sure it's no longer in use.
31628e67240Spooka  */
31728e67240Spooka int
rumpblk_deregister(const char * path)31828e67240Spooka rumpblk_deregister(const char *path)
31928e67240Spooka {
32028e67240Spooka 	struct rblkdev *rblk;
32128e67240Spooka 	int i;
32228e67240Spooka 
32328e67240Spooka 	mutex_enter(&rumpblk_lock);
32428e67240Spooka 	for (i = 0; i < RUMPBLK_SIZE; i++) {
32528e67240Spooka 		if (minors[i].rblk_path&&strcmp(minors[i].rblk_path, path)==0) {
32628e67240Spooka 			break;
32728e67240Spooka 		}
32828e67240Spooka 	}
32928e67240Spooka 	mutex_exit(&rumpblk_lock);
33028e67240Spooka 
33128e67240Spooka 	if (i == RUMPBLK_SIZE)
33228e67240Spooka 		return ENOENT;
33328e67240Spooka 
33428e67240Spooka 	rblk = &minors[i];
3351e892b4aSpooka 	backend_close(rblk);
33628e67240Spooka 
33728e67240Spooka 	free(rblk->rblk_path, M_TEMP);
33828e67240Spooka 	memset(&rblk->rblk_label, 0, sizeof(rblk->rblk_label));
3391e892b4aSpooka 	rblk->rblk_path = NULL;
34028e67240Spooka 
34128e67240Spooka 	return 0;
34228e67240Spooka }
34328e67240Spooka 
3443bc1213aSpooka /*
3453bc1213aSpooka  * Release all backend resources, to be called only when the rump
3463bc1213aSpooka  * kernel is being shut down.
3473bc1213aSpooka  * This routine does not do a full "fini" since we're going down anyway.
3483bc1213aSpooka  */
3493bc1213aSpooka void
rumpblk_fini(void)3503bc1213aSpooka rumpblk_fini(void)
3513bc1213aSpooka {
3523bc1213aSpooka 	int i;
3533bc1213aSpooka 
3543bc1213aSpooka 	for (i = 0; i < RUMPBLK_SIZE; i++) {
3553bc1213aSpooka 		struct rblkdev *rblk;
3563bc1213aSpooka 
3573bc1213aSpooka 		rblk = &minors[i];
3583bc1213aSpooka 		if (rblk->rblk_fd != -1)
3593bc1213aSpooka 			backend_close(rblk);
3603bc1213aSpooka 	}
3613bc1213aSpooka }
3623bc1213aSpooka 
3631e892b4aSpooka static int
backend_open(struct rblkdev * rblk,const char * path)3641e892b4aSpooka backend_open(struct rblkdev *rblk, const char *path)
36507334a46Spooka {
36607334a46Spooka 	int error, fd;
36707334a46Spooka 
368ff4f0c61Spooka 	KASSERT(rblk->rblk_fd == -1);
36932a34307Spooka 	error = rumpuser_open(path,
37032a34307Spooka 	    RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_BIO, &fd);
3711e892b4aSpooka 	if (error) {
37232a34307Spooka 		error = rumpuser_open(path,
37332a34307Spooka 		    RUMPUSER_OPEN_RDONLY | RUMPUSER_OPEN_BIO, &fd);
37407334a46Spooka 		if (error)
37507334a46Spooka 			return error;
3761e892b4aSpooka 		rblk->rblk_mode = FREAD;
3771e892b4aSpooka 	} else {
3781e892b4aSpooka 		rblk->rblk_mode = FREAD|FWRITE;
3791e892b4aSpooka 	}
380e5d3ecb4Spooka 
3813099e2e6Spooka 	rblk->rblk_fd = fd;
3823099e2e6Spooka 	KASSERT(rblk->rblk_fd != -1);
38307334a46Spooka 	return 0;
38407334a46Spooka }
38507334a46Spooka 
3861e892b4aSpooka static int
backend_close(struct rblkdev * rblk)3871e892b4aSpooka backend_close(struct rblkdev *rblk)
38807334a46Spooka {
38907334a46Spooka 
39032a34307Spooka 	rumpuser_close(rblk->rblk_fd);
39107334a46Spooka 	rblk->rblk_fd = -1;
3921e892b4aSpooka 
3931e892b4aSpooka 	return 0;
3941e892b4aSpooka }
3951e892b4aSpooka 
3961e892b4aSpooka int
rumpblk_open(dev_t dev,int flag,int fmt,struct lwp * l)3971e892b4aSpooka rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
3981e892b4aSpooka {
3991e892b4aSpooka 	struct rblkdev *rblk = &minors[minor(dev)];
4001e892b4aSpooka 
4011e892b4aSpooka 	if (rblk->rblk_fd == -1)
4021e892b4aSpooka 		return ENXIO;
4031e892b4aSpooka 
4041e892b4aSpooka 	if (((flag & (FREAD|FWRITE)) & ~rblk->rblk_mode) != 0) {
4051e892b4aSpooka 		return EACCES;
4061e892b4aSpooka 	}
4071e892b4aSpooka 
4081e892b4aSpooka 	return 0;
4091e892b4aSpooka }
4101e892b4aSpooka 
4111e892b4aSpooka int
rumpblk_close(dev_t dev,int flag,int fmt,struct lwp * l)4121e892b4aSpooka rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
4131e892b4aSpooka {
41407334a46Spooka 
41507334a46Spooka 	return 0;
41607334a46Spooka }
41707334a46Spooka 
41807334a46Spooka int
rumpblk_ioctl(dev_t dev,u_long xfer,void * addr,int flag,struct lwp * l)41907334a46Spooka rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
42007334a46Spooka {
42102a11f09Spooka 	devminor_t dmin = minor(dev);
42202a11f09Spooka 	struct rblkdev *rblk = &minors[dmin];
42302a11f09Spooka 	struct partinfo *pi;
4247cbc092fSchristos 	struct partition *dp;
42502a11f09Spooka 	int error = 0;
42607334a46Spooka 
42702a11f09Spooka 	/* well, me should support a few more, but we don't for now */
42802a11f09Spooka 	switch (xfer) {
42902a11f09Spooka 	case DIOCGDINFO:
43002a11f09Spooka 		*(struct disklabel *)addr = rblk->rblk_label;
43102a11f09Spooka 		break;
43207334a46Spooka 
4338d10f962Schristos 	case DIOCGPARTINFO:
4348d10f962Schristos 		dp = &rblk->rblk_label.d_partitions[DISKPART(dmin)];
43502a11f09Spooka 		pi = addr;
4368d10f962Schristos 		pi->pi_offset = dp->p_offset;
4378d10f962Schristos 		pi->pi_size = dp->p_size;
4388d10f962Schristos 		pi->pi_secsize = rblk->rblk_label.d_secsize;
4398d10f962Schristos 		pi->pi_bsize = BLKDEV_IOSIZE;
4408d10f962Schristos 		pi->pi_fstype = dp->p_fstype;
4418d10f962Schristos 		pi->pi_fsize = dp->p_fsize;
4428d10f962Schristos 		pi->pi_frag = dp->p_frag;
4438d10f962Schristos 		pi->pi_cpg = dp->p_cpg;
44402a11f09Spooka 		break;
4454686e2c3Spooka 
4464686e2c3Spooka 	/* it's synced enough along the write path */
4474686e2c3Spooka 	case DIOCCACHESYNC:
4484686e2c3Spooka 		break;
4494686e2c3Spooka 
45081098434Spooka 	case DIOCGMEDIASIZE:
45181098434Spooka 		*(off_t *)addr = (off_t)rblk->rblk_size;
45281098434Spooka 		break;
45381098434Spooka 
45402a11f09Spooka 	default:
45502a11f09Spooka 		error = ENOTTY;
45602a11f09Spooka 		break;
45707334a46Spooka 	}
45807334a46Spooka 
45907334a46Spooka 	return error;
46007334a46Spooka }
46107334a46Spooka 
46243ccc52fSpooka static int
do_physio(dev_t dev,struct uio * uio,int which)46343ccc52fSpooka do_physio(dev_t dev, struct uio *uio, int which)
46443ccc52fSpooka {
46543ccc52fSpooka 	void (*strat)(struct buf *);
46643ccc52fSpooka 
46743ccc52fSpooka 	if (blkfail)
46843ccc52fSpooka 		strat = rumpblk_strategy_fail;
46943ccc52fSpooka 	else
47043ccc52fSpooka 		strat = rumpblk_strategy;
47143ccc52fSpooka 
47243ccc52fSpooka 	return physio(strat, NULL, dev, which, minphys, uio);
47343ccc52fSpooka }
47443ccc52fSpooka 
47507334a46Spooka int
rumpblk_read(dev_t dev,struct uio * uio,int flags)47607334a46Spooka rumpblk_read(dev_t dev, struct uio *uio, int flags)
47707334a46Spooka {
47807334a46Spooka 
47943ccc52fSpooka 	return do_physio(dev, uio, B_READ);
48007334a46Spooka }
48107334a46Spooka 
48207334a46Spooka int
rumpblk_write(dev_t dev,struct uio * uio,int flags)48307334a46Spooka rumpblk_write(dev_t dev, struct uio *uio, int flags)
48407334a46Spooka {
48507334a46Spooka 
48643ccc52fSpooka 	return do_physio(dev, uio, B_WRITE);
48707334a46Spooka }
48807334a46Spooka 
489fa73a97fSpooka static void
dostrategy(struct buf * bp)490fa73a97fSpooka dostrategy(struct buf *bp)
49107334a46Spooka {
49207334a46Spooka 	struct rblkdev *rblk = &minors[minor(bp->b_dev)];
49307334a46Spooka 	off_t off;
4948a514cd6Spooka 	int async = bp->b_flags & B_ASYNC;
495fa838a2eSpooka 	int op;
49607334a46Spooka 
4972f1c12f1Spooka 	if (bp->b_bcount % (1<<sectshift) != 0) {
4982f1c12f1Spooka 		rump_biodone(bp, 0, EINVAL);
4992f1c12f1Spooka 		return;
5002f1c12f1Spooka 	}
5012f1c12f1Spooka 
502e5d3ecb4Spooka 	/* collect statistics */
503e5d3ecb4Spooka 	ev_io_total.ev_count++;
504e5d3ecb4Spooka 	if (async)
505e5d3ecb4Spooka 		ev_io_async.ev_count++;
506e5d3ecb4Spooka 	if (BUF_ISWRITE(bp)) {
507e5d3ecb4Spooka 		ev_bwrite_total.ev_count += bp->b_bcount;
508e5d3ecb4Spooka 		if (async)
509e5d3ecb4Spooka 			ev_bwrite_async.ev_count += bp->b_bcount;
510e5d3ecb4Spooka 	} else {
511e5d3ecb4Spooka 		ev_bread_total.ev_count++;
512e5d3ecb4Spooka 	}
513e5d3ecb4Spooka 
5142f1c12f1Spooka 	/*
5152f1c12f1Spooka 	 * b_blkno is always in terms of DEV_BSIZE, and since we need
5162f1c12f1Spooka 	 * to translate to a byte offset for the host read, this
5172f1c12f1Spooka 	 * calculation does not need sectshift.
5182f1c12f1Spooka 	 */
5192f1c12f1Spooka 	off = bp->b_blkno << DEV_BSHIFT;
5202f1c12f1Spooka 
521b2ba089fSpooka 	/*
522b2ba089fSpooka 	 * Do bounds checking if we're working on a file.  Otherwise
523b2ba089fSpooka 	 * invalid file systems might attempt to read beyond EOF.  This
524b2ba089fSpooka 	 * is bad(tm) especially on mmapped images.  This is essentially
525b2ba089fSpooka 	 * the kernel bounds_check() routines.
526b2ba089fSpooka 	 */
52761d1563cSpooka 	if (off + bp->b_bcount > rblk->rblk_size) {
528b2ba089fSpooka 		int64_t sz = rblk->rblk_size - off;
529b2ba089fSpooka 
530b2ba089fSpooka 		/* EOF */
531b2ba089fSpooka 		if (sz == 0) {
532b2ba089fSpooka 			rump_biodone(bp, 0, 0);
533b2ba089fSpooka 			return;
534b2ba089fSpooka 		}
535b2ba089fSpooka 		/* beyond EOF ==> error */
536b2ba089fSpooka 		if (sz < 0) {
537b2ba089fSpooka 			rump_biodone(bp, 0, EINVAL);
538b2ba089fSpooka 			return;
539b2ba089fSpooka 		}
540b2ba089fSpooka 
541b2ba089fSpooka 		/* truncate to device size */
542b2ba089fSpooka 		bp->b_bcount = sz;
543b2ba089fSpooka 	}
544b2ba089fSpooka 
54589d98845Spooka 	off += rblk->rblk_hostoffset;
54607334a46Spooka 	DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
547e5d3ecb4Spooka 	    " (0x%" PRIx64 " - 0x%" PRIx64 "), %ssync\n",
5483099e2e6Spooka 	    bp->b_bcount, BUF_ISREAD(bp) ? "READ" : "WRITE",
549e5d3ecb4Spooka 	    off, off, (off + bp->b_bcount), async ? "a" : ""));
55007334a46Spooka 
551262a3aafSpooka 	op = BUF_ISREAD(bp) ? RUMPUSER_BIO_READ : RUMPUSER_BIO_WRITE;
552e5d3ecb4Spooka 	if (BUF_ISWRITE(bp) && !async)
553262a3aafSpooka 		op |= RUMPUSER_BIO_SYNC;
554262a3aafSpooka 
555262a3aafSpooka 	rumpuser_bio(rblk->rblk_fd, op, bp->b_data, bp->b_bcount, off,
556262a3aafSpooka 	    rump_biodone, bp);
55707334a46Spooka }
558fa73a97fSpooka 
/*
 * Regular strategy routine: every request is passed on to dostrategy().
 */
void
rumpblk_strategy(struct buf *bp)
{

	dostrategy(bp);
}
565fa73a97fSpooka 
566fa73a97fSpooka /*
567d600a654Spooka  * Simple random number generator.  This is private so that we can
568d600a654Spooka  * very repeatedly control which blocks will fail.
569d600a654Spooka  *
570fa73a97fSpooka  * <mlelstv> pooka, rand()
571fa73a97fSpooka  * <mlelstv> [paste]
572fa73a97fSpooka  */
573fa73a97fSpooka static unsigned
gimmerand(void)574fa73a97fSpooka gimmerand(void)
575fa73a97fSpooka {
576fa73a97fSpooka 
577fa73a97fSpooka 	return (randstate = randstate * 1103515245 + 12345) % (0x80000000L);
578fa73a97fSpooka }
579fa73a97fSpooka 
580fa73a97fSpooka /*
581fa73a97fSpooka  * Block device with very simple fault injection.  Fails every
582fa73a97fSpooka  * n out of BLKFAIL_MAX I/O with EIO.  n is determined by the env
583fa73a97fSpooka  * variable RUMP_BLKFAIL.
584fa73a97fSpooka  */
585fa73a97fSpooka void
rumpblk_strategy_fail(struct buf * bp)586fa73a97fSpooka rumpblk_strategy_fail(struct buf *bp)
587fa73a97fSpooka {
588fa73a97fSpooka 
589fa73a97fSpooka 	if (gimmerand() % BLKFAIL_MAX >= blkfail) {
590fa73a97fSpooka 		dostrategy(bp);
591fa73a97fSpooka 	} else {
592fa73a97fSpooka 		printf("block fault injection: failing I/O on block %lld\n",
593fa73a97fSpooka 		    (long long)bp->b_blkno);
594fa73a97fSpooka 		bp->b_error = EIO;
595fa73a97fSpooka 		biodone(bp);
596fa73a97fSpooka 	}
597fa73a97fSpooka }
598