xref: /minix3/minix/lib/libminixfs/bio.c (revision 7c48de6cc4c6d56f2277d378dba01dbac8a8c3b9)
1ebd3c067SDavid van Moolenbroek /*
2ebd3c067SDavid van Moolenbroek  * This file provides an implementation for block I/O functions as expected by
3ebd3c067SDavid van Moolenbroek  * libfsdriver for root file systems.  In particular, the lmfs_driver function
4ebd3c067SDavid van Moolenbroek  * can be used to implement fdr_driver, the lmfs_bio function can be used to
5ebd3c067SDavid van Moolenbroek  * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the
6ebd3c067SDavid van Moolenbroek  * lmfs_bflush function can be used to implement the fdr_bflush hook.  At the
7ebd3c067SDavid van Moolenbroek  * very least, a file system that makes use of the provided functionality
8ebd3c067SDavid van Moolenbroek  * must adhere to the following rules:
9ebd3c067SDavid van Moolenbroek  *
10ebd3c067SDavid van Moolenbroek  *   o  it must initialize this library in order to set up a buffer pool for
11ebd3c067SDavid van Moolenbroek  *      use by these functions, using the lmfs_buf_pool function; the
12ebd3c067SDavid van Moolenbroek  *      recommended number of blocks for *non*-disk-backed file systems is
134472b590SDavid van Moolenbroek  *      LMFS_MAX_PREFETCH buffers (disk-backed file systems typically use many
144472b590SDavid van Moolenbroek  *      more);
15ebd3c067SDavid van Moolenbroek  *   o  it must enable VM caching in order to support memory mapping of block
16ebd3c067SDavid van Moolenbroek  *      devices, using the lmfs_may_use_vmcache function;
17ebd3c067SDavid van Moolenbroek  *   o  it must either use lmfs_flushall as implementation for the fdr_sync
18ebd3c067SDavid van Moolenbroek  *      hook, or call lmfs_flushall as part of its own fdr_sync implementation.
19ebd3c067SDavid van Moolenbroek  *
20ebd3c067SDavid van Moolenbroek  * In addition, a disk-backed file system (as opposed to e.g. a networked file
21ebd3c067SDavid van Moolenbroek  * system that intends to be able to serve as a root file system) should
22ebd3c067SDavid van Moolenbroek  * consider the following points:
23ebd3c067SDavid van Moolenbroek  *
24ebd3c067SDavid van Moolenbroek  *   o  it may restrict calls to fdr_bwrite on the mounted partition, for
25ebd3c067SDavid van Moolenbroek  *      example to the partition's first 1024 bytes; it should generally not
26ebd3c067SDavid van Moolenbroek  *      prevent that area from being written even if the file system is mounted
27ebd3c067SDavid van Moolenbroek  *      read-only;
28ebd3c067SDavid van Moolenbroek  *   o  it is free to set its own block size, although the default block size
29ebd3c067SDavid van Moolenbroek  *      works fine for raw block I/O as well.
30ebd3c067SDavid van Moolenbroek  */
31ebd3c067SDavid van Moolenbroek 
32ebd3c067SDavid van Moolenbroek #include <minix/drivers.h>
33ebd3c067SDavid van Moolenbroek #include <minix/libminixfs.h>
34ebd3c067SDavid van Moolenbroek #include <minix/fsdriver.h>
35ebd3c067SDavid van Moolenbroek #include <minix/bdev.h>
366c46a77dSDavid van Moolenbroek #include <minix/partition.h>
376c46a77dSDavid van Moolenbroek #include <sys/ioctl.h>
38ebd3c067SDavid van Moolenbroek #include <assert.h>
39ebd3c067SDavid van Moolenbroek 
406c46a77dSDavid van Moolenbroek #include "inc.h"
416c46a77dSDavid van Moolenbroek 
/*
 * Associate the driver label 'label' with the block device 'dev'.  The caller
 * passes a full device number, but only the major number part of it is
 * relevant.  For now this merely forwards the call to libbdev; the long-term
 * intent is that file systems using this library need not call libbdev
 * directly at all.
 */
void
lmfs_driver(dev_t dev, char *label)
{
	bdev_driver(dev, label);
}
54ebd3c067SDavid van Moolenbroek 
55ebd3c067SDavid van Moolenbroek /*
56ebd3c067SDavid van Moolenbroek  * Prefetch up to "nblocks" blocks on "dev" starting from block number "block".
576c46a77dSDavid van Moolenbroek  * The size to be used for the last block in the range is given as "last_size".
58ebd3c067SDavid van Moolenbroek  * Stop early when either the I/O request fills up or when a block is already
59ebd3c067SDavid van Moolenbroek  * found to be in the cache.  The latter is likely to happen often, since this
60ebd3c067SDavid van Moolenbroek  * function is called before getting each block for reading.  Prefetching is a
61ebd3c067SDavid van Moolenbroek  * strictly best-effort operation, and may fail silently.
62ebd3c067SDavid van Moolenbroek  * TODO: limit according to the number of available buffers.
63ebd3c067SDavid van Moolenbroek  */
64ebd3c067SDavid van Moolenbroek static void
block_prefetch(dev_t dev,block64_t block,unsigned int nblocks,size_t block_size,size_t last_size)656c46a77dSDavid van Moolenbroek block_prefetch(dev_t dev, block64_t block, unsigned int nblocks,
666c46a77dSDavid van Moolenbroek 	size_t block_size, size_t last_size)
67ebd3c067SDavid van Moolenbroek {
684472b590SDavid van Moolenbroek 	struct buf *bp;
694472b590SDavid van Moolenbroek 	unsigned int count, limit;
706c46a77dSDavid van Moolenbroek 	int r;
716c46a77dSDavid van Moolenbroek 
724472b590SDavid van Moolenbroek 	limit = lmfs_readahead_limit();
734472b590SDavid van Moolenbroek 	assert(limit >= 1 && limit <= LMFS_MAX_PREFETCH);
744472b590SDavid van Moolenbroek 
754472b590SDavid van Moolenbroek 	if (nblocks > limit) {
764472b590SDavid van Moolenbroek 		nblocks = limit;
776c46a77dSDavid van Moolenbroek 
786c46a77dSDavid van Moolenbroek 		last_size = block_size;
796c46a77dSDavid van Moolenbroek 	}
80ebd3c067SDavid van Moolenbroek 
81ebd3c067SDavid van Moolenbroek 	for (count = 0; count < nblocks; count++) {
826c46a77dSDavid van Moolenbroek 		if (count == nblocks - 1 && last_size < block_size)
836c46a77dSDavid van Moolenbroek 			r = lmfs_get_partial_block(&bp, dev, block + count,
844472b590SDavid van Moolenbroek 			    PEEK, last_size);
856c46a77dSDavid van Moolenbroek 		else
864472b590SDavid van Moolenbroek 			r = lmfs_get_block(&bp, dev, block + count, PEEK);
876c46a77dSDavid van Moolenbroek 
884472b590SDavid van Moolenbroek 		if (r == OK) {
890314acfbSDavid van Moolenbroek 			lmfs_put_block(bp);
90ebd3c067SDavid van Moolenbroek 
914472b590SDavid van Moolenbroek 			last_size = block_size;
924472b590SDavid van Moolenbroek 
93ebd3c067SDavid van Moolenbroek 			break;
94ebd3c067SDavid van Moolenbroek 		}
95ebd3c067SDavid van Moolenbroek 	}
96ebd3c067SDavid van Moolenbroek 
97ebd3c067SDavid van Moolenbroek 	if (count > 0)
984472b590SDavid van Moolenbroek 		lmfs_readahead(dev, block, count, last_size);
99ebd3c067SDavid van Moolenbroek }
100ebd3c067SDavid van Moolenbroek 
101ebd3c067SDavid van Moolenbroek /*
102ebd3c067SDavid van Moolenbroek  * Perform block I/O, on "dev", starting from offset "pos", for a total of
103ebd3c067SDavid van Moolenbroek  * "bytes" bytes.  Reading, writing, and peeking are highly similar, and thus,
104ebd3c067SDavid van Moolenbroek  * this function implements all of them.  The "call" parameter indicates the
105ebd3c067SDavid van Moolenbroek  * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK).  For read and write calls,
106ebd3c067SDavid van Moolenbroek  * "data" will identify the user buffer to use; for peek calls, "data" is set
107ebd3c067SDavid van Moolenbroek  * to NULL.  In all cases, this function returns the number of bytes
108ebd3c067SDavid van Moolenbroek  * successfully transferred, 0 on end-of-file conditions, and a negative error
109ebd3c067SDavid van Moolenbroek  * code if no bytes could be transferred due to an error.  Dirty data is not
110ebd3c067SDavid van Moolenbroek  * flushed immediately, and thus, a successful write only indicates that the
111ebd3c067SDavid van Moolenbroek  * data have been taken in by the cache (for immediate I/O, a character device
112ebd3c067SDavid van Moolenbroek  * would have to be used, but MINIX3 no longer supports this), which may be
1136c46a77dSDavid van Moolenbroek  * follwed later by silent failures.  End-of-file conditions are always
1146c46a77dSDavid van Moolenbroek  * reported immediately, though.
115ebd3c067SDavid van Moolenbroek  */
116ebd3c067SDavid van Moolenbroek ssize_t
lmfs_bio(dev_t dev,struct fsdriver_data * data,size_t bytes,off_t pos,int call)117ebd3c067SDavid van Moolenbroek lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos,
118ebd3c067SDavid van Moolenbroek 	int call)
119ebd3c067SDavid van Moolenbroek {
120b65ad59eSDavid van Moolenbroek 	block64_t block;
1216c46a77dSDavid van Moolenbroek 	struct part_geom part;
1226c46a77dSDavid van Moolenbroek 	size_t block_size, off, block_off, last_size, size, chunk;
123b65ad59eSDavid van Moolenbroek 	unsigned int blocks_left;
124ebd3c067SDavid van Moolenbroek 	struct buf *bp;
125*7c48de6cSDavid van Moolenbroek 	int r, do_write, how;
126ebd3c067SDavid van Moolenbroek 
127ebd3c067SDavid van Moolenbroek 	if (dev == NO_DEV)
128ebd3c067SDavid van Moolenbroek 		return EINVAL;
129ebd3c067SDavid van Moolenbroek 
130ebd3c067SDavid van Moolenbroek 	block_size = lmfs_fs_block_size();
131*7c48de6cSDavid van Moolenbroek 	do_write = (call == FSC_WRITE);
132ebd3c067SDavid van Moolenbroek 
133ebd3c067SDavid van Moolenbroek 	assert(block_size > 0);
134ebd3c067SDavid van Moolenbroek 
135b65ad59eSDavid van Moolenbroek 	if (bytes == 0)
136b65ad59eSDavid van Moolenbroek 		return 0; /* just in case */
137b65ad59eSDavid van Moolenbroek 
138b65ad59eSDavid van Moolenbroek 	if (pos < 0 || bytes > SSIZE_MAX || pos > INT64_MAX - bytes + 1)
139ebd3c067SDavid van Moolenbroek 		return EINVAL;
140ebd3c067SDavid van Moolenbroek 
1416c46a77dSDavid van Moolenbroek 	/*
1426c46a77dSDavid van Moolenbroek 	 * Get the partition size, so that we can handle EOF ourselves.
1436c46a77dSDavid van Moolenbroek 	 * Unfortunately, we cannot cache the results between calls, since we
1446c46a77dSDavid van Moolenbroek 	 * do not get to see DIOCSETP ioctls--see also repartition(8).
1456c46a77dSDavid van Moolenbroek 	 */
1466c46a77dSDavid van Moolenbroek 	if ((r = bdev_ioctl(dev, DIOCGETP, &part, NONE /*user_endpt*/)) != OK)
1476c46a77dSDavid van Moolenbroek 		return r;
1486c46a77dSDavid van Moolenbroek 
1496c46a77dSDavid van Moolenbroek 	if ((uint64_t)pos >= part.size)
1506c46a77dSDavid van Moolenbroek 		return 0; /* EOF */
1516c46a77dSDavid van Moolenbroek 
1526c46a77dSDavid van Moolenbroek 	if ((uint64_t)pos > part.size - bytes)
1536c46a77dSDavid van Moolenbroek 		bytes = part.size - pos;
1546c46a77dSDavid van Moolenbroek 
155ebd3c067SDavid van Moolenbroek 	off = 0;
156ebd3c067SDavid van Moolenbroek 	block = pos / block_size;
157ebd3c067SDavid van Moolenbroek 	block_off = (size_t)(pos % block_size);
158ebd3c067SDavid van Moolenbroek 	blocks_left = howmany(block_off + bytes, block_size);
159ebd3c067SDavid van Moolenbroek 
1606c46a77dSDavid van Moolenbroek 	assert(blocks_left > 0);
1616c46a77dSDavid van Moolenbroek 
1626c46a77dSDavid van Moolenbroek 	/*
1636c46a77dSDavid van Moolenbroek 	 * If the last block we need is also the last block of the device,
1646c46a77dSDavid van Moolenbroek 	 * see how many bytes we should actually transfer for that block.
1656c46a77dSDavid van Moolenbroek 	 */
1666c46a77dSDavid van Moolenbroek 	if (block + blocks_left - 1 == part.size / block_size)
1676c46a77dSDavid van Moolenbroek 		last_size = part.size % block_size;
1686c46a77dSDavid van Moolenbroek 	else
1696c46a77dSDavid van Moolenbroek 		last_size = block_size;
1706c46a77dSDavid van Moolenbroek 
171ebd3c067SDavid van Moolenbroek 	r = OK;
172ebd3c067SDavid van Moolenbroek 
1736c46a77dSDavid van Moolenbroek 	for (off = 0; off < bytes && blocks_left > 0; off += chunk) {
1746c46a77dSDavid van Moolenbroek 		size = (blocks_left == 1) ? last_size : block_size;
1756c46a77dSDavid van Moolenbroek 
1766c46a77dSDavid van Moolenbroek 		chunk = size - block_off;
177ebd3c067SDavid van Moolenbroek 		if (chunk > bytes - off)
178ebd3c067SDavid van Moolenbroek 			chunk = bytes - off;
179ebd3c067SDavid van Moolenbroek 
1806c46a77dSDavid van Moolenbroek 		assert(chunk > 0 && chunk <= size);
1816c46a77dSDavid van Moolenbroek 
182ebd3c067SDavid van Moolenbroek 		/*
183ebd3c067SDavid van Moolenbroek 		 * For read requests, help the block driver form larger I/O
184ebd3c067SDavid van Moolenbroek 		 * requests.
185ebd3c067SDavid van Moolenbroek 		 */
186*7c48de6cSDavid van Moolenbroek 		if (!do_write)
1876c46a77dSDavid van Moolenbroek 			block_prefetch(dev, block, blocks_left, block_size,
1886c46a77dSDavid van Moolenbroek 			    last_size);
189ebd3c067SDavid van Moolenbroek 
190ebd3c067SDavid van Moolenbroek 		/*
191ebd3c067SDavid van Moolenbroek 		 * Do not read the block from disk if we will end up
192ebd3c067SDavid van Moolenbroek 		 * overwriting all of its contents.
193ebd3c067SDavid van Moolenbroek 		 */
194*7c48de6cSDavid van Moolenbroek 		how = (do_write && chunk == size) ? NO_READ : NORMAL;
195ebd3c067SDavid van Moolenbroek 
1966c46a77dSDavid van Moolenbroek 		if (size < block_size)
1976c46a77dSDavid van Moolenbroek 			r = lmfs_get_partial_block(&bp, dev, block, how, size);
1986c46a77dSDavid van Moolenbroek 		else
1996c46a77dSDavid van Moolenbroek 			r = lmfs_get_block(&bp, dev, block, how);
200ebd3c067SDavid van Moolenbroek 
2016c46a77dSDavid van Moolenbroek 		if (r != OK) {
2026c46a77dSDavid van Moolenbroek 			printf("libminixfs: error getting block <%"PRIx64","
2036c46a77dSDavid van Moolenbroek 			    "%"PRIu64"> for device I/O (%d)\n", dev, block, r);
204ebd3c067SDavid van Moolenbroek 
2056c46a77dSDavid van Moolenbroek 			break;
2066c46a77dSDavid van Moolenbroek 		}
2076c46a77dSDavid van Moolenbroek 
2086c46a77dSDavid van Moolenbroek 		/* Perform the actual copy. */
209ebd3c067SDavid van Moolenbroek 		if (r == OK && data != NULL) {
210*7c48de6cSDavid van Moolenbroek 			if (do_write) {
211ebd3c067SDavid van Moolenbroek 				r = fsdriver_copyin(data, off,
212ebd3c067SDavid van Moolenbroek 				    (char *)bp->data + block_off, chunk);
213ebd3c067SDavid van Moolenbroek 
214ebd3c067SDavid van Moolenbroek 				/*
215ebd3c067SDavid van Moolenbroek 				 * Mark the block as dirty even if the copy
216ebd3c067SDavid van Moolenbroek 				 * failed, since the copy may in fact have
217ebd3c067SDavid van Moolenbroek 				 * succeeded partially.  This is an interface
218ebd3c067SDavid van Moolenbroek 				 * issue that should be resolved at some point,
219ebd3c067SDavid van Moolenbroek 				 * but for now we do not want the cache to be
220ebd3c067SDavid van Moolenbroek 				 * desynchronized from the disk contents.
221ebd3c067SDavid van Moolenbroek 				 */
222ebd3c067SDavid van Moolenbroek 				lmfs_markdirty(bp);
223ebd3c067SDavid van Moolenbroek 			} else
224ebd3c067SDavid van Moolenbroek 				r = fsdriver_copyout(data, off,
225ebd3c067SDavid van Moolenbroek 				    (char *)bp->data + block_off, chunk);
226ebd3c067SDavid van Moolenbroek 		}
227ebd3c067SDavid van Moolenbroek 
2280314acfbSDavid van Moolenbroek 		lmfs_put_block(bp);
229ebd3c067SDavid van Moolenbroek 
230ebd3c067SDavid van Moolenbroek 		if (r != OK)
231ebd3c067SDavid van Moolenbroek 			break;
232ebd3c067SDavid van Moolenbroek 
233ebd3c067SDavid van Moolenbroek 		block++;
234ebd3c067SDavid van Moolenbroek 		block_off = 0;
235ebd3c067SDavid van Moolenbroek 		blocks_left--;
236ebd3c067SDavid van Moolenbroek 	}
237ebd3c067SDavid van Moolenbroek 
238ebd3c067SDavid van Moolenbroek 	/*
2396c46a77dSDavid van Moolenbroek 	 * If we were not able to do any I/O, return the error.  Otherwise,
2406c46a77dSDavid van Moolenbroek 	 * return how many bytes we did manage to transfer.
241ebd3c067SDavid van Moolenbroek 	 */
242ebd3c067SDavid van Moolenbroek 	if (r != OK && off == 0)
2436c46a77dSDavid van Moolenbroek 		return r;
244ebd3c067SDavid van Moolenbroek 
245ebd3c067SDavid van Moolenbroek 	return off;
246ebd3c067SDavid van Moolenbroek }
247ebd3c067SDavid van Moolenbroek 
/*
 * Handle a flush request for block device "dev".  All blocks belonging to the
 * device are flushed and then invalidated, both in the local cache and in VM.
 * This is called once a block device has been closed, and must ensure that no
 * stale copies of the device's blocks are left behind in any cache.
 */
void
lmfs_bflush(dev_t dev)
{
	/* Step one: write the device's dirty blocks out to disk. */
	lmfs_flushdev(dev);

	/* Step two: discard every block that is associated with the device. */
	lmfs_invalidate(dev);
}
264