/*
 * This file provides an implementation for block I/O functions as expected by
 * libfsdriver for root file systems. In particular, the lmfs_driver function
 * can be used to implement fdr_driver, the lmfs_bio function can be used to
 * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the
 * lmfs_bflush function can be used to implement the fdr_bflush hook. At the
 * very least, a file system that makes use of the provided functionality
 * must adhere to the following rules:
 *
 * o  it must initialize this library in order to set up a buffer pool for
 *    use by these functions, using the lmfs_buf_pool function; the
 *    recommended number of blocks for *non*-disk-backed file systems is
 *    LMFS_MAX_PREFETCH buffers (disk-backed file systems typically use many
 *    more);
 * o  it must enable VM caching in order to support memory mapping of block
 *    devices, using the lmfs_may_use_vmcache function;
 * o  it must either use lmfs_flushall as implementation for the fdr_sync
 *    hook, or call lmfs_flushall as part of its own fdr_sync implementation.
 *
 * In addition, a disk-backed file system (as opposed to e.g. a networked file
 * system that intends to be able to serve as a root file system) should
 * consider the following points:
 *
 * o  it may restrict calls to fdr_bwrite on the mounted partition, for
 *    example to the partition's first 1024 bytes; it should generally not
 *    prevent that area from being written even if the file system is mounted
 *    read-only;
 * o  it is free to set its own block size, although the default block size
 *    works fine for raw block I/O as well.
 */
31ebd3c067SDavid van Moolenbroek
32ebd3c067SDavid van Moolenbroek #include <minix/drivers.h>
33ebd3c067SDavid van Moolenbroek #include <minix/libminixfs.h>
34ebd3c067SDavid van Moolenbroek #include <minix/fsdriver.h>
35ebd3c067SDavid van Moolenbroek #include <minix/bdev.h>
366c46a77dSDavid van Moolenbroek #include <minix/partition.h>
376c46a77dSDavid van Moolenbroek #include <sys/ioctl.h>
38ebd3c067SDavid van Moolenbroek #include <assert.h>
39ebd3c067SDavid van Moolenbroek
406c46a77dSDavid van Moolenbroek #include "inc.h"
416c46a77dSDavid van Moolenbroek
/*
 * Associate the driver label 'label' with the block device 'dev'.  Although a
 * complete device number is passed in, only its major number is meant to be
 * significant.  The request is handed straight to libbdev; the indirection
 * exists so that file systems built on this library need not call libbdev
 * themselves, which is the direction this interface is intended to grow in.
 */
void
lmfs_driver(dev_t dev, char *label)
{

	/* Nothing to add here yet: libbdev does all the work. */
	bdev_driver(dev, label);
}
54ebd3c067SDavid van Moolenbroek
55ebd3c067SDavid van Moolenbroek /*
56ebd3c067SDavid van Moolenbroek * Prefetch up to "nblocks" blocks on "dev" starting from block number "block".
576c46a77dSDavid van Moolenbroek * The size to be used for the last block in the range is given as "last_size".
58ebd3c067SDavid van Moolenbroek * Stop early when either the I/O request fills up or when a block is already
59ebd3c067SDavid van Moolenbroek * found to be in the cache. The latter is likely to happen often, since this
60ebd3c067SDavid van Moolenbroek * function is called before getting each block for reading. Prefetching is a
61ebd3c067SDavid van Moolenbroek * strictly best-effort operation, and may fail silently.
62ebd3c067SDavid van Moolenbroek * TODO: limit according to the number of available buffers.
63ebd3c067SDavid van Moolenbroek */
64ebd3c067SDavid van Moolenbroek static void
block_prefetch(dev_t dev,block64_t block,unsigned int nblocks,size_t block_size,size_t last_size)656c46a77dSDavid van Moolenbroek block_prefetch(dev_t dev, block64_t block, unsigned int nblocks,
666c46a77dSDavid van Moolenbroek size_t block_size, size_t last_size)
67ebd3c067SDavid van Moolenbroek {
684472b590SDavid van Moolenbroek struct buf *bp;
694472b590SDavid van Moolenbroek unsigned int count, limit;
706c46a77dSDavid van Moolenbroek int r;
716c46a77dSDavid van Moolenbroek
724472b590SDavid van Moolenbroek limit = lmfs_readahead_limit();
734472b590SDavid van Moolenbroek assert(limit >= 1 && limit <= LMFS_MAX_PREFETCH);
744472b590SDavid van Moolenbroek
754472b590SDavid van Moolenbroek if (nblocks > limit) {
764472b590SDavid van Moolenbroek nblocks = limit;
776c46a77dSDavid van Moolenbroek
786c46a77dSDavid van Moolenbroek last_size = block_size;
796c46a77dSDavid van Moolenbroek }
80ebd3c067SDavid van Moolenbroek
81ebd3c067SDavid van Moolenbroek for (count = 0; count < nblocks; count++) {
826c46a77dSDavid van Moolenbroek if (count == nblocks - 1 && last_size < block_size)
836c46a77dSDavid van Moolenbroek r = lmfs_get_partial_block(&bp, dev, block + count,
844472b590SDavid van Moolenbroek PEEK, last_size);
856c46a77dSDavid van Moolenbroek else
864472b590SDavid van Moolenbroek r = lmfs_get_block(&bp, dev, block + count, PEEK);
876c46a77dSDavid van Moolenbroek
884472b590SDavid van Moolenbroek if (r == OK) {
890314acfbSDavid van Moolenbroek lmfs_put_block(bp);
90ebd3c067SDavid van Moolenbroek
914472b590SDavid van Moolenbroek last_size = block_size;
924472b590SDavid van Moolenbroek
93ebd3c067SDavid van Moolenbroek break;
94ebd3c067SDavid van Moolenbroek }
95ebd3c067SDavid van Moolenbroek }
96ebd3c067SDavid van Moolenbroek
97ebd3c067SDavid van Moolenbroek if (count > 0)
984472b590SDavid van Moolenbroek lmfs_readahead(dev, block, count, last_size);
99ebd3c067SDavid van Moolenbroek }
100ebd3c067SDavid van Moolenbroek
101ebd3c067SDavid van Moolenbroek /*
102ebd3c067SDavid van Moolenbroek * Perform block I/O, on "dev", starting from offset "pos", for a total of
103ebd3c067SDavid van Moolenbroek * "bytes" bytes. Reading, writing, and peeking are highly similar, and thus,
104ebd3c067SDavid van Moolenbroek * this function implements all of them. The "call" parameter indicates the
105ebd3c067SDavid van Moolenbroek * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK). For read and write calls,
106ebd3c067SDavid van Moolenbroek * "data" will identify the user buffer to use; for peek calls, "data" is set
107ebd3c067SDavid van Moolenbroek * to NULL. In all cases, this function returns the number of bytes
108ebd3c067SDavid van Moolenbroek * successfully transferred, 0 on end-of-file conditions, and a negative error
109ebd3c067SDavid van Moolenbroek * code if no bytes could be transferred due to an error. Dirty data is not
110ebd3c067SDavid van Moolenbroek * flushed immediately, and thus, a successful write only indicates that the
111ebd3c067SDavid van Moolenbroek * data have been taken in by the cache (for immediate I/O, a character device
112ebd3c067SDavid van Moolenbroek * would have to be used, but MINIX3 no longer supports this), which may be
1136c46a77dSDavid van Moolenbroek * follwed later by silent failures. End-of-file conditions are always
1146c46a77dSDavid van Moolenbroek * reported immediately, though.
115ebd3c067SDavid van Moolenbroek */
116ebd3c067SDavid van Moolenbroek ssize_t
lmfs_bio(dev_t dev,struct fsdriver_data * data,size_t bytes,off_t pos,int call)117ebd3c067SDavid van Moolenbroek lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos,
118ebd3c067SDavid van Moolenbroek int call)
119ebd3c067SDavid van Moolenbroek {
120b65ad59eSDavid van Moolenbroek block64_t block;
1216c46a77dSDavid van Moolenbroek struct part_geom part;
1226c46a77dSDavid van Moolenbroek size_t block_size, off, block_off, last_size, size, chunk;
123b65ad59eSDavid van Moolenbroek unsigned int blocks_left;
124ebd3c067SDavid van Moolenbroek struct buf *bp;
125*7c48de6cSDavid van Moolenbroek int r, do_write, how;
126ebd3c067SDavid van Moolenbroek
127ebd3c067SDavid van Moolenbroek if (dev == NO_DEV)
128ebd3c067SDavid van Moolenbroek return EINVAL;
129ebd3c067SDavid van Moolenbroek
130ebd3c067SDavid van Moolenbroek block_size = lmfs_fs_block_size();
131*7c48de6cSDavid van Moolenbroek do_write = (call == FSC_WRITE);
132ebd3c067SDavid van Moolenbroek
133ebd3c067SDavid van Moolenbroek assert(block_size > 0);
134ebd3c067SDavid van Moolenbroek
135b65ad59eSDavid van Moolenbroek if (bytes == 0)
136b65ad59eSDavid van Moolenbroek return 0; /* just in case */
137b65ad59eSDavid van Moolenbroek
138b65ad59eSDavid van Moolenbroek if (pos < 0 || bytes > SSIZE_MAX || pos > INT64_MAX - bytes + 1)
139ebd3c067SDavid van Moolenbroek return EINVAL;
140ebd3c067SDavid van Moolenbroek
1416c46a77dSDavid van Moolenbroek /*
1426c46a77dSDavid van Moolenbroek * Get the partition size, so that we can handle EOF ourselves.
1436c46a77dSDavid van Moolenbroek * Unfortunately, we cannot cache the results between calls, since we
1446c46a77dSDavid van Moolenbroek * do not get to see DIOCSETP ioctls--see also repartition(8).
1456c46a77dSDavid van Moolenbroek */
1466c46a77dSDavid van Moolenbroek if ((r = bdev_ioctl(dev, DIOCGETP, &part, NONE /*user_endpt*/)) != OK)
1476c46a77dSDavid van Moolenbroek return r;
1486c46a77dSDavid van Moolenbroek
1496c46a77dSDavid van Moolenbroek if ((uint64_t)pos >= part.size)
1506c46a77dSDavid van Moolenbroek return 0; /* EOF */
1516c46a77dSDavid van Moolenbroek
1526c46a77dSDavid van Moolenbroek if ((uint64_t)pos > part.size - bytes)
1536c46a77dSDavid van Moolenbroek bytes = part.size - pos;
1546c46a77dSDavid van Moolenbroek
155ebd3c067SDavid van Moolenbroek off = 0;
156ebd3c067SDavid van Moolenbroek block = pos / block_size;
157ebd3c067SDavid van Moolenbroek block_off = (size_t)(pos % block_size);
158ebd3c067SDavid van Moolenbroek blocks_left = howmany(block_off + bytes, block_size);
159ebd3c067SDavid van Moolenbroek
1606c46a77dSDavid van Moolenbroek assert(blocks_left > 0);
1616c46a77dSDavid van Moolenbroek
1626c46a77dSDavid van Moolenbroek /*
1636c46a77dSDavid van Moolenbroek * If the last block we need is also the last block of the device,
1646c46a77dSDavid van Moolenbroek * see how many bytes we should actually transfer for that block.
1656c46a77dSDavid van Moolenbroek */
1666c46a77dSDavid van Moolenbroek if (block + blocks_left - 1 == part.size / block_size)
1676c46a77dSDavid van Moolenbroek last_size = part.size % block_size;
1686c46a77dSDavid van Moolenbroek else
1696c46a77dSDavid van Moolenbroek last_size = block_size;
1706c46a77dSDavid van Moolenbroek
171ebd3c067SDavid van Moolenbroek r = OK;
172ebd3c067SDavid van Moolenbroek
1736c46a77dSDavid van Moolenbroek for (off = 0; off < bytes && blocks_left > 0; off += chunk) {
1746c46a77dSDavid van Moolenbroek size = (blocks_left == 1) ? last_size : block_size;
1756c46a77dSDavid van Moolenbroek
1766c46a77dSDavid van Moolenbroek chunk = size - block_off;
177ebd3c067SDavid van Moolenbroek if (chunk > bytes - off)
178ebd3c067SDavid van Moolenbroek chunk = bytes - off;
179ebd3c067SDavid van Moolenbroek
1806c46a77dSDavid van Moolenbroek assert(chunk > 0 && chunk <= size);
1816c46a77dSDavid van Moolenbroek
182ebd3c067SDavid van Moolenbroek /*
183ebd3c067SDavid van Moolenbroek * For read requests, help the block driver form larger I/O
184ebd3c067SDavid van Moolenbroek * requests.
185ebd3c067SDavid van Moolenbroek */
186*7c48de6cSDavid van Moolenbroek if (!do_write)
1876c46a77dSDavid van Moolenbroek block_prefetch(dev, block, blocks_left, block_size,
1886c46a77dSDavid van Moolenbroek last_size);
189ebd3c067SDavid van Moolenbroek
190ebd3c067SDavid van Moolenbroek /*
191ebd3c067SDavid van Moolenbroek * Do not read the block from disk if we will end up
192ebd3c067SDavid van Moolenbroek * overwriting all of its contents.
193ebd3c067SDavid van Moolenbroek */
194*7c48de6cSDavid van Moolenbroek how = (do_write && chunk == size) ? NO_READ : NORMAL;
195ebd3c067SDavid van Moolenbroek
1966c46a77dSDavid van Moolenbroek if (size < block_size)
1976c46a77dSDavid van Moolenbroek r = lmfs_get_partial_block(&bp, dev, block, how, size);
1986c46a77dSDavid van Moolenbroek else
1996c46a77dSDavid van Moolenbroek r = lmfs_get_block(&bp, dev, block, how);
200ebd3c067SDavid van Moolenbroek
2016c46a77dSDavid van Moolenbroek if (r != OK) {
2026c46a77dSDavid van Moolenbroek printf("libminixfs: error getting block <%"PRIx64","
2036c46a77dSDavid van Moolenbroek "%"PRIu64"> for device I/O (%d)\n", dev, block, r);
204ebd3c067SDavid van Moolenbroek
2056c46a77dSDavid van Moolenbroek break;
2066c46a77dSDavid van Moolenbroek }
2076c46a77dSDavid van Moolenbroek
2086c46a77dSDavid van Moolenbroek /* Perform the actual copy. */
209ebd3c067SDavid van Moolenbroek if (r == OK && data != NULL) {
210*7c48de6cSDavid van Moolenbroek if (do_write) {
211ebd3c067SDavid van Moolenbroek r = fsdriver_copyin(data, off,
212ebd3c067SDavid van Moolenbroek (char *)bp->data + block_off, chunk);
213ebd3c067SDavid van Moolenbroek
214ebd3c067SDavid van Moolenbroek /*
215ebd3c067SDavid van Moolenbroek * Mark the block as dirty even if the copy
216ebd3c067SDavid van Moolenbroek * failed, since the copy may in fact have
217ebd3c067SDavid van Moolenbroek * succeeded partially. This is an interface
218ebd3c067SDavid van Moolenbroek * issue that should be resolved at some point,
219ebd3c067SDavid van Moolenbroek * but for now we do not want the cache to be
220ebd3c067SDavid van Moolenbroek * desynchronized from the disk contents.
221ebd3c067SDavid van Moolenbroek */
222ebd3c067SDavid van Moolenbroek lmfs_markdirty(bp);
223ebd3c067SDavid van Moolenbroek } else
224ebd3c067SDavid van Moolenbroek r = fsdriver_copyout(data, off,
225ebd3c067SDavid van Moolenbroek (char *)bp->data + block_off, chunk);
226ebd3c067SDavid van Moolenbroek }
227ebd3c067SDavid van Moolenbroek
2280314acfbSDavid van Moolenbroek lmfs_put_block(bp);
229ebd3c067SDavid van Moolenbroek
230ebd3c067SDavid van Moolenbroek if (r != OK)
231ebd3c067SDavid van Moolenbroek break;
232ebd3c067SDavid van Moolenbroek
233ebd3c067SDavid van Moolenbroek block++;
234ebd3c067SDavid van Moolenbroek block_off = 0;
235ebd3c067SDavid van Moolenbroek blocks_left--;
236ebd3c067SDavid van Moolenbroek }
237ebd3c067SDavid van Moolenbroek
238ebd3c067SDavid van Moolenbroek /*
2396c46a77dSDavid van Moolenbroek * If we were not able to do any I/O, return the error. Otherwise,
2406c46a77dSDavid van Moolenbroek * return how many bytes we did manage to transfer.
241ebd3c067SDavid van Moolenbroek */
242ebd3c067SDavid van Moolenbroek if (r != OK && off == 0)
2436c46a77dSDavid van Moolenbroek return r;
244ebd3c067SDavid van Moolenbroek
245ebd3c067SDavid van Moolenbroek return off;
246ebd3c067SDavid van Moolenbroek }
247ebd3c067SDavid van Moolenbroek
/*
 * Handle a flush request for block device "dev": write out and then discard
 * every block belonging to the device, from the local cache as well as from
 * VM.  The request arrives once the block device has been closed, and no
 * stale copy of any of its blocks may survive in a cache afterwards.
 */
void
lmfs_bflush(dev_t dev)
{

	/*
	 * The order matters: dirty blocks have to reach the disk first, as
	 * they would otherwise be thrown away by the invalidation below.
	 */
	lmfs_flushdev(dev);

	/* Now drop all blocks that are still associated with the device. */
	lmfs_invalidate(dev);
}
264