/*-
 *  modified for Lites 1.1
 *
 *  Aug 1995, Godmar Back (gback@cs.utah.edu)
 *  University of Utah, Department of Computer Science
 */
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/buf2.h>
#include <sys/endian.h>
#include <sys/malloc.h>
#include <sys/mutex2.h>

#include <vfs/ext2fs/fs.h>
#include <vfs/ext2fs/inode.h>
#include <vfs/ext2fs/ext2_mount.h>
#include <vfs/ext2fs/ext2fs.h>
#include <vfs/ext2fs/ext2_extern.h>

60cfe60390STomohiro Kusumi SDT_PROVIDER_DEFINE(ext2fs);
61cfe60390STomohiro Kusumi /*
62cfe60390STomohiro Kusumi * ext2fs trace probe:
63cfe60390STomohiro Kusumi * arg0: verbosity. Higher numbers give more verbose messages
64cfe60390STomohiro Kusumi * arg1: Textual message
65cfe60390STomohiro Kusumi */
66cfe60390STomohiro Kusumi SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*");
67cfe60390STomohiro Kusumi SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc,
68cfe60390STomohiro Kusumi "ino_t", "e2fs_lbn_t", "e2fs_lbn_t");
69cfe60390STomohiro Kusumi SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t");
70cfe60390STomohiro Kusumi SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t");
71cfe60390STomohiro Kusumi SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int");
72cfe60390STomohiro Kusumi SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted,
73cfe60390STomohiro Kusumi "int", "daddr_t", "char*");
74cfe60390STomohiro Kusumi SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t");
75cfe60390STomohiro Kusumi SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t");
76cfe60390STomohiro Kusumi
77cfe60390STomohiro Kusumi static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int);
78cfe60390STomohiro Kusumi static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int);
79cfe60390STomohiro Kusumi static u_long ext2_dirpref(struct inode *);
80cfe60390STomohiro Kusumi static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int,
81cfe60390STomohiro Kusumi daddr_t (*)(struct inode *, int, daddr_t,
82cfe60390STomohiro Kusumi int));
83cfe60390STomohiro Kusumi static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int);
84cfe60390STomohiro Kusumi static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
85cfe60390STomohiro Kusumi
86cfe60390STomohiro Kusumi /*
87cfe60390STomohiro Kusumi * Allocate a block in the filesystem.
88cfe60390STomohiro Kusumi *
89cfe60390STomohiro Kusumi * A preference may be optionally specified. If a preference is given
90cfe60390STomohiro Kusumi * the following hierarchy is used to allocate a block:
91cfe60390STomohiro Kusumi * 1) allocate the requested block.
92cfe60390STomohiro Kusumi * 2) allocate a rotationally optimal block in the same cylinder.
93cfe60390STomohiro Kusumi * 3) allocate a block in the same cylinder group.
94*f39b800bSTomohiro Kusumi * 4) quadratically rehash into other cylinder groups, until an
95cfe60390STomohiro Kusumi * available block is located.
96cfe60390STomohiro Kusumi * If no block preference is given the following hierarchy is used
97cfe60390STomohiro Kusumi * to allocate a block:
98cfe60390STomohiro Kusumi * 1) allocate a block in the cylinder group that contains the
99cfe60390STomohiro Kusumi * inode for the file.
100*f39b800bSTomohiro Kusumi * 2) quadratically rehash into other cylinder groups, until an
101cfe60390STomohiro Kusumi * available block is located.
102cfe60390STomohiro Kusumi */
103cfe60390STomohiro Kusumi int
ext2_alloc(struct inode * ip,daddr_t lbn,e4fs_daddr_t bpref,int size,struct ucred * cred,e4fs_daddr_t * bnp)104cfe60390STomohiro Kusumi ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
105cfe60390STomohiro Kusumi struct ucred *cred, e4fs_daddr_t *bnp)
106cfe60390STomohiro Kusumi {
107cfe60390STomohiro Kusumi struct m_ext2fs *fs;
108cfe60390STomohiro Kusumi struct ext2mount *ump;
109cfe60390STomohiro Kusumi e4fs_daddr_t bno;
110cfe60390STomohiro Kusumi int cg;
111cfe60390STomohiro Kusumi
112cfe60390STomohiro Kusumi *bnp = 0;
113cfe60390STomohiro Kusumi fs = ip->i_e2fs;
114cfe60390STomohiro Kusumi ump = ip->i_ump;
115cfe60390STomohiro Kusumi mtx_assert(EXT2_MTX(ump), MA_OWNED);
116cfe60390STomohiro Kusumi #ifdef INVARIANTS
117cfe60390STomohiro Kusumi if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
118cfe60390STomohiro Kusumi printf("bsize = %lu, size = %d, fs = %s\n",
119cfe60390STomohiro Kusumi (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
120cfe60390STomohiro Kusumi panic("ext2_alloc: bad size");
121cfe60390STomohiro Kusumi }
122cfe60390STomohiro Kusumi if (cred == NOCRED)
123cfe60390STomohiro Kusumi panic("ext2_alloc: missing credential");
124cfe60390STomohiro Kusumi #endif /* INVARIANTS */
125cfe60390STomohiro Kusumi if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0)
126cfe60390STomohiro Kusumi goto nospace;
127cfe60390STomohiro Kusumi if (cred->cr_uid != 0 &&
128cfe60390STomohiro Kusumi fs->e2fs_fbcount < fs->e2fs_rbcount)
129cfe60390STomohiro Kusumi goto nospace;
130cfe60390STomohiro Kusumi if (bpref >= fs->e2fs_bcount)
131cfe60390STomohiro Kusumi bpref = 0;
132cfe60390STomohiro Kusumi if (bpref == 0)
133cfe60390STomohiro Kusumi cg = ino_to_cg(fs, ip->i_number);
134cfe60390STomohiro Kusumi else
135cfe60390STomohiro Kusumi cg = dtog(fs, bpref);
136cfe60390STomohiro Kusumi bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
137cfe60390STomohiro Kusumi ext2_alloccg);
138cfe60390STomohiro Kusumi if (bno > 0) {
139cfe60390STomohiro Kusumi /* set next_alloc fields as done in block_getblk */
140cfe60390STomohiro Kusumi ip->i_next_alloc_block = lbn;
141cfe60390STomohiro Kusumi ip->i_next_alloc_goal = bno;
142cfe60390STomohiro Kusumi
143cfe60390STomohiro Kusumi ip->i_blocks += btodb(fs->e2fs_bsize);
144cfe60390STomohiro Kusumi ip->i_flag |= IN_CHANGE | IN_UPDATE;
145cfe60390STomohiro Kusumi *bnp = bno;
146cfe60390STomohiro Kusumi return (0);
147cfe60390STomohiro Kusumi }
148cfe60390STomohiro Kusumi nospace:
149cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
150cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block");
151cfe60390STomohiro Kusumi return (ENOSPC);
152cfe60390STomohiro Kusumi }
153cfe60390STomohiro Kusumi
154cfe60390STomohiro Kusumi /*
155cfe60390STomohiro Kusumi * Allocate EA's block for inode.
156cfe60390STomohiro Kusumi */
157cfe60390STomohiro Kusumi e4fs_daddr_t
ext2_alloc_meta(struct inode * ip)158cfe60390STomohiro Kusumi ext2_alloc_meta(struct inode *ip)
159cfe60390STomohiro Kusumi {
160cfe60390STomohiro Kusumi struct m_ext2fs *fs;
161cfe60390STomohiro Kusumi daddr_t blk;
162cfe60390STomohiro Kusumi
163cfe60390STomohiro Kusumi fs = ip->i_e2fs;
164cfe60390STomohiro Kusumi
165cfe60390STomohiro Kusumi EXT2_LOCK(ip->i_ump);
166cfe60390STomohiro Kusumi blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize,
167cfe60390STomohiro Kusumi ext2_alloccg);
168cfe60390STomohiro Kusumi if (0 == blk) {
169cfe60390STomohiro Kusumi EXT2_UNLOCK(ip->i_ump);
170cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block");
171cfe60390STomohiro Kusumi }
172cfe60390STomohiro Kusumi
173cfe60390STomohiro Kusumi return (blk);
174cfe60390STomohiro Kusumi }
175cfe60390STomohiro Kusumi
/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous is given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */

191cfe60390STomohiro Kusumi static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem");
192cfe60390STomohiro Kusumi
193cfe60390STomohiro Kusumi static int doasyncfree = 1;
194cfe60390STomohiro Kusumi
195cfe60390STomohiro Kusumi SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
1965a4a4dbaSSascha Wildner "Use asynchronous writes to update block pointers when freeing blocks");
197cfe60390STomohiro Kusumi
198cfe60390STomohiro Kusumi static int doreallocblks = 0;
199cfe60390STomohiro Kusumi
200cfe60390STomohiro Kusumi SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
201cfe60390STomohiro Kusumi
202cfe60390STomohiro Kusumi int
ext2_reallocblks(struct vop_reallocblks_args * ap)203cfe60390STomohiro Kusumi ext2_reallocblks(struct vop_reallocblks_args *ap)
204cfe60390STomohiro Kusumi {
205cfe60390STomohiro Kusumi struct m_ext2fs *fs;
206cfe60390STomohiro Kusumi struct inode *ip;
207cfe60390STomohiro Kusumi struct vnode *vp;
208cfe60390STomohiro Kusumi struct buf *sbp, *ebp;
209cfe60390STomohiro Kusumi uint32_t *bap, *sbap, *ebap;
210cfe60390STomohiro Kusumi struct ext2mount *ump;
211cfe60390STomohiro Kusumi struct cluster_save *buflist;
212cfe60390STomohiro Kusumi struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp;
213cfe60390STomohiro Kusumi e2fs_lbn_t start_lbn, end_lbn;
214cfe60390STomohiro Kusumi int soff;
215cfe60390STomohiro Kusumi e2fs_daddr_t newblk, blkno;
216cfe60390STomohiro Kusumi int i, len, start_lvl, end_lvl, pref, ssize;
217cfe60390STomohiro Kusumi
218cfe60390STomohiro Kusumi if (doreallocblks == 0)
219cfe60390STomohiro Kusumi return (ENOSPC);
220cfe60390STomohiro Kusumi
221cfe60390STomohiro Kusumi vp = ap->a_vp;
222cfe60390STomohiro Kusumi ip = VTOI(vp);
223cfe60390STomohiro Kusumi fs = ip->i_e2fs;
224cfe60390STomohiro Kusumi ump = ip->i_ump;
225cfe60390STomohiro Kusumi
226cfe60390STomohiro Kusumi if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS)
227cfe60390STomohiro Kusumi return (ENOSPC);
228cfe60390STomohiro Kusumi
229cfe60390STomohiro Kusumi buflist = ap->a_buflist;
230cfe60390STomohiro Kusumi len = buflist->bs_nchildren;
231cfe60390STomohiro Kusumi start_lbn = lblkno(fs, buflist->bs_children[0]->b_loffset);
232cfe60390STomohiro Kusumi end_lbn = start_lbn + len - 1;
233cfe60390STomohiro Kusumi #ifdef INVARIANTS
234cfe60390STomohiro Kusumi for (i = 1; i < len; i++)
235cfe60390STomohiro Kusumi if (buflist->bs_children[i]->b_loffset != lblktodoff(fs, start_lbn + i))
236cfe60390STomohiro Kusumi panic("ext2_reallocblks: non-cluster");
237cfe60390STomohiro Kusumi #endif
238cfe60390STomohiro Kusumi /*
239cfe60390STomohiro Kusumi * If the cluster crosses the boundary for the first indirect
240cfe60390STomohiro Kusumi * block, leave space for the indirect block. Indirect blocks
241cfe60390STomohiro Kusumi * are initially laid out in a position after the last direct
242cfe60390STomohiro Kusumi * block. Block reallocation would usually destroy locality by
243cfe60390STomohiro Kusumi * moving the indirect block out of the way to make room for
244cfe60390STomohiro Kusumi * data blocks if we didn't compensate here. We should also do
245cfe60390STomohiro Kusumi * this for other indirect block boundaries, but it is only
246cfe60390STomohiro Kusumi * important for the first one.
247cfe60390STomohiro Kusumi */
248cfe60390STomohiro Kusumi if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR)
249cfe60390STomohiro Kusumi return (ENOSPC);
250cfe60390STomohiro Kusumi /*
251cfe60390STomohiro Kusumi * If the latest allocation is in a new cylinder group, assume that
252cfe60390STomohiro Kusumi * the filesystem has decided to move and do not force it back to
253cfe60390STomohiro Kusumi * the previous cylinder group.
254cfe60390STomohiro Kusumi */
255cfe60390STomohiro Kusumi if (dtog(fs, dofftofsb(fs, buflist->bs_children[0]->b_bio2.bio_offset)) !=
256cfe60390STomohiro Kusumi dtog(fs, dofftofsb(fs, buflist->bs_children[len - 1]->b_bio2.bio_offset)))
257cfe60390STomohiro Kusumi return (ENOSPC);
258cfe60390STomohiro Kusumi if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
259cfe60390STomohiro Kusumi ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
260cfe60390STomohiro Kusumi return (ENOSPC);
261cfe60390STomohiro Kusumi /*
262cfe60390STomohiro Kusumi * Get the starting offset and block map for the first block.
263cfe60390STomohiro Kusumi */
264cfe60390STomohiro Kusumi if (start_lvl == 0) {
265cfe60390STomohiro Kusumi sbap = &ip->i_db[0];
266cfe60390STomohiro Kusumi soff = start_lbn;
267cfe60390STomohiro Kusumi } else {
268cfe60390STomohiro Kusumi idp = &start_ap[start_lvl - 1];
269e5b38eb5STomohiro Kusumi if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->e2fs_bsize,
270e5b38eb5STomohiro Kusumi &sbp)) {
271e5b38eb5STomohiro Kusumi brelse(sbp);
272cfe60390STomohiro Kusumi return (ENOSPC);
273cfe60390STomohiro Kusumi }
274cfe60390STomohiro Kusumi sbap = (u_int *)sbp->b_data;
275cfe60390STomohiro Kusumi soff = idp->in_off;
276cfe60390STomohiro Kusumi }
277cfe60390STomohiro Kusumi /*
278cfe60390STomohiro Kusumi * If the block range spans two block maps, get the second map.
279cfe60390STomohiro Kusumi */
280cfe60390STomohiro Kusumi ebap = NULL;
281cfe60390STomohiro Kusumi if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
282cfe60390STomohiro Kusumi ssize = len;
283cfe60390STomohiro Kusumi } else {
284cfe60390STomohiro Kusumi #ifdef INVARIANTS
285cfe60390STomohiro Kusumi if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
286cfe60390STomohiro Kusumi panic("ext2_reallocblks: start == end");
287cfe60390STomohiro Kusumi #endif
288cfe60390STomohiro Kusumi ssize = len - (idp->in_off + 1);
289e5b38eb5STomohiro Kusumi if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->e2fs_bsize,
290e5b38eb5STomohiro Kusumi &ebp))
291cfe60390STomohiro Kusumi goto fail;
292cfe60390STomohiro Kusumi ebap = (u_int *)ebp->b_data;
293cfe60390STomohiro Kusumi }
294cfe60390STomohiro Kusumi /*
295cfe60390STomohiro Kusumi * Find the preferred location for the cluster.
296cfe60390STomohiro Kusumi */
297cfe60390STomohiro Kusumi EXT2_LOCK(ump);
298cfe60390STomohiro Kusumi pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
299cfe60390STomohiro Kusumi /*
300cfe60390STomohiro Kusumi * Search the block map looking for an allocation of the desired size.
301cfe60390STomohiro Kusumi */
302cfe60390STomohiro Kusumi if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
303cfe60390STomohiro Kusumi len, ext2_clusteralloc)) == 0) {
304cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
305cfe60390STomohiro Kusumi goto fail;
306cfe60390STomohiro Kusumi }
307cfe60390STomohiro Kusumi /*
308cfe60390STomohiro Kusumi * We have found a new contiguous block.
309cfe60390STomohiro Kusumi *
310cfe60390STomohiro Kusumi * First we have to replace the old block pointers with the new
311cfe60390STomohiro Kusumi * block pointers in the inode and indirect blocks associated
312cfe60390STomohiro Kusumi * with the file.
313cfe60390STomohiro Kusumi */
314cfe60390STomohiro Kusumi SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc,
315cfe60390STomohiro Kusumi ip->i_number, start_lbn, end_lbn);
316cfe60390STomohiro Kusumi blkno = newblk;
317cfe60390STomohiro Kusumi for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
318cfe60390STomohiro Kusumi if (i == ssize) {
319cfe60390STomohiro Kusumi bap = ebap;
320cfe60390STomohiro Kusumi soff = -i;
321cfe60390STomohiro Kusumi }
322cfe60390STomohiro Kusumi #ifdef INVARIANTS
323cfe60390STomohiro Kusumi if (buflist->bs_children[i]->b_bio2.bio_offset !=
324cfe60390STomohiro Kusumi fsbtodoff(fs, *bap))
325cfe60390STomohiro Kusumi panic("ext2_reallocblks: alloc mismatch");
326cfe60390STomohiro Kusumi #endif
327cfe60390STomohiro Kusumi SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap);
328cfe60390STomohiro Kusumi *bap++ = blkno;
329cfe60390STomohiro Kusumi }
330cfe60390STomohiro Kusumi /*
331cfe60390STomohiro Kusumi * Next we must write out the modified inode and indirect blocks.
332cfe60390STomohiro Kusumi * For strict correctness, the writes should be synchronous since
333cfe60390STomohiro Kusumi * the old block values may have been written to disk. In practise
334cfe60390STomohiro Kusumi * they are almost never written, but if we are concerned about
335cfe60390STomohiro Kusumi * strict correctness, the `doasyncfree' flag should be set to zero.
336cfe60390STomohiro Kusumi *
337cfe60390STomohiro Kusumi * The test on `doasyncfree' should be changed to test a flag
338cfe60390STomohiro Kusumi * that shows whether the associated buffers and inodes have
339cfe60390STomohiro Kusumi * been written. The flag should be set when the cluster is
340cfe60390STomohiro Kusumi * started and cleared whenever the buffer or inode is flushed.
341cfe60390STomohiro Kusumi * We can then check below to see if it is set, and do the
342cfe60390STomohiro Kusumi * synchronous write only when it has been cleared.
343cfe60390STomohiro Kusumi */
344cfe60390STomohiro Kusumi if (sbap != &ip->i_db[0]) {
345cfe60390STomohiro Kusumi if (doasyncfree)
346cfe60390STomohiro Kusumi bdwrite(sbp);
347cfe60390STomohiro Kusumi else
348cfe60390STomohiro Kusumi bwrite(sbp);
349cfe60390STomohiro Kusumi } else {
350cfe60390STomohiro Kusumi ip->i_flag |= IN_CHANGE | IN_UPDATE;
351cfe60390STomohiro Kusumi if (!doasyncfree)
352cfe60390STomohiro Kusumi ext2_update(vp, 1);
353cfe60390STomohiro Kusumi }
354cfe60390STomohiro Kusumi if (ssize < len) {
355cfe60390STomohiro Kusumi if (doasyncfree)
356cfe60390STomohiro Kusumi bdwrite(ebp);
357cfe60390STomohiro Kusumi else
358cfe60390STomohiro Kusumi bwrite(ebp);
359cfe60390STomohiro Kusumi }
360cfe60390STomohiro Kusumi /*
361cfe60390STomohiro Kusumi * Last, free the old blocks and assign the new blocks to the buffers.
362cfe60390STomohiro Kusumi */
363cfe60390STomohiro Kusumi for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
364cfe60390STomohiro Kusumi ext2_blkfree(ip, dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset),
365cfe60390STomohiro Kusumi fs->e2fs_bsize);
366cfe60390STomohiro Kusumi buflist->bs_children[i]->b_bio2.bio_offset = fsbtodoff(fs, blkno);
367cfe60390STomohiro Kusumi SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno);
368cfe60390STomohiro Kusumi }
369cfe60390STomohiro Kusumi
370cfe60390STomohiro Kusumi return (0);
371cfe60390STomohiro Kusumi
372cfe60390STomohiro Kusumi fail:
373cfe60390STomohiro Kusumi if (ssize < len)
374e5b38eb5STomohiro Kusumi brelse(ebp);
375cfe60390STomohiro Kusumi if (sbap != &ip->i_db[0])
376e5b38eb5STomohiro Kusumi brelse(sbp);
377cfe60390STomohiro Kusumi return (ENOSPC);
378cfe60390STomohiro Kusumi }
379cfe60390STomohiro Kusumi
380cfe60390STomohiro Kusumi /*
381cfe60390STomohiro Kusumi * Allocate an inode in the filesystem.
382cfe60390STomohiro Kusumi *
383cfe60390STomohiro Kusumi */
384cfe60390STomohiro Kusumi int
ext2_valloc(struct vnode * pvp,int mode,struct ucred * cred,struct vnode ** vpp)385cfe60390STomohiro Kusumi ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
386cfe60390STomohiro Kusumi {
387cfe60390STomohiro Kusumi struct timespec ts;
388cfe60390STomohiro Kusumi struct m_ext2fs *fs;
389cfe60390STomohiro Kusumi struct ext2mount *ump;
390cfe60390STomohiro Kusumi struct inode *pip;
391cfe60390STomohiro Kusumi struct inode *ip;
392cfe60390STomohiro Kusumi struct vnode *vp;
393cfe60390STomohiro Kusumi ino_t ino, ipref;
394cfe60390STomohiro Kusumi int error, cg;
395cfe60390STomohiro Kusumi
396cfe60390STomohiro Kusumi *vpp = NULL;
397cfe60390STomohiro Kusumi pip = VTOI(pvp);
398cfe60390STomohiro Kusumi fs = pip->i_e2fs;
399cfe60390STomohiro Kusumi ump = pip->i_ump;
400cfe60390STomohiro Kusumi
401cfe60390STomohiro Kusumi EXT2_LOCK(ump);
402cfe60390STomohiro Kusumi if (fs->e2fs_ficount == 0)
403cfe60390STomohiro Kusumi goto noinodes;
404cfe60390STomohiro Kusumi /*
405cfe60390STomohiro Kusumi * If it is a directory then obtain a cylinder group based on
406cfe60390STomohiro Kusumi * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
407cfe60390STomohiro Kusumi * always the next inode.
408cfe60390STomohiro Kusumi */
409cfe60390STomohiro Kusumi if ((mode & IFMT) == IFDIR) {
410cfe60390STomohiro Kusumi cg = ext2_dirpref(pip);
411cfe60390STomohiro Kusumi if (fs->e2fs_contigdirs[cg] < 255)
412cfe60390STomohiro Kusumi fs->e2fs_contigdirs[cg]++;
413cfe60390STomohiro Kusumi } else {
414cfe60390STomohiro Kusumi cg = ino_to_cg(fs, pip->i_number);
415cfe60390STomohiro Kusumi if (fs->e2fs_contigdirs[cg] > 0)
416cfe60390STomohiro Kusumi fs->e2fs_contigdirs[cg]--;
417cfe60390STomohiro Kusumi }
418cfe60390STomohiro Kusumi ipref = cg * fs->e2fs_ipg + 1;
419cfe60390STomohiro Kusumi ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
420cfe60390STomohiro Kusumi if (ino == 0)
421cfe60390STomohiro Kusumi goto noinodes;
422cfe60390STomohiro Kusumi restart:
423cfe60390STomohiro Kusumi if ((vp = ext2_ihashget(ump->um_dev, ino)) != NULL) {
424cfe60390STomohiro Kusumi printf("ext2_valloc: vp %p exists for inode %lu\n", vp, ino);
425cfe60390STomohiro Kusumi return (EEXIST);
426cfe60390STomohiro Kusumi }
427ffd4f0afSTomohiro Kusumi if (ext2_alloc_vnode(ump->um_mountp, ino, &vp) == -1)
428cfe60390STomohiro Kusumi goto restart;
429ffd4f0afSTomohiro Kusumi ip = VTOI(vp);
430cfe60390STomohiro Kusumi
431cfe60390STomohiro Kusumi if ((error = ext2_vinit(vp->v_mount, &vp)) != 0) {
432cfe60390STomohiro Kusumi *vpp = NULL;
433cfe60390STomohiro Kusumi vp->v_type = VBAD;
434cfe60390STomohiro Kusumi vx_put(vp);
435cfe60390STomohiro Kusumi return (error);
436cfe60390STomohiro Kusumi }
437cfe60390STomohiro Kusumi
438cfe60390STomohiro Kusumi if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS)
439cfe60390STomohiro Kusumi && (S_ISREG(mode) || S_ISDIR(mode)))
440cfe60390STomohiro Kusumi ext4_ext_tree_init(ip);
441cfe60390STomohiro Kusumi else
442cfe60390STomohiro Kusumi memset(ip->i_data, 0, sizeof(ip->i_data));
443cfe60390STomohiro Kusumi
444cfe60390STomohiro Kusumi /*
445cfe60390STomohiro Kusumi * Set up a new generation number for this inode.
446cfe60390STomohiro Kusumi * Avoid zero values.
447cfe60390STomohiro Kusumi */
448cfe60390STomohiro Kusumi do {
449cfe60390STomohiro Kusumi ip->i_gen = karc4random();
450cfe60390STomohiro Kusumi } while (ip->i_gen == 0);
451cfe60390STomohiro Kusumi
452cfe60390STomohiro Kusumi vfs_timestamp(&ts);
453cfe60390STomohiro Kusumi ip->i_birthtime = ts.tv_sec;
454cfe60390STomohiro Kusumi ip->i_birthnsec = ts.tv_nsec;
455cfe60390STomohiro Kusumi
456cfe60390STomohiro Kusumi /*
457cfe60390STomohiro Kusumi * Finish inode initialization now that aliasing has been resolved.
458cfe60390STomohiro Kusumi */
459cfe60390STomohiro Kusumi vref(ip->i_devvp);
460cfe60390STomohiro Kusumi /*
461cfe60390STomohiro Kusumi * Return the locked and refd vnode.
462cfe60390STomohiro Kusumi */
463cfe60390STomohiro Kusumi vx_downgrade(vp); /* downgrade VX lock to VN lock */
464cfe60390STomohiro Kusumi *vpp = vp;
465cfe60390STomohiro Kusumi
466cfe60390STomohiro Kusumi return (0);
467cfe60390STomohiro Kusumi
468cfe60390STomohiro Kusumi noinodes:
469cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
470cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes");
471cfe60390STomohiro Kusumi return (ENOSPC);
472cfe60390STomohiro Kusumi }
473cfe60390STomohiro Kusumi
474cfe60390STomohiro Kusumi /*
475cfe60390STomohiro Kusumi * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h
476cfe60390STomohiro Kusumi */
477cfe60390STomohiro Kusumi uint64_t
e2fs_gd_get_b_bitmap(struct ext2_gd * gd)478cfe60390STomohiro Kusumi e2fs_gd_get_b_bitmap(struct ext2_gd *gd)
479cfe60390STomohiro Kusumi {
480cfe60390STomohiro Kusumi
481cfe60390STomohiro Kusumi return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) |
482cfe60390STomohiro Kusumi le32toh(gd->ext2bgd_b_bitmap));
483cfe60390STomohiro Kusumi }
484cfe60390STomohiro Kusumi
485cfe60390STomohiro Kusumi uint64_t
e2fs_gd_get_i_bitmap(struct ext2_gd * gd)486cfe60390STomohiro Kusumi e2fs_gd_get_i_bitmap(struct ext2_gd *gd)
487cfe60390STomohiro Kusumi {
488cfe60390STomohiro Kusumi
489cfe60390STomohiro Kusumi return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) |
490cfe60390STomohiro Kusumi le32toh(gd->ext2bgd_i_bitmap));
491cfe60390STomohiro Kusumi }
492cfe60390STomohiro Kusumi
493cfe60390STomohiro Kusumi uint64_t
e2fs_gd_get_i_tables(struct ext2_gd * gd)494cfe60390STomohiro Kusumi e2fs_gd_get_i_tables(struct ext2_gd *gd)
495cfe60390STomohiro Kusumi {
496cfe60390STomohiro Kusumi
497cfe60390STomohiro Kusumi return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) |
498cfe60390STomohiro Kusumi le32toh(gd->ext2bgd_i_tables));
499cfe60390STomohiro Kusumi }
500cfe60390STomohiro Kusumi
501cfe60390STomohiro Kusumi static uint32_t
e2fs_gd_get_nbfree(struct ext2_gd * gd)502cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(struct ext2_gd *gd)
503cfe60390STomohiro Kusumi {
504cfe60390STomohiro Kusumi
505cfe60390STomohiro Kusumi return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) |
506cfe60390STomohiro Kusumi le16toh(gd->ext2bgd_nbfree));
507cfe60390STomohiro Kusumi }
508cfe60390STomohiro Kusumi
509cfe60390STomohiro Kusumi static void
e2fs_gd_set_nbfree(struct ext2_gd * gd,uint32_t val)510cfe60390STomohiro Kusumi e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val)
511cfe60390STomohiro Kusumi {
512cfe60390STomohiro Kusumi
513cfe60390STomohiro Kusumi gd->ext2bgd_nbfree = htole16(val & 0xffff);
514cfe60390STomohiro Kusumi gd->ext4bgd_nbfree_hi = htole16(val >> 16);
515cfe60390STomohiro Kusumi }
516cfe60390STomohiro Kusumi
517cfe60390STomohiro Kusumi static uint32_t
e2fs_gd_get_nifree(struct ext2_gd * gd)518cfe60390STomohiro Kusumi e2fs_gd_get_nifree(struct ext2_gd *gd)
519cfe60390STomohiro Kusumi {
520cfe60390STomohiro Kusumi
521cfe60390STomohiro Kusumi return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) |
522cfe60390STomohiro Kusumi le16toh(gd->ext2bgd_nifree));
523cfe60390STomohiro Kusumi }
524cfe60390STomohiro Kusumi
525cfe60390STomohiro Kusumi static void
e2fs_gd_set_nifree(struct ext2_gd * gd,uint32_t val)526cfe60390STomohiro Kusumi e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val)
527cfe60390STomohiro Kusumi {
528cfe60390STomohiro Kusumi
529cfe60390STomohiro Kusumi gd->ext2bgd_nifree = htole16(val & 0xffff);
530cfe60390STomohiro Kusumi gd->ext4bgd_nifree_hi = htole16(val >> 16);
531cfe60390STomohiro Kusumi }
532cfe60390STomohiro Kusumi
533cfe60390STomohiro Kusumi uint32_t
e2fs_gd_get_ndirs(struct ext2_gd * gd)534cfe60390STomohiro Kusumi e2fs_gd_get_ndirs(struct ext2_gd *gd)
535cfe60390STomohiro Kusumi {
536cfe60390STomohiro Kusumi
537cfe60390STomohiro Kusumi return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) |
538cfe60390STomohiro Kusumi le16toh(gd->ext2bgd_ndirs));
539cfe60390STomohiro Kusumi }
540cfe60390STomohiro Kusumi
541cfe60390STomohiro Kusumi static void
e2fs_gd_set_ndirs(struct ext2_gd * gd,uint32_t val)542cfe60390STomohiro Kusumi e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val)
543cfe60390STomohiro Kusumi {
544cfe60390STomohiro Kusumi
545cfe60390STomohiro Kusumi gd->ext2bgd_ndirs = htole16(val & 0xffff);
546cfe60390STomohiro Kusumi gd->ext4bgd_ndirs_hi = htole16(val >> 16);
547cfe60390STomohiro Kusumi }
548cfe60390STomohiro Kusumi
549cfe60390STomohiro Kusumi static uint32_t
e2fs_gd_get_i_unused(struct ext2_gd * gd)550cfe60390STomohiro Kusumi e2fs_gd_get_i_unused(struct ext2_gd *gd)
551cfe60390STomohiro Kusumi {
552cfe60390STomohiro Kusumi return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) |
553cfe60390STomohiro Kusumi le16toh(gd->ext4bgd_i_unused));
554cfe60390STomohiro Kusumi }
555cfe60390STomohiro Kusumi
556cfe60390STomohiro Kusumi static void
e2fs_gd_set_i_unused(struct ext2_gd * gd,uint32_t val)557cfe60390STomohiro Kusumi e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val)
558cfe60390STomohiro Kusumi {
559cfe60390STomohiro Kusumi
560cfe60390STomohiro Kusumi gd->ext4bgd_i_unused = htole16(val & 0xffff);
561cfe60390STomohiro Kusumi gd->ext4bgd_i_unused_hi = htole16(val >> 16);
562cfe60390STomohiro Kusumi }
563cfe60390STomohiro Kusumi
564cfe60390STomohiro Kusumi /*
565cfe60390STomohiro Kusumi * Find a cylinder to place a directory.
566cfe60390STomohiro Kusumi *
567cfe60390STomohiro Kusumi * The policy implemented by this algorithm is to allocate a
568cfe60390STomohiro Kusumi * directory inode in the same cylinder group as its parent
569cfe60390STomohiro Kusumi * directory, but also to reserve space for its files inodes
570cfe60390STomohiro Kusumi * and data. Restrict the number of directories which may be
571cfe60390STomohiro Kusumi * allocated one after another in the same cylinder group
572cfe60390STomohiro Kusumi * without intervening allocation of files.
573cfe60390STomohiro Kusumi *
574cfe60390STomohiro Kusumi * If we allocate a first level directory then force allocation
575cfe60390STomohiro Kusumi * in another cylinder group.
576cfe60390STomohiro Kusumi *
577cfe60390STomohiro Kusumi */
578cfe60390STomohiro Kusumi static u_long
ext2_dirpref(struct inode * pip)579cfe60390STomohiro Kusumi ext2_dirpref(struct inode *pip)
580cfe60390STomohiro Kusumi {
581cfe60390STomohiro Kusumi struct m_ext2fs *fs;
582cfe60390STomohiro Kusumi int cg, prefcg, cgsize;
583cfe60390STomohiro Kusumi uint64_t avgbfree, minbfree;
584cfe60390STomohiro Kusumi u_int avgifree, avgndir, curdirsize;
585cfe60390STomohiro Kusumi u_int minifree, maxndir;
586cfe60390STomohiro Kusumi u_int mincg, minndir;
587cfe60390STomohiro Kusumi u_int dirsize, maxcontigdirs;
588cfe60390STomohiro Kusumi
589cfe60390STomohiro Kusumi mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
590cfe60390STomohiro Kusumi fs = pip->i_e2fs;
591cfe60390STomohiro Kusumi
592cfe60390STomohiro Kusumi avgifree = fs->e2fs_ficount / fs->e2fs_gcount;
593cfe60390STomohiro Kusumi avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount;
594cfe60390STomohiro Kusumi avgndir = fs->e2fs_total_dir / fs->e2fs_gcount;
595cfe60390STomohiro Kusumi
596cfe60390STomohiro Kusumi /*
597cfe60390STomohiro Kusumi * Force allocation in another cg if creating a first level dir.
598cfe60390STomohiro Kusumi */
599cfe60390STomohiro Kusumi ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
600cfe60390STomohiro Kusumi if (ITOV(pip)->v_flag & VROOT) {
601cfe60390STomohiro Kusumi prefcg = karc4random() % fs->e2fs_gcount;
602cfe60390STomohiro Kusumi mincg = prefcg;
603cfe60390STomohiro Kusumi minndir = fs->e2fs_ipg;
604cfe60390STomohiro Kusumi for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
605cfe60390STomohiro Kusumi if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
606cfe60390STomohiro Kusumi e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
607cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
608cfe60390STomohiro Kusumi mincg = cg;
609cfe60390STomohiro Kusumi minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
610cfe60390STomohiro Kusumi }
611cfe60390STomohiro Kusumi for (cg = 0; cg < prefcg; cg++)
612cfe60390STomohiro Kusumi if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
613cfe60390STomohiro Kusumi e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
614cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
615cfe60390STomohiro Kusumi mincg = cg;
616cfe60390STomohiro Kusumi minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
617cfe60390STomohiro Kusumi }
618cfe60390STomohiro Kusumi return (mincg);
619cfe60390STomohiro Kusumi }
620cfe60390STomohiro Kusumi /*
621cfe60390STomohiro Kusumi * Count various limits which used for
622cfe60390STomohiro Kusumi * optimal allocation of a directory inode.
623cfe60390STomohiro Kusumi */
624cfe60390STomohiro Kusumi maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
625cfe60390STomohiro Kusumi minifree = avgifree - avgifree / 4;
626cfe60390STomohiro Kusumi if (minifree < 1)
627cfe60390STomohiro Kusumi minifree = 1;
628cfe60390STomohiro Kusumi minbfree = avgbfree - avgbfree / 4;
629cfe60390STomohiro Kusumi if (minbfree < 1)
630cfe60390STomohiro Kusumi minbfree = 1;
631cfe60390STomohiro Kusumi cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
632cfe60390STomohiro Kusumi dirsize = AVGDIRSIZE;
633cfe60390STomohiro Kusumi curdirsize = avgndir ?
634cfe60390STomohiro Kusumi (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
635cfe60390STomohiro Kusumi if (dirsize < curdirsize)
636cfe60390STomohiro Kusumi dirsize = curdirsize;
637cfe60390STomohiro Kusumi maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
638cfe60390STomohiro Kusumi maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
639cfe60390STomohiro Kusumi if (maxcontigdirs == 0)
640cfe60390STomohiro Kusumi maxcontigdirs = 1;
641cfe60390STomohiro Kusumi
642cfe60390STomohiro Kusumi /*
643cfe60390STomohiro Kusumi * Limit number of dirs in one cg and reserve space for
644cfe60390STomohiro Kusumi * regular files, but only if we have no deficit in
645cfe60390STomohiro Kusumi * inodes or space.
646cfe60390STomohiro Kusumi */
647cfe60390STomohiro Kusumi prefcg = ino_to_cg(fs, pip->i_number);
648cfe60390STomohiro Kusumi for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
649cfe60390STomohiro Kusumi if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
650cfe60390STomohiro Kusumi e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
651cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
652cfe60390STomohiro Kusumi if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
653cfe60390STomohiro Kusumi return (cg);
654cfe60390STomohiro Kusumi }
655cfe60390STomohiro Kusumi for (cg = 0; cg < prefcg; cg++)
656cfe60390STomohiro Kusumi if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
657cfe60390STomohiro Kusumi e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
658cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
659cfe60390STomohiro Kusumi if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
660cfe60390STomohiro Kusumi return (cg);
661cfe60390STomohiro Kusumi }
662cfe60390STomohiro Kusumi /*
663cfe60390STomohiro Kusumi * This is a backstop when we have deficit in space.
664cfe60390STomohiro Kusumi */
665cfe60390STomohiro Kusumi for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
666cfe60390STomohiro Kusumi if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
667cfe60390STomohiro Kusumi return (cg);
668cfe60390STomohiro Kusumi for (cg = 0; cg < prefcg; cg++)
669cfe60390STomohiro Kusumi if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
670cfe60390STomohiro Kusumi break;
671cfe60390STomohiro Kusumi return (cg);
672cfe60390STomohiro Kusumi }
673cfe60390STomohiro Kusumi
674cfe60390STomohiro Kusumi /*
675cfe60390STomohiro Kusumi * Select the desired position for the next block in a file.
676cfe60390STomohiro Kusumi *
677cfe60390STomohiro Kusumi * we try to mimic what Remy does in inode_getblk/block_getblk
678cfe60390STomohiro Kusumi *
679cfe60390STomohiro Kusumi * we note: blocknr == 0 means that we're about to allocate either
680cfe60390STomohiro Kusumi * a direct block or a pointer block at the first level of indirection
681cfe60390STomohiro Kusumi * (In other words, stuff that will go in i_db[] or i_ib[])
682cfe60390STomohiro Kusumi *
683cfe60390STomohiro Kusumi * blocknr != 0 means that we're allocating a block that is none
684cfe60390STomohiro Kusumi * of the above. Then, blocknr tells us the number of the block
685cfe60390STomohiro Kusumi * that will hold the pointer
686cfe60390STomohiro Kusumi */
687cfe60390STomohiro Kusumi e4fs_daddr_t
ext2_blkpref(struct inode * ip,e2fs_lbn_t lbn,int indx,e2fs_daddr_t * bap,e2fs_daddr_t blocknr)688cfe60390STomohiro Kusumi ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
689cfe60390STomohiro Kusumi e2fs_daddr_t blocknr)
690cfe60390STomohiro Kusumi {
691cfe60390STomohiro Kusumi struct m_ext2fs *fs;
692cfe60390STomohiro Kusumi int tmp;
693cfe60390STomohiro Kusumi
694cfe60390STomohiro Kusumi fs = ip->i_e2fs;
695cfe60390STomohiro Kusumi
696cfe60390STomohiro Kusumi mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
697cfe60390STomohiro Kusumi
698cfe60390STomohiro Kusumi /*
699cfe60390STomohiro Kusumi * If the next block is actually what we thought it is, then set the
700cfe60390STomohiro Kusumi * goal to what we thought it should be.
701cfe60390STomohiro Kusumi */
702cfe60390STomohiro Kusumi if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
703cfe60390STomohiro Kusumi return ip->i_next_alloc_goal;
704cfe60390STomohiro Kusumi
705cfe60390STomohiro Kusumi /*
706cfe60390STomohiro Kusumi * Now check whether we were provided with an array that basically
707cfe60390STomohiro Kusumi * tells us previous blocks to which we want to stay close.
708cfe60390STomohiro Kusumi */
709cfe60390STomohiro Kusumi if (bap)
710cfe60390STomohiro Kusumi for (tmp = indx - 1; tmp >= 0; tmp--)
711cfe60390STomohiro Kusumi if (bap[tmp])
712cfe60390STomohiro Kusumi return (le32toh(bap[tmp]));
713cfe60390STomohiro Kusumi
714cfe60390STomohiro Kusumi /*
715cfe60390STomohiro Kusumi * Else lets fall back to the blocknr or, if there is none, follow
716cfe60390STomohiro Kusumi * the rule that a block should be allocated near its inode.
717cfe60390STomohiro Kusumi */
718cfe60390STomohiro Kusumi return (blocknr ? blocknr :
719cfe60390STomohiro Kusumi (e2fs_daddr_t)(ip->i_block_group *
720cfe60390STomohiro Kusumi EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock));
721cfe60390STomohiro Kusumi }
722cfe60390STomohiro Kusumi
723cfe60390STomohiro Kusumi /*
724cfe60390STomohiro Kusumi * Implement the cylinder overflow algorithm.
725cfe60390STomohiro Kusumi *
726cfe60390STomohiro Kusumi * The policy implemented by this algorithm is:
727cfe60390STomohiro Kusumi * 1) allocate the block in its requested cylinder group.
728*f39b800bSTomohiro Kusumi * 2) quadratically rehash on the cylinder group number.
729cfe60390STomohiro Kusumi * 3) brute force search for a free block.
730cfe60390STomohiro Kusumi */
731cfe60390STomohiro Kusumi static e4fs_daddr_t
ext2_hashalloc(struct inode * ip,int cg,long pref,int size,daddr_t (* allocator)(struct inode *,int,daddr_t,int))732cfe60390STomohiro Kusumi ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
733cfe60390STomohiro Kusumi daddr_t (*allocator) (struct inode *, int, daddr_t, int))
734cfe60390STomohiro Kusumi {
735cfe60390STomohiro Kusumi struct m_ext2fs *fs;
736cfe60390STomohiro Kusumi e4fs_daddr_t result;
737cfe60390STomohiro Kusumi int i, icg = cg;
738cfe60390STomohiro Kusumi
739cfe60390STomohiro Kusumi mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
740cfe60390STomohiro Kusumi fs = ip->i_e2fs;
741cfe60390STomohiro Kusumi /*
742cfe60390STomohiro Kusumi * 1: preferred cylinder group
743cfe60390STomohiro Kusumi */
744cfe60390STomohiro Kusumi result = (*allocator)(ip, cg, pref, size);
745cfe60390STomohiro Kusumi if (result)
746cfe60390STomohiro Kusumi return (result);
747cfe60390STomohiro Kusumi /*
748cfe60390STomohiro Kusumi * 2: quadratic rehash
749cfe60390STomohiro Kusumi */
750cfe60390STomohiro Kusumi for (i = 1; i < fs->e2fs_gcount; i *= 2) {
751cfe60390STomohiro Kusumi cg += i;
752cfe60390STomohiro Kusumi if (cg >= fs->e2fs_gcount)
753cfe60390STomohiro Kusumi cg -= fs->e2fs_gcount;
754cfe60390STomohiro Kusumi result = (*allocator)(ip, cg, 0, size);
755cfe60390STomohiro Kusumi if (result)
756cfe60390STomohiro Kusumi return (result);
757cfe60390STomohiro Kusumi }
758cfe60390STomohiro Kusumi /*
759cfe60390STomohiro Kusumi * 3: brute force search
760cfe60390STomohiro Kusumi * Note that we start at i == 2, since 0 was checked initially,
761cfe60390STomohiro Kusumi * and 1 is always checked in the quadratic rehash.
762cfe60390STomohiro Kusumi */
763cfe60390STomohiro Kusumi cg = (icg + 2) % fs->e2fs_gcount;
764cfe60390STomohiro Kusumi for (i = 2; i < fs->e2fs_gcount; i++) {
765cfe60390STomohiro Kusumi result = (*allocator)(ip, cg, 0, size);
766cfe60390STomohiro Kusumi if (result)
767cfe60390STomohiro Kusumi return (result);
768cfe60390STomohiro Kusumi cg++;
769cfe60390STomohiro Kusumi if (cg == fs->e2fs_gcount)
770cfe60390STomohiro Kusumi cg = 0;
771cfe60390STomohiro Kusumi }
772cfe60390STomohiro Kusumi return (0);
773cfe60390STomohiro Kusumi }
774cfe60390STomohiro Kusumi
775cfe60390STomohiro Kusumi static uint64_t
ext2_cg_number_gdb_nometa(struct m_ext2fs * fs,int cg)776cfe60390STomohiro Kusumi ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg)
777cfe60390STomohiro Kusumi {
778cfe60390STomohiro Kusumi
779cfe60390STomohiro Kusumi if (!ext2_cg_has_sb(fs, cg))
780cfe60390STomohiro Kusumi return (0);
781cfe60390STomohiro Kusumi
782cfe60390STomohiro Kusumi if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG))
783cfe60390STomohiro Kusumi return (le32toh(fs->e2fs->e3fs_first_meta_bg));
784cfe60390STomohiro Kusumi
785cfe60390STomohiro Kusumi return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) /
786cfe60390STomohiro Kusumi EXT2_DESCS_PER_BLOCK(fs));
787cfe60390STomohiro Kusumi }
788cfe60390STomohiro Kusumi
/*
 * Group descriptor block count for cylinder group cg under the META_BG
 * layout: 1 for the first, second and last group of the meta group
 * that contains cg, 0 for every other group.
 */
static uint64_t
ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg)
{
	unsigned long metagroup;
	int first, last;

	metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
	first = metagroup * EXT2_DESCS_PER_BLOCK(fs);
	last = first + EXT2_DESCS_PER_BLOCK(fs) - 1;

	return ((cg == first || cg == first + 1 || cg == last) ? 1 : 0);
}
804cfe60390STomohiro Kusumi
805cfe60390STomohiro Kusumi uint64_t
ext2_cg_number_gdb(struct m_ext2fs * fs,int cg)806cfe60390STomohiro Kusumi ext2_cg_number_gdb(struct m_ext2fs *fs, int cg)
807cfe60390STomohiro Kusumi {
808cfe60390STomohiro Kusumi unsigned long first_meta_bg, metagroup;
809cfe60390STomohiro Kusumi
810cfe60390STomohiro Kusumi first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg);
811cfe60390STomohiro Kusumi metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
812cfe60390STomohiro Kusumi
813cfe60390STomohiro Kusumi if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
814cfe60390STomohiro Kusumi metagroup < first_meta_bg)
815cfe60390STomohiro Kusumi return (ext2_cg_number_gdb_nometa(fs, cg));
816cfe60390STomohiro Kusumi
817cfe60390STomohiro Kusumi return ext2_cg_number_gdb_meta(fs, cg);
818cfe60390STomohiro Kusumi }
819cfe60390STomohiro Kusumi
820cfe60390STomohiro Kusumi static int
ext2_number_base_meta_blocks(struct m_ext2fs * fs,int cg)821cfe60390STomohiro Kusumi ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg)
822cfe60390STomohiro Kusumi {
823cfe60390STomohiro Kusumi int number;
824cfe60390STomohiro Kusumi
825cfe60390STomohiro Kusumi number = ext2_cg_has_sb(fs, cg);
826cfe60390STomohiro Kusumi
827cfe60390STomohiro Kusumi if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
828cfe60390STomohiro Kusumi cg < le32toh(fs->e2fs->e3fs_first_meta_bg) *
829cfe60390STomohiro Kusumi EXT2_DESCS_PER_BLOCK(fs)) {
830cfe60390STomohiro Kusumi if (number) {
831cfe60390STomohiro Kusumi number += ext2_cg_number_gdb(fs, cg);
832cfe60390STomohiro Kusumi number += le16toh(fs->e2fs->e2fs_reserved_ngdb);
833cfe60390STomohiro Kusumi }
834cfe60390STomohiro Kusumi } else {
835cfe60390STomohiro Kusumi number += ext2_cg_number_gdb(fs, cg);
836cfe60390STomohiro Kusumi }
837cfe60390STomohiro Kusumi
838cfe60390STomohiro Kusumi return (number);
839cfe60390STomohiro Kusumi }
840cfe60390STomohiro Kusumi
/*
 * Mark the tail of a bitmap as in use.
 *
 * Bits from start_bit up to the next byte boundary are set one at a
 * time; from that boundary on, whole bytes are filled with 0xff for
 * (end_bit - boundary) / 8 bytes.  Nothing is done when start_bit is
 * not below end_bit.
 *
 * The byte boundary is computed in int so that the loop does not
 * compare a signed index against an unsigned long bound.
 */
static void
ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
	int i, boundary;

	if (start_bit >= end_bit)
		return;

	boundary = (start_bit + 7) & ~7;
	for (i = start_bit; i < boundary; i++)
		setbit(bitmap, i);
	if (i < end_bit)
		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
}
854cfe60390STomohiro Kusumi
855cfe60390STomohiro Kusumi static int
ext2_get_group_number(struct m_ext2fs * fs,e4fs_daddr_t block)856cfe60390STomohiro Kusumi ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block)
857cfe60390STomohiro Kusumi {
858cfe60390STomohiro Kusumi
859cfe60390STomohiro Kusumi return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) /
860cfe60390STomohiro Kusumi fs->e2fs_bsize);
861cfe60390STomohiro Kusumi }
862cfe60390STomohiro Kusumi
863cfe60390STomohiro Kusumi static int
ext2_block_in_group(struct m_ext2fs * fs,e4fs_daddr_t block,int cg)864cfe60390STomohiro Kusumi ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg)
865cfe60390STomohiro Kusumi {
866cfe60390STomohiro Kusumi
867cfe60390STomohiro Kusumi return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0);
868cfe60390STomohiro Kusumi }
869cfe60390STomohiro Kusumi
870cfe60390STomohiro Kusumi static int
ext2_cg_block_bitmap_init(struct m_ext2fs * fs,int cg,struct buf * bp)871cfe60390STomohiro Kusumi ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp)
872cfe60390STomohiro Kusumi {
873cfe60390STomohiro Kusumi int bit, bit_max, inodes_per_block;
874cfe60390STomohiro Kusumi uint64_t start, tmp;
875cfe60390STomohiro Kusumi
876cfe60390STomohiro Kusumi if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT))
877cfe60390STomohiro Kusumi return (0);
878cfe60390STomohiro Kusumi
879cfe60390STomohiro Kusumi memset(bp->b_data, 0, fs->e2fs_bsize);
880cfe60390STomohiro Kusumi
881cfe60390STomohiro Kusumi bit_max = ext2_number_base_meta_blocks(fs, cg);
882cfe60390STomohiro Kusumi if ((bit_max >> 3) >= fs->e2fs_bsize)
883cfe60390STomohiro Kusumi return (EINVAL);
884cfe60390STomohiro Kusumi
885cfe60390STomohiro Kusumi for (bit = 0; bit < bit_max; bit++)
886cfe60390STomohiro Kusumi setbit(bp->b_data, bit);
887cfe60390STomohiro Kusumi
888cfe60390STomohiro Kusumi start = (uint64_t)cg * fs->e2fs_bpg +
889cfe60390STomohiro Kusumi le32toh(fs->e2fs->e2fs_first_dblock);
890cfe60390STomohiro Kusumi
891cfe60390STomohiro Kusumi /* Set bits for block and inode bitmaps, and inode table. */
892cfe60390STomohiro Kusumi tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]);
893cfe60390STomohiro Kusumi if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
894cfe60390STomohiro Kusumi ext2_block_in_group(fs, tmp, cg))
895cfe60390STomohiro Kusumi setbit(bp->b_data, tmp - start);
896cfe60390STomohiro Kusumi
897cfe60390STomohiro Kusumi tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]);
898cfe60390STomohiro Kusumi if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
899cfe60390STomohiro Kusumi ext2_block_in_group(fs, tmp, cg))
900cfe60390STomohiro Kusumi setbit(bp->b_data, tmp - start);
901cfe60390STomohiro Kusumi
902cfe60390STomohiro Kusumi tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]);
903cfe60390STomohiro Kusumi inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs);
904cfe60390STomohiro Kusumi while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) +
905cfe60390STomohiro Kusumi fs->e2fs_ipg / inodes_per_block ) {
906cfe60390STomohiro Kusumi if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
907cfe60390STomohiro Kusumi ext2_block_in_group(fs, tmp, cg))
908cfe60390STomohiro Kusumi setbit(bp->b_data, tmp - start);
909cfe60390STomohiro Kusumi tmp++;
910cfe60390STomohiro Kusumi }
911cfe60390STomohiro Kusumi
912cfe60390STomohiro Kusumi /*
913cfe60390STomohiro Kusumi * Also if the number of blocks within the group is less than
914cfe60390STomohiro Kusumi * the blocksize * 8 ( which is the size of bitmap ), set rest
915cfe60390STomohiro Kusumi * of the block bitmap to 1
916cfe60390STomohiro Kusumi */
917cfe60390STomohiro Kusumi ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8,
918cfe60390STomohiro Kusumi bp->b_data);
919cfe60390STomohiro Kusumi
920cfe60390STomohiro Kusumi /* Clean the flag */
921cfe60390STomohiro Kusumi fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
922cfe60390STomohiro Kusumi fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT);
923cfe60390STomohiro Kusumi
924cfe60390STomohiro Kusumi return (0);
925cfe60390STomohiro Kusumi }
926cfe60390STomohiro Kusumi
927cfe60390STomohiro Kusumi static int
ext2_b_bitmap_validate(struct m_ext2fs * fs,struct buf * bp,int cg)928cfe60390STomohiro Kusumi ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg)
929cfe60390STomohiro Kusumi {
930cfe60390STomohiro Kusumi struct ext2_gd *gd;
931cfe60390STomohiro Kusumi uint64_t group_first_block;
932cfe60390STomohiro Kusumi unsigned int offset, max_bit;
933cfe60390STomohiro Kusumi
934cfe60390STomohiro Kusumi if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) {
935cfe60390STomohiro Kusumi /*
936cfe60390STomohiro Kusumi * It is not possible to check block bitmap in case of this
937cfe60390STomohiro Kusumi * feature, because the inode and block bitmaps and inode table
938cfe60390STomohiro Kusumi * blocks may not be in the group at all.
939cfe60390STomohiro Kusumi * So, skip check in this case.
940cfe60390STomohiro Kusumi */
941cfe60390STomohiro Kusumi return (0);
942cfe60390STomohiro Kusumi }
943cfe60390STomohiro Kusumi
944cfe60390STomohiro Kusumi gd = &fs->e2fs_gd[cg];
945cfe60390STomohiro Kusumi max_bit = fs->e2fs_fpg;
946cfe60390STomohiro Kusumi group_first_block = ((uint64_t)cg) * fs->e2fs_fpg +
947cfe60390STomohiro Kusumi le32toh(fs->e2fs->e2fs_first_dblock);
948cfe60390STomohiro Kusumi
949cfe60390STomohiro Kusumi /* Check block bitmap block number */
950cfe60390STomohiro Kusumi offset = e2fs_gd_get_b_bitmap(gd) - group_first_block;
951cfe60390STomohiro Kusumi if (offset >= max_bit || !isset(bp->b_data, offset)) {
952cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
953cfe60390STomohiro Kusumi "bad block bitmap, group", cg);
954cfe60390STomohiro Kusumi return (EINVAL);
955cfe60390STomohiro Kusumi }
956cfe60390STomohiro Kusumi
957cfe60390STomohiro Kusumi /* Check inode bitmap block number */
958cfe60390STomohiro Kusumi offset = e2fs_gd_get_i_bitmap(gd) - group_first_block;
959cfe60390STomohiro Kusumi if (offset >= max_bit || !isset(bp->b_data, offset)) {
960cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
961cfe60390STomohiro Kusumi "bad inode bitmap", cg);
962cfe60390STomohiro Kusumi return (EINVAL);
963cfe60390STomohiro Kusumi }
964cfe60390STomohiro Kusumi
965cfe60390STomohiro Kusumi /* Check inode table */
966cfe60390STomohiro Kusumi offset = e2fs_gd_get_i_tables(gd) - group_first_block;
967cfe60390STomohiro Kusumi if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) {
968cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
969cfe60390STomohiro Kusumi "bad inode table, group", cg);
970cfe60390STomohiro Kusumi return (EINVAL);
971cfe60390STomohiro Kusumi }
972cfe60390STomohiro Kusumi
973cfe60390STomohiro Kusumi return (0);
974cfe60390STomohiro Kusumi }
975cfe60390STomohiro Kusumi
976cfe60390STomohiro Kusumi /*
977cfe60390STomohiro Kusumi * Determine whether a block can be allocated.
978cfe60390STomohiro Kusumi *
979cfe60390STomohiro Kusumi * Check to see if a block of the appropriate size is available,
980cfe60390STomohiro Kusumi * and if it is, allocate it.
981cfe60390STomohiro Kusumi */
982cfe60390STomohiro Kusumi static daddr_t
ext2_alloccg(struct inode * ip,int cg,daddr_t bpref,int size)983cfe60390STomohiro Kusumi ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
984cfe60390STomohiro Kusumi {
985cfe60390STomohiro Kusumi struct m_ext2fs *fs;
986cfe60390STomohiro Kusumi struct buf *bp;
987cfe60390STomohiro Kusumi struct ext2mount *ump;
988cfe60390STomohiro Kusumi daddr_t bno, runstart, runlen;
989cfe60390STomohiro Kusumi int bit, loc, end, error, start;
990cfe60390STomohiro Kusumi char *bbp;
991cfe60390STomohiro Kusumi /* XXX ondisk32 */
992cfe60390STomohiro Kusumi fs = ip->i_e2fs;
993cfe60390STomohiro Kusumi ump = ip->i_ump;
994cfe60390STomohiro Kusumi if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
995cfe60390STomohiro Kusumi return (0);
996cfe60390STomohiro Kusumi
997cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
998e5b38eb5STomohiro Kusumi error = bread(ip->i_devvp, fsbtodoff(fs,
999cfe60390STomohiro Kusumi e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
1000cfe60390STomohiro Kusumi (int)fs->e2fs_bsize, &bp);
1001cfe60390STomohiro Kusumi if (error)
1002cfe60390STomohiro Kusumi goto fail;
1003cfe60390STomohiro Kusumi
1004cfe60390STomohiro Kusumi if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1005cfe60390STomohiro Kusumi EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
1006cfe60390STomohiro Kusumi error = ext2_cg_block_bitmap_init(fs, cg, bp);
1007cfe60390STomohiro Kusumi if (error)
1008cfe60390STomohiro Kusumi goto fail;
1009cfe60390STomohiro Kusumi
1010cfe60390STomohiro Kusumi ext2_gd_b_bitmap_csum_set(fs, cg, bp);
1011cfe60390STomohiro Kusumi }
1012cfe60390STomohiro Kusumi error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp);
1013cfe60390STomohiro Kusumi if (error)
1014cfe60390STomohiro Kusumi goto fail;
1015cfe60390STomohiro Kusumi
1016cfe60390STomohiro Kusumi error = ext2_b_bitmap_validate(fs,bp, cg);
1017cfe60390STomohiro Kusumi if (error)
1018cfe60390STomohiro Kusumi goto fail;
1019cfe60390STomohiro Kusumi
1020cfe60390STomohiro Kusumi /*
1021cfe60390STomohiro Kusumi * Check, that another thread did not not allocate the last block in
1022cfe60390STomohiro Kusumi * this group while we were waiting for the buffer.
1023cfe60390STomohiro Kusumi */
1024cfe60390STomohiro Kusumi if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
1025cfe60390STomohiro Kusumi goto fail;
1026cfe60390STomohiro Kusumi
1027cfe60390STomohiro Kusumi bbp = (char *)bp->b_data;
1028cfe60390STomohiro Kusumi
1029cfe60390STomohiro Kusumi if (dtog(fs, bpref) != cg)
1030cfe60390STomohiro Kusumi bpref = 0;
1031cfe60390STomohiro Kusumi if (bpref != 0) {
1032cfe60390STomohiro Kusumi bpref = dtogd(fs, bpref);
1033cfe60390STomohiro Kusumi /*
1034cfe60390STomohiro Kusumi * if the requested block is available, use it
1035cfe60390STomohiro Kusumi */
1036cfe60390STomohiro Kusumi if (isclr(bbp, bpref)) {
1037cfe60390STomohiro Kusumi bno = bpref;
1038cfe60390STomohiro Kusumi goto gotit;
1039cfe60390STomohiro Kusumi }
1040cfe60390STomohiro Kusumi }
1041cfe60390STomohiro Kusumi /*
1042cfe60390STomohiro Kusumi * no blocks in the requested cylinder, so take next
1043cfe60390STomohiro Kusumi * available one in this cylinder group.
1044cfe60390STomohiro Kusumi * first try to get 8 contigous blocks, then fall back to a single
1045cfe60390STomohiro Kusumi * block.
1046cfe60390STomohiro Kusumi */
1047cfe60390STomohiro Kusumi if (bpref)
1048cfe60390STomohiro Kusumi start = dtogd(fs, bpref) / NBBY;
1049cfe60390STomohiro Kusumi else
1050cfe60390STomohiro Kusumi start = 0;
1051cfe60390STomohiro Kusumi end = howmany(fs->e2fs_fpg, NBBY) - start;
1052cfe60390STomohiro Kusumi retry:
1053cfe60390STomohiro Kusumi runlen = 0;
1054cfe60390STomohiro Kusumi runstart = 0;
1055cfe60390STomohiro Kusumi for (loc = start; loc < end; loc++) {
1056cfe60390STomohiro Kusumi if (bbp[loc] == (char)0xff) {
1057cfe60390STomohiro Kusumi runlen = 0;
1058cfe60390STomohiro Kusumi continue;
1059cfe60390STomohiro Kusumi }
1060cfe60390STomohiro Kusumi
1061cfe60390STomohiro Kusumi /* Start of a run, find the number of high clear bits. */
1062cfe60390STomohiro Kusumi if (runlen == 0) {
1063cfe60390STomohiro Kusumi bit = fls(bbp[loc]);
1064cfe60390STomohiro Kusumi runlen = NBBY - bit;
1065cfe60390STomohiro Kusumi runstart = loc * NBBY + bit;
1066cfe60390STomohiro Kusumi } else if (bbp[loc] == 0) {
1067cfe60390STomohiro Kusumi /* Continue a run. */
1068cfe60390STomohiro Kusumi runlen += NBBY;
1069cfe60390STomohiro Kusumi } else {
1070cfe60390STomohiro Kusumi /*
1071cfe60390STomohiro Kusumi * Finish the current run. If it isn't long
1072cfe60390STomohiro Kusumi * enough, start a new one.
1073cfe60390STomohiro Kusumi */
1074cfe60390STomohiro Kusumi bit = ffs(bbp[loc]) - 1;
1075cfe60390STomohiro Kusumi runlen += bit;
1076cfe60390STomohiro Kusumi if (runlen >= 8) {
1077cfe60390STomohiro Kusumi bno = runstart;
1078cfe60390STomohiro Kusumi goto gotit;
1079cfe60390STomohiro Kusumi }
1080cfe60390STomohiro Kusumi
1081cfe60390STomohiro Kusumi /* Run was too short, start a new one. */
1082cfe60390STomohiro Kusumi bit = fls(bbp[loc]);
1083cfe60390STomohiro Kusumi runlen = NBBY - bit;
1084cfe60390STomohiro Kusumi runstart = loc * NBBY + bit;
1085cfe60390STomohiro Kusumi }
1086cfe60390STomohiro Kusumi
1087cfe60390STomohiro Kusumi /* If the current run is long enough, use it. */
1088cfe60390STomohiro Kusumi if (runlen >= 8) {
1089cfe60390STomohiro Kusumi bno = runstart;
1090cfe60390STomohiro Kusumi goto gotit;
1091cfe60390STomohiro Kusumi }
1092cfe60390STomohiro Kusumi }
1093cfe60390STomohiro Kusumi if (start != 0) {
1094cfe60390STomohiro Kusumi end = start;
1095cfe60390STomohiro Kusumi start = 0;
1096cfe60390STomohiro Kusumi goto retry;
1097cfe60390STomohiro Kusumi }
1098cfe60390STomohiro Kusumi bno = ext2_mapsearch(fs, bbp, bpref);
1099cfe60390STomohiro Kusumi if (bno < 0)
1100cfe60390STomohiro Kusumi goto fail;
1101cfe60390STomohiro Kusumi
1102cfe60390STomohiro Kusumi gotit:
1103cfe60390STomohiro Kusumi #ifdef INVARIANTS
1104cfe60390STomohiro Kusumi if (isset(bbp, bno)) {
1105cfe60390STomohiro Kusumi printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
1106cfe60390STomohiro Kusumi cg, (intmax_t)bno, fs->e2fs_fsmnt);
1107cfe60390STomohiro Kusumi panic("ext2fs_alloccg: dup alloc");
1108cfe60390STomohiro Kusumi }
1109cfe60390STomohiro Kusumi #endif
1110cfe60390STomohiro Kusumi setbit(bbp, bno);
1111cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1112cfe60390STomohiro Kusumi ext2_clusteracct(fs, bbp, cg, bno, -1);
1113cfe60390STomohiro Kusumi fs->e2fs_fbcount--;
1114cfe60390STomohiro Kusumi e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
1115cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
1116cfe60390STomohiro Kusumi fs->e2fs_fmod = 1;
1117cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1118cfe60390STomohiro Kusumi ext2_gd_b_bitmap_csum_set(fs, cg, bp);
1119cfe60390STomohiro Kusumi bdwrite(bp);
1120cfe60390STomohiro Kusumi return (((uint64_t)cg) * fs->e2fs_fpg +
1121cfe60390STomohiro Kusumi le32toh(fs->e2fs->e2fs_first_dblock) + bno);
1122cfe60390STomohiro Kusumi
1123cfe60390STomohiro Kusumi fail:
1124e5b38eb5STomohiro Kusumi brelse(bp);
1125cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1126cfe60390STomohiro Kusumi return (0);
1127cfe60390STomohiro Kusumi }
1128cfe60390STomohiro Kusumi
1129cfe60390STomohiro Kusumi /*
1130cfe60390STomohiro Kusumi * Determine whether a cluster can be allocated.
1131cfe60390STomohiro Kusumi */
1132cfe60390STomohiro Kusumi static daddr_t
ext2_clusteralloc(struct inode * ip,int cg,daddr_t bpref,int len)1133cfe60390STomohiro Kusumi ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
1134cfe60390STomohiro Kusumi {
1135cfe60390STomohiro Kusumi struct m_ext2fs *fs;
1136cfe60390STomohiro Kusumi struct ext2mount *ump;
1137cfe60390STomohiro Kusumi struct buf *bp;
1138cfe60390STomohiro Kusumi char *bbp;
1139cfe60390STomohiro Kusumi int bit, error, got, i, loc, run;
1140cfe60390STomohiro Kusumi int32_t *lp;
1141cfe60390STomohiro Kusumi daddr_t bno;
1142cfe60390STomohiro Kusumi
1143cfe60390STomohiro Kusumi fs = ip->i_e2fs;
1144cfe60390STomohiro Kusumi ump = ip->i_ump;
1145cfe60390STomohiro Kusumi
1146cfe60390STomohiro Kusumi if (fs->e2fs_maxcluster[cg] < len)
1147cfe60390STomohiro Kusumi return (0);
1148cfe60390STomohiro Kusumi
1149cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1150e5b38eb5STomohiro Kusumi error = bread(ip->i_devvp,
1151cfe60390STomohiro Kusumi fsbtodoff(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
1152cfe60390STomohiro Kusumi (int)fs->e2fs_bsize, &bp);
1153cfe60390STomohiro Kusumi if (error)
1154cfe60390STomohiro Kusumi goto fail_lock;
1155cfe60390STomohiro Kusumi
1156cfe60390STomohiro Kusumi bbp = (char *)bp->b_data;
1157cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1158cfe60390STomohiro Kusumi /*
1159cfe60390STomohiro Kusumi * Check to see if a cluster of the needed size (or bigger) is
1160cfe60390STomohiro Kusumi * available in this cylinder group.
1161cfe60390STomohiro Kusumi */
1162cfe60390STomohiro Kusumi lp = &fs->e2fs_clustersum[cg].cs_sum[len];
1163cfe60390STomohiro Kusumi for (i = len; i <= fs->e2fs_contigsumsize; i++)
1164cfe60390STomohiro Kusumi if (*lp++ > 0)
1165cfe60390STomohiro Kusumi break;
1166cfe60390STomohiro Kusumi if (i > fs->e2fs_contigsumsize) {
1167cfe60390STomohiro Kusumi /*
1168cfe60390STomohiro Kusumi * Update the cluster summary information to reflect
1169cfe60390STomohiro Kusumi * the true maximum-sized cluster so that future cluster
1170cfe60390STomohiro Kusumi * allocation requests can avoid reading the bitmap only
1171cfe60390STomohiro Kusumi * to find no cluster.
1172cfe60390STomohiro Kusumi */
1173cfe60390STomohiro Kusumi lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
1174cfe60390STomohiro Kusumi for (i = len - 1; i > 0; i--)
1175cfe60390STomohiro Kusumi if (*lp-- > 0)
1176cfe60390STomohiro Kusumi break;
1177cfe60390STomohiro Kusumi fs->e2fs_maxcluster[cg] = i;
1178cfe60390STomohiro Kusumi goto fail;
1179cfe60390STomohiro Kusumi }
1180cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1181cfe60390STomohiro Kusumi
1182cfe60390STomohiro Kusumi /* Search the bitmap to find a big enough cluster like in FFS. */
1183cfe60390STomohiro Kusumi if (dtog(fs, bpref) != cg)
1184cfe60390STomohiro Kusumi bpref = 0;
1185cfe60390STomohiro Kusumi if (bpref != 0)
1186cfe60390STomohiro Kusumi bpref = dtogd(fs, bpref);
1187cfe60390STomohiro Kusumi loc = bpref / NBBY;
1188cfe60390STomohiro Kusumi bit = 1 << (bpref % NBBY);
1189cfe60390STomohiro Kusumi for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) {
1190cfe60390STomohiro Kusumi if ((bbp[loc] & bit) != 0)
1191cfe60390STomohiro Kusumi run = 0;
1192cfe60390STomohiro Kusumi else {
1193cfe60390STomohiro Kusumi run++;
1194cfe60390STomohiro Kusumi if (run == len)
1195cfe60390STomohiro Kusumi break;
1196cfe60390STomohiro Kusumi }
1197cfe60390STomohiro Kusumi if ((got & (NBBY - 1)) != (NBBY - 1))
1198cfe60390STomohiro Kusumi bit <<= 1;
1199cfe60390STomohiro Kusumi else {
1200cfe60390STomohiro Kusumi loc++;
1201cfe60390STomohiro Kusumi bit = 1;
1202cfe60390STomohiro Kusumi }
1203cfe60390STomohiro Kusumi }
1204cfe60390STomohiro Kusumi
1205cfe60390STomohiro Kusumi if (got >= fs->e2fs_fpg)
1206cfe60390STomohiro Kusumi goto fail_lock;
1207cfe60390STomohiro Kusumi
1208cfe60390STomohiro Kusumi /* Allocate the cluster that we found. */
1209cfe60390STomohiro Kusumi for (i = 1; i < len; i++)
1210cfe60390STomohiro Kusumi if (!isclr(bbp, got - run + i))
1211cfe60390STomohiro Kusumi panic("ext2_clusteralloc: map mismatch");
1212cfe60390STomohiro Kusumi
1213cfe60390STomohiro Kusumi bno = got - run + 1;
1214cfe60390STomohiro Kusumi if (bno >= fs->e2fs_fpg)
1215cfe60390STomohiro Kusumi panic("ext2_clusteralloc: allocated out of group");
1216cfe60390STomohiro Kusumi
1217cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1218cfe60390STomohiro Kusumi for (i = 0; i < len; i += fs->e2fs_fpb) {
1219cfe60390STomohiro Kusumi setbit(bbp, bno + i);
1220cfe60390STomohiro Kusumi ext2_clusteracct(fs, bbp, cg, bno + i, -1);
1221cfe60390STomohiro Kusumi fs->e2fs_fbcount--;
1222cfe60390STomohiro Kusumi e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
1223cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
1224cfe60390STomohiro Kusumi }
1225cfe60390STomohiro Kusumi fs->e2fs_fmod = 1;
1226cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1227cfe60390STomohiro Kusumi
1228cfe60390STomohiro Kusumi bdwrite(bp);
1229cfe60390STomohiro Kusumi return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock)
1230cfe60390STomohiro Kusumi + bno);
1231cfe60390STomohiro Kusumi
1232cfe60390STomohiro Kusumi fail_lock:
1233cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1234cfe60390STomohiro Kusumi fail:
1235e5b38eb5STomohiro Kusumi brelse(bp);
1236cfe60390STomohiro Kusumi return (0);
1237cfe60390STomohiro Kusumi }
1238cfe60390STomohiro Kusumi
1239cfe60390STomohiro Kusumi static int
ext2_zero_inode_table(struct inode * ip,int cg)1240cfe60390STomohiro Kusumi ext2_zero_inode_table(struct inode *ip, int cg)
1241cfe60390STomohiro Kusumi {
1242cfe60390STomohiro Kusumi struct m_ext2fs *fs;
1243cfe60390STomohiro Kusumi struct buf *bp;
1244cfe60390STomohiro Kusumi int i, all_blks, used_blks;
1245cfe60390STomohiro Kusumi
1246cfe60390STomohiro Kusumi fs = ip->i_e2fs;
1247cfe60390STomohiro Kusumi
1248cfe60390STomohiro Kusumi if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED)
1249cfe60390STomohiro Kusumi return (0);
1250cfe60390STomohiro Kusumi
1251cfe60390STomohiro Kusumi all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg /
1252cfe60390STomohiro Kusumi fs->e2fs_bsize;
1253cfe60390STomohiro Kusumi
1254cfe60390STomohiro Kusumi used_blks = howmany(fs->e2fs_ipg -
1255cfe60390STomohiro Kusumi e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]),
1256cfe60390STomohiro Kusumi fs->e2fs_bsize / EXT2_INODE_SIZE(fs));
1257cfe60390STomohiro Kusumi
1258cfe60390STomohiro Kusumi for (i = 0; i < all_blks - used_blks; i++) {
1259cfe60390STomohiro Kusumi bp = getblk(ip->i_devvp, fsbtodoff(fs,
1260cfe60390STomohiro Kusumi e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i),
1261cfe60390STomohiro Kusumi fs->e2fs_bsize, 0, 0);
1262cfe60390STomohiro Kusumi if (!bp)
1263cfe60390STomohiro Kusumi return (EIO);
1264cfe60390STomohiro Kusumi
1265cfe60390STomohiro Kusumi vfs_bio_clrbuf(bp);
1266cfe60390STomohiro Kusumi bawrite(bp);
1267cfe60390STomohiro Kusumi }
1268cfe60390STomohiro Kusumi
1269cfe60390STomohiro Kusumi fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
1270cfe60390STomohiro Kusumi fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED);
1271cfe60390STomohiro Kusumi
1272cfe60390STomohiro Kusumi return (0);
1273cfe60390STomohiro Kusumi }
1274cfe60390STomohiro Kusumi
/*
 * Set every byte of "bitmap" from index "first" through index "last"
 * (both inclusive) to 0xff.  Nothing is written when first > last.
 */
static void
ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last)
{
	int pos;

	pos = first;
	while (pos <= last) {
		bitmap[pos] = 0xff;
		pos++;
	}
}
1283cfe60390STomohiro Kusumi
1284cfe60390STomohiro Kusumi /*
1285cfe60390STomohiro Kusumi * Determine whether an inode can be allocated.
1286cfe60390STomohiro Kusumi *
1287cfe60390STomohiro Kusumi * Check to see if an inode is available, and if it is,
1288cfe60390STomohiro Kusumi * allocate it using tode in the specified cylinder group.
1289cfe60390STomohiro Kusumi */
1290cfe60390STomohiro Kusumi static daddr_t
ext2_nodealloccg(struct inode * ip,int cg,daddr_t ipref,int mode)1291cfe60390STomohiro Kusumi ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1292cfe60390STomohiro Kusumi {
1293cfe60390STomohiro Kusumi struct m_ext2fs *fs;
1294cfe60390STomohiro Kusumi struct buf *bp;
1295cfe60390STomohiro Kusumi struct ext2mount *ump;
1296cfe60390STomohiro Kusumi int error, start, len, ifree, ibytes;
1297cfe60390STomohiro Kusumi char *ibp, *loc;
1298cfe60390STomohiro Kusumi
1299cfe60390STomohiro Kusumi ipref--; /* to avoid a lot of (ipref -1) */
1300cfe60390STomohiro Kusumi if (ipref == -1)
1301cfe60390STomohiro Kusumi ipref = 0;
1302cfe60390STomohiro Kusumi fs = ip->i_e2fs;
1303cfe60390STomohiro Kusumi ump = ip->i_ump;
1304cfe60390STomohiro Kusumi if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0)
1305cfe60390STomohiro Kusumi return (0);
1306cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1307e5b38eb5STomohiro Kusumi error = bread(ip->i_devvp, fsbtodoff(fs,
1308cfe60390STomohiro Kusumi e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])),
1309cfe60390STomohiro Kusumi (int)fs->e2fs_bsize, &bp);
1310cfe60390STomohiro Kusumi if (error) {
1311e5b38eb5STomohiro Kusumi brelse(bp);
1312cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1313cfe60390STomohiro Kusumi return (0);
1314cfe60390STomohiro Kusumi }
1315cfe60390STomohiro Kusumi if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1316cfe60390STomohiro Kusumi EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
1317cfe60390STomohiro Kusumi if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) &
1318cfe60390STomohiro Kusumi EXT2_BG_INODE_UNINIT) {
1319cfe60390STomohiro Kusumi ibytes = fs->e2fs_ipg / 8;
1320cfe60390STomohiro Kusumi memset(bp->b_data, 0, ibytes - 1);
1321cfe60390STomohiro Kusumi ext2_fix_bitmap_tail(bp->b_data, ibytes,
1322cfe60390STomohiro Kusumi fs->e2fs_bsize - 1);
1323cfe60390STomohiro Kusumi fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
1324cfe60390STomohiro Kusumi fs->e2fs_gd[cg].ext4bgd_flags) &
1325cfe60390STomohiro Kusumi ~EXT2_BG_INODE_UNINIT);
1326cfe60390STomohiro Kusumi }
1327cfe60390STomohiro Kusumi ext2_gd_i_bitmap_csum_set(fs, cg, bp);
1328cfe60390STomohiro Kusumi error = ext2_zero_inode_table(ip, cg);
1329cfe60390STomohiro Kusumi if (error) {
1330e5b38eb5STomohiro Kusumi brelse(bp);
1331cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1332cfe60390STomohiro Kusumi return (0);
1333cfe60390STomohiro Kusumi }
1334cfe60390STomohiro Kusumi }
1335cfe60390STomohiro Kusumi error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp);
1336cfe60390STomohiro Kusumi if (error) {
1337e5b38eb5STomohiro Kusumi brelse(bp);
1338cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1339cfe60390STomohiro Kusumi return (0);
1340cfe60390STomohiro Kusumi }
1341cfe60390STomohiro Kusumi if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) {
1342cfe60390STomohiro Kusumi /*
1343cfe60390STomohiro Kusumi * Another thread allocated the last i-node in this
1344cfe60390STomohiro Kusumi * group while we were waiting for the buffer.
1345cfe60390STomohiro Kusumi */
1346e5b38eb5STomohiro Kusumi brelse(bp);
1347cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1348cfe60390STomohiro Kusumi return (0);
1349cfe60390STomohiro Kusumi }
1350cfe60390STomohiro Kusumi ibp = (char *)bp->b_data;
1351cfe60390STomohiro Kusumi if (ipref) {
1352cfe60390STomohiro Kusumi ipref %= fs->e2fs_ipg;
1353cfe60390STomohiro Kusumi if (isclr(ibp, ipref))
1354cfe60390STomohiro Kusumi goto gotit;
1355cfe60390STomohiro Kusumi }
1356cfe60390STomohiro Kusumi start = ipref / NBBY;
1357cfe60390STomohiro Kusumi len = howmany(fs->e2fs_ipg - ipref, NBBY);
1358cfe60390STomohiro Kusumi loc = memcchr(&ibp[start], 0xff, len);
1359cfe60390STomohiro Kusumi if (loc == NULL) {
1360cfe60390STomohiro Kusumi len = start + 1;
1361cfe60390STomohiro Kusumi start = 0;
1362cfe60390STomohiro Kusumi loc = memcchr(&ibp[start], 0xff, len);
1363cfe60390STomohiro Kusumi if (loc == NULL) {
1364cfe60390STomohiro Kusumi SDT_PROBE3(ext2fs, , alloc,
1365cfe60390STomohiro Kusumi ext2_nodealloccg_bmap_corrupted, cg, ipref,
1366cfe60390STomohiro Kusumi fs->e2fs_fsmnt);
1367e5b38eb5STomohiro Kusumi brelse(bp);
1368cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1369cfe60390STomohiro Kusumi return (0);
1370cfe60390STomohiro Kusumi }
1371cfe60390STomohiro Kusumi }
1372cfe60390STomohiro Kusumi ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
1373cfe60390STomohiro Kusumi gotit:
1374cfe60390STomohiro Kusumi setbit(ibp, ipref);
1375cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1376cfe60390STomohiro Kusumi e2fs_gd_set_nifree(&fs->e2fs_gd[cg],
1377cfe60390STomohiro Kusumi e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1);
1378cfe60390STomohiro Kusumi if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
1379cfe60390STomohiro Kusumi EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
1380cfe60390STomohiro Kusumi ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]);
1381cfe60390STomohiro Kusumi if (ipref + 1 > ifree)
1382cfe60390STomohiro Kusumi e2fs_gd_set_i_unused(&fs->e2fs_gd[cg],
1383cfe60390STomohiro Kusumi fs->e2fs_ipg - (ipref + 1));
1384cfe60390STomohiro Kusumi }
1385cfe60390STomohiro Kusumi fs->e2fs_ficount--;
1386cfe60390STomohiro Kusumi fs->e2fs_fmod = 1;
1387cfe60390STomohiro Kusumi if ((mode & IFMT) == IFDIR) {
1388cfe60390STomohiro Kusumi e2fs_gd_set_ndirs(&fs->e2fs_gd[cg],
1389cfe60390STomohiro Kusumi e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1);
1390cfe60390STomohiro Kusumi fs->e2fs_total_dir++;
1391cfe60390STomohiro Kusumi }
1392cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1393cfe60390STomohiro Kusumi ext2_gd_i_bitmap_csum_set(fs, cg, bp);
1394cfe60390STomohiro Kusumi bdwrite(bp);
1395cfe60390STomohiro Kusumi return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1);
1396cfe60390STomohiro Kusumi }
1397cfe60390STomohiro Kusumi
1398cfe60390STomohiro Kusumi /*
1399cfe60390STomohiro Kusumi * Free a block or fragment.
1400cfe60390STomohiro Kusumi *
1401cfe60390STomohiro Kusumi */
1402cfe60390STomohiro Kusumi void
ext2_blkfree(struct inode * ip,e4fs_daddr_t bno,long size)1403cfe60390STomohiro Kusumi ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size)
1404cfe60390STomohiro Kusumi {
1405cfe60390STomohiro Kusumi struct m_ext2fs *fs;
1406cfe60390STomohiro Kusumi struct buf *bp;
1407cfe60390STomohiro Kusumi struct ext2mount *ump;
1408cfe60390STomohiro Kusumi int cg, error;
1409cfe60390STomohiro Kusumi char *bbp;
1410cfe60390STomohiro Kusumi
1411cfe60390STomohiro Kusumi fs = ip->i_e2fs;
1412cfe60390STomohiro Kusumi ump = ip->i_ump;
1413cfe60390STomohiro Kusumi cg = dtog(fs, bno);
1414cfe60390STomohiro Kusumi if (bno >= fs->e2fs_bcount) {
1415cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block,
1416cfe60390STomohiro Kusumi ip->i_number, bno);
1417cfe60390STomohiro Kusumi return;
1418cfe60390STomohiro Kusumi }
1419e5b38eb5STomohiro Kusumi error = bread(ip->i_devvp,
1420cfe60390STomohiro Kusumi fsbtodoff(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
1421cfe60390STomohiro Kusumi (int)fs->e2fs_bsize, &bp);
1422cfe60390STomohiro Kusumi if (error) {
1423e5b38eb5STomohiro Kusumi brelse(bp);
1424cfe60390STomohiro Kusumi return;
1425cfe60390STomohiro Kusumi }
1426cfe60390STomohiro Kusumi bbp = (char *)bp->b_data;
1427cfe60390STomohiro Kusumi bno = dtogd(fs, bno);
1428cfe60390STomohiro Kusumi if (isclr(bbp, bno)) {
1429cfe60390STomohiro Kusumi panic("ext2_blkfree: freeing free block %lld, fs=%s",
1430cfe60390STomohiro Kusumi (long long)bno, fs->e2fs_fsmnt);
1431cfe60390STomohiro Kusumi }
1432cfe60390STomohiro Kusumi clrbit(bbp, bno);
1433cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1434cfe60390STomohiro Kusumi ext2_clusteracct(fs, bbp, cg, bno, 1);
1435cfe60390STomohiro Kusumi fs->e2fs_fbcount++;
1436cfe60390STomohiro Kusumi e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
1437cfe60390STomohiro Kusumi e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1);
1438cfe60390STomohiro Kusumi fs->e2fs_fmod = 1;
1439cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1440cfe60390STomohiro Kusumi ext2_gd_b_bitmap_csum_set(fs, cg, bp);
1441cfe60390STomohiro Kusumi bdwrite(bp);
1442cfe60390STomohiro Kusumi }
1443cfe60390STomohiro Kusumi
1444cfe60390STomohiro Kusumi /*
1445cfe60390STomohiro Kusumi * Free an inode.
1446cfe60390STomohiro Kusumi *
1447cfe60390STomohiro Kusumi */
1448cfe60390STomohiro Kusumi int
ext2_vfree(struct vnode * pvp,ino_t ino,int mode)1449cfe60390STomohiro Kusumi ext2_vfree(struct vnode *pvp, ino_t ino, int mode)
1450cfe60390STomohiro Kusumi {
1451cfe60390STomohiro Kusumi struct m_ext2fs *fs;
1452cfe60390STomohiro Kusumi struct inode *pip;
1453cfe60390STomohiro Kusumi struct buf *bp;
1454cfe60390STomohiro Kusumi struct ext2mount *ump;
1455cfe60390STomohiro Kusumi int error, cg;
1456cfe60390STomohiro Kusumi char *ibp;
1457cfe60390STomohiro Kusumi
1458cfe60390STomohiro Kusumi pip = VTOI(pvp);
1459cfe60390STomohiro Kusumi fs = pip->i_e2fs;
1460cfe60390STomohiro Kusumi ump = pip->i_ump;
1461cfe60390STomohiro Kusumi if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
1462cfe60390STomohiro Kusumi panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s",
1463cfe60390STomohiro Kusumi pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt);
1464cfe60390STomohiro Kusumi
1465cfe60390STomohiro Kusumi cg = ino_to_cg(fs, ino);
1466e5b38eb5STomohiro Kusumi error = bread(pip->i_devvp,
1467cfe60390STomohiro Kusumi fsbtodoff(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])),
1468cfe60390STomohiro Kusumi (int)fs->e2fs_bsize, &bp);
1469cfe60390STomohiro Kusumi if (error) {
1470e5b38eb5STomohiro Kusumi brelse(bp);
1471cfe60390STomohiro Kusumi return (0);
1472cfe60390STomohiro Kusumi }
1473cfe60390STomohiro Kusumi ibp = (char *)bp->b_data;
1474cfe60390STomohiro Kusumi ino = (ino - 1) % fs->e2fs_ipg;
1475cfe60390STomohiro Kusumi if (isclr(ibp, ino)) {
1476cfe60390STomohiro Kusumi SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree,
1477cfe60390STomohiro Kusumi fs->e2fs_fsmnt, ino);
1478cfe60390STomohiro Kusumi if (fs->e2fs_ronly == 0)
1479cfe60390STomohiro Kusumi panic("ext2_vfree: freeing free inode");
1480cfe60390STomohiro Kusumi }
1481cfe60390STomohiro Kusumi clrbit(ibp, ino);
1482cfe60390STomohiro Kusumi EXT2_LOCK(ump);
1483cfe60390STomohiro Kusumi fs->e2fs_ficount++;
1484cfe60390STomohiro Kusumi e2fs_gd_set_nifree(&fs->e2fs_gd[cg],
1485cfe60390STomohiro Kusumi e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1);
1486cfe60390STomohiro Kusumi if ((mode & IFMT) == IFDIR) {
1487cfe60390STomohiro Kusumi e2fs_gd_set_ndirs(&fs->e2fs_gd[cg],
1488cfe60390STomohiro Kusumi e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1);
1489cfe60390STomohiro Kusumi fs->e2fs_total_dir--;
1490cfe60390STomohiro Kusumi }
1491cfe60390STomohiro Kusumi fs->e2fs_fmod = 1;
1492cfe60390STomohiro Kusumi EXT2_UNLOCK(ump);
1493cfe60390STomohiro Kusumi ext2_gd_i_bitmap_csum_set(fs, cg, bp);
1494cfe60390STomohiro Kusumi bdwrite(bp);
1495cfe60390STomohiro Kusumi return (0);
1496cfe60390STomohiro Kusumi }
1497cfe60390STomohiro Kusumi
1498cfe60390STomohiro Kusumi /*
1499cfe60390STomohiro Kusumi * Find a block in the specified cylinder group.
1500cfe60390STomohiro Kusumi *
1501cfe60390STomohiro Kusumi * It is a panic if a request is made to find a block if none are
1502cfe60390STomohiro Kusumi * available.
1503cfe60390STomohiro Kusumi */
1504cfe60390STomohiro Kusumi static daddr_t
ext2_mapsearch(struct m_ext2fs * fs,char * bbp,daddr_t bpref)1505cfe60390STomohiro Kusumi ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
1506cfe60390STomohiro Kusumi {
1507cfe60390STomohiro Kusumi char *loc;
1508cfe60390STomohiro Kusumi int start, len;
1509cfe60390STomohiro Kusumi
1510cfe60390STomohiro Kusumi /*
1511cfe60390STomohiro Kusumi * find the fragment by searching through the free block
1512cfe60390STomohiro Kusumi * map for an appropriate bit pattern
1513cfe60390STomohiro Kusumi */
1514cfe60390STomohiro Kusumi if (bpref)
1515cfe60390STomohiro Kusumi start = dtogd(fs, bpref) / NBBY;
1516cfe60390STomohiro Kusumi else
1517cfe60390STomohiro Kusumi start = 0;
1518cfe60390STomohiro Kusumi len = howmany(fs->e2fs_fpg, NBBY) - start;
1519cfe60390STomohiro Kusumi loc = memcchr(&bbp[start], 0xff, len);
1520cfe60390STomohiro Kusumi if (loc == NULL) {
1521cfe60390STomohiro Kusumi len = start + 1;
1522cfe60390STomohiro Kusumi start = 0;
1523cfe60390STomohiro Kusumi loc = memcchr(&bbp[start], 0xff, len);
1524cfe60390STomohiro Kusumi if (loc == NULL) {
1525cfe60390STomohiro Kusumi panic("ext2_mapsearch: map corrupted: start=%d, len=%d,"
1526cfe60390STomohiro Kusumi "fs=%s", start, len, fs->e2fs_fsmnt);
1527cfe60390STomohiro Kusumi /* NOTREACHED */
1528cfe60390STomohiro Kusumi }
1529cfe60390STomohiro Kusumi }
1530cfe60390STomohiro Kusumi return ((loc - bbp) * NBBY + ffs(~*loc) - 1);
1531cfe60390STomohiro Kusumi }
1532cfe60390STomohiro Kusumi
1533cfe60390STomohiro Kusumi int
ext2_cg_has_sb(struct m_ext2fs * fs,int cg)1534cfe60390STomohiro Kusumi ext2_cg_has_sb(struct m_ext2fs *fs, int cg)
1535cfe60390STomohiro Kusumi {
1536cfe60390STomohiro Kusumi int a3, a5, a7;
1537cfe60390STomohiro Kusumi
1538cfe60390STomohiro Kusumi if (cg == 0)
1539cfe60390STomohiro Kusumi return (1);
1540cfe60390STomohiro Kusumi
1541cfe60390STomohiro Kusumi if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) {
1542cfe60390STomohiro Kusumi if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) ||
1543cfe60390STomohiro Kusumi cg == le32toh(fs->e2fs->e4fs_backup_bgs[1]))
1544cfe60390STomohiro Kusumi return (1);
1545cfe60390STomohiro Kusumi return (0);
1546cfe60390STomohiro Kusumi }
1547cfe60390STomohiro Kusumi
1548cfe60390STomohiro Kusumi if ((cg <= 1) ||
1549cfe60390STomohiro Kusumi !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER))
1550cfe60390STomohiro Kusumi return (1);
1551cfe60390STomohiro Kusumi
1552cfe60390STomohiro Kusumi if (!(cg & 1))
1553cfe60390STomohiro Kusumi return (0);
1554cfe60390STomohiro Kusumi
1555cfe60390STomohiro Kusumi for (a3 = 3, a5 = 5, a7 = 7;
1556cfe60390STomohiro Kusumi a3 <= cg || a5 <= cg || a7 <= cg;
1557cfe60390STomohiro Kusumi a3 *= 3, a5 *= 5, a7 *= 7)
1558cfe60390STomohiro Kusumi if (cg == a3 || cg == a5 || cg == a7)
1559cfe60390STomohiro Kusumi return (1);
1560cfe60390STomohiro Kusumi return (0);
1561cfe60390STomohiro Kusumi }
1562