1 /* $NetBSD: ufs_inode.c,v 1.112 2020/09/05 16:30:13 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)ufs_inode.c 8.9 (Berkeley) 5/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.112 2020/09/05 16:30:13 riastradh Exp $");
41
42 #if defined(_KERNEL_OPT)
43 #include "opt_ffs.h"
44 #include "opt_quota.h"
45 #include "opt_wapbl.h"
46 #include "opt_uvmhist.h"
47 #endif
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/proc.h>
52 #include <sys/vnode.h>
53 #include <sys/mount.h>
54 #include <sys/kernel.h>
55 #include <sys/namei.h>
56 #include <sys/kauth.h>
57 #include <sys/wapbl.h>
58 #include <sys/kmem.h>
59
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufsmount.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_wapbl.h>
64 #ifdef UFS_DIRHASH
65 #include <ufs/ufs/dirhash.h>
66 #endif
67 #ifdef UFS_EXTATTR
68 #include <ufs/ufs/extattr.h>
69 #endif
70
71 #ifdef UVMHIST
72 #include <uvm/uvm.h>
73 #endif
74 #include <uvm/uvm_page.h>
75 #include <uvm/uvm_stat.h>
76
77 /*
78 * Last reference to an inode. If necessary, write or delete it.
79 */
80 int
ufs_inactive(void * v)81 ufs_inactive(void *v)
82 {
83 struct vop_inactive_v2_args /* {
84 struct vnode *a_vp;
85 struct bool *a_recycle;
86 } */ *ap = v;
87 struct vnode *vp = ap->a_vp;
88 struct inode *ip = VTOI(vp);
89 struct mount *mp = vp->v_mount;
90 mode_t mode;
91 int allerror = 0, error;
92 bool wapbl_locked = false;
93
94 UFS_WAPBL_JUNLOCK_ASSERT(mp);
95
96 /*
97 * Ignore inodes related to stale file handles.
98 */
99 if (ip->i_mode == 0)
100 goto out;
101
102 if (ip->i_nlink <= 0 && (mp->mnt_flag & MNT_RDONLY) == 0) {
103 #ifdef UFS_EXTATTR
104 ufs_extattr_vnode_inactive(vp, curlwp);
105 #endif
106 /*
107 * All file blocks must be freed before we can let the vnode
108 * be reclaimed, so can't postpone full truncating any further.
109 */
110 ufs_truncate_all(vp);
111
112 #if defined(QUOTA) || defined(QUOTA2)
113 error = UFS_WAPBL_BEGIN(mp);
114 if (error) {
115 allerror = error;
116 } else {
117 wapbl_locked = true;
118 (void)chkiq(ip, -1, NOCRED, 0);
119 }
120 #endif
121 DIP_ASSIGN(ip, rdev, 0);
122 mode = ip->i_mode;
123 ip->i_mode = 0;
124 ip->i_omode = mode;
125 DIP_ASSIGN(ip, mode, 0);
126 ip->i_flag |= IN_CHANGE | IN_UPDATE;
127 /*
128 * Defer final inode free and update to ufs_reclaim().
129 */
130 }
131
132 if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) {
133 if (! wapbl_locked) {
134 error = UFS_WAPBL_BEGIN(mp);
135 if (error) {
136 allerror = error;
137 goto out;
138 }
139 wapbl_locked = true;
140 }
141 UFS_UPDATE(vp, NULL, NULL, 0);
142 }
143 out:
144 if (wapbl_locked)
145 UFS_WAPBL_END(mp);
146 /*
147 * If we are done with the inode, reclaim it
148 * so that it can be reused immediately.
149 */
150 *ap->a_recycle = (ip->i_mode == 0);
151
152 if (ip->i_mode == 0 && (DIP(ip, size) != 0 || DIP(ip, blocks) != 0)) {
153 printf("%s: unlinked ino %" PRId64 " on \"%s\" has"
154 " non zero size %" PRIx64 " or blocks %" PRIx64
155 " with allerror %d\n",
156 __func__, ip->i_number, mp->mnt_stat.f_mntonname,
157 DIP(ip, size), DIP(ip, blocks), allerror);
158 panic("%s: dirty filesystem?", __func__);
159 }
160
161 return (allerror);
162 }
163
164 /*
165 * Reclaim an inode so that it can be used for other purposes.
166 */
167 int
ufs_reclaim(struct vnode * vp)168 ufs_reclaim(struct vnode *vp)
169 {
170 struct inode *ip = VTOI(vp);
171
172 if (!UFS_WAPBL_BEGIN(vp->v_mount)) {
173 UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);
174 UFS_WAPBL_END(vp->v_mount);
175 }
176 UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);
177
178 if (ip->i_devvp) {
179 vrele(ip->i_devvp);
180 ip->i_devvp = 0;
181 }
182 #if defined(QUOTA) || defined(QUOTA2)
183 ufsquota_free(ip);
184 #endif
185 #ifdef UFS_DIRHASH
186 if (ip->i_dirhash != NULL)
187 ufsdirhash_free(ip);
188 #endif
189 return (0);
190 }
191
192 /*
193 * allocate a range of blocks in a file.
194 * after this function returns, any page entirely contained within the range
195 * will map to invalid data and thus must be overwritten before it is made
196 * accessible to others.
197 */
198
199 int
ufs_balloc_range(struct vnode * vp,off_t off,off_t len,kauth_cred_t cred,int flags)200 ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred,
201 int flags)
202 {
203 off_t neweof; /* file size after the operation */
204 off_t neweob; /* offset next to the last block after the operation */
205 off_t pagestart; /* starting offset of range covered by pgs */
206 off_t eob; /* offset next to allocated blocks */
207 struct uvm_object *uobj;
208 int i, delta, error, npages;
209 int bshift = vp->v_mount->mnt_fs_bshift;
210 int bsize = 1 << bshift;
211 int ppb = MAX(bsize >> PAGE_SHIFT, 1);
212 struct vm_page **pgs;
213 size_t pgssize;
214 UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
215 UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx len 0x%jx u_size 0x%jx",
216 (uintptr_t)vp, off, len, vp->v_size);
217
218 neweof = MAX(vp->v_size, off + len);
219 GOP_SIZE(vp, neweof, &neweob, 0);
220
221 error = 0;
222 uobj = &vp->v_uobj;
223
224 /*
225 * read or create pages covering the range of the allocation and
226 * keep them locked until the new block is allocated, so there
227 * will be no window where the old contents of the new block are
228 * visible to racing threads.
229 */
230
231 pagestart = trunc_page(off) & ~(bsize - 1);
232 npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT);
233 pgssize = npages * sizeof(struct vm_page *);
234 pgs = kmem_zalloc(pgssize, KM_SLEEP);
235
236 /*
237 * adjust off to be block-aligned.
238 */
239
240 delta = off & (bsize - 1);
241 off -= delta;
242 len += delta;
243
244 genfs_node_wrlock(vp);
245 rw_enter(uobj->vmobjlock, RW_WRITER);
246 error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
247 VM_PROT_WRITE, 0, PGO_SYNCIO | PGO_PASTEOF | PGO_NOBLOCKALLOC |
248 PGO_NOTIMESTAMP | PGO_GLOCKHELD);
249 if (error) {
250 genfs_node_unlock(vp);
251 goto out;
252 }
253
254 /*
255 * now allocate the range.
256 */
257
258 error = GOP_ALLOC(vp, off, len, flags, cred);
259 genfs_node_unlock(vp);
260
261 /*
262 * if the allocation succeeded, mark all the pages dirty
263 * and clear PG_RDONLY on any pages that are now fully backed
264 * by disk blocks. if the allocation failed, we do not invalidate
265 * the pages since they might have already existed and been dirty,
266 * in which case we need to keep them around. if we created the pages,
267 * they will be clean and read-only, and leaving such pages
268 * in the cache won't cause any problems.
269 */
270
271 GOP_SIZE(vp, off + len, &eob, 0);
272 rw_enter(uobj->vmobjlock, RW_WRITER);
273 for (i = 0; i < npages; i++) {
274 KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
275 if (!error) {
276 if (off <= pagestart + (i << PAGE_SHIFT) &&
277 pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
278 pgs[i]->flags &= ~PG_RDONLY;
279 }
280 uvm_pagemarkdirty(pgs[i], UVM_PAGE_STATUS_DIRTY);
281 }
282 uvm_pagelock(pgs[i]);
283 uvm_pageactivate(pgs[i]);
284 uvm_pageunlock(pgs[i]);
285 }
286 uvm_page_unbusy(pgs, npages);
287 rw_exit(uobj->vmobjlock);
288
289 out:
290 kmem_free(pgs, pgssize);
291 return error;
292 }
293
294 int
ufs_truncate_retry(struct vnode * vp,int ioflag,uint64_t newsize,kauth_cred_t cred)295 ufs_truncate_retry(struct vnode *vp, int ioflag, uint64_t newsize,
296 kauth_cred_t cred)
297 {
298 struct inode *ip = VTOI(vp);
299 struct mount *mp = vp->v_mount;
300 int error = 0;
301
302 UFS_WAPBL_JUNLOCK_ASSERT(mp);
303
304 /*
305 * Truncate might temporarily fail, loop until done.
306 */
307 do {
308 error = UFS_WAPBL_BEGIN(mp);
309 if (error)
310 goto out;
311
312 error = UFS_TRUNCATE(vp, newsize, ioflag, cred);
313 UFS_WAPBL_END(mp);
314
315 if (error != 0 && error != EAGAIN)
316 goto out;
317 } while (ip->i_size != newsize);
318
319 out:
320 return error;
321 }
322
323 /* truncate all the data of the inode including extended attributes */
324 int
ufs_truncate_all(struct vnode * vp)325 ufs_truncate_all(struct vnode *vp)
326 {
327 struct inode *ip = VTOI(vp);
328 off_t isize = ip->i_size;
329
330 if (ip->i_ump->um_fstype == UFS2)
331 isize += ip->i_ffs2_extsize;
332
333 if (isize == 0)
334 return 0;
335 return ufs_truncate_retry(vp, IO_NORMAL | IO_EXT, 0, NOCRED);
336 }
337