xref: /dflybsd-src/sys/vfs/hammer/hammer_vfsops.c (revision c4bf625e67439f34b29bfd33c4e2555ffea63ce9)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.25 2008/04/22 19:00:15 dillon Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/vnode.h>
41 #include <sys/mount.h>
42 #include <sys/malloc.h>
43 #include <sys/nlookup.h>
44 #include <sys/fcntl.h>
45 #include <sys/sysctl.h>
46 #include <sys/buf.h>
47 #include <sys/buf2.h>
48 #include "hammer.h"
49 
50 int hammer_debug_general;
51 int hammer_debug_locks;
52 int hammer_debug_btree;
53 int hammer_debug_tid;
54 int hammer_debug_recover;	/* -1 will disable, +1 will force */
55 int hammer_debug_recover_faults;
56 int hammer_count_inodes;
57 int hammer_count_records;
58 int hammer_count_record_datas;
59 int hammer_count_volumes;
60 int hammer_count_buffers;
61 int hammer_count_nodes;
62 int64_t hammer_contention_count;
63 int64_t hammer_zone_limit;
64 
65 SYSCTL_NODE(_vfs, OID_AUTO, hammer, CTLFLAG_RW, 0, "HAMMER filesystem");
66 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_general, CTLFLAG_RW,
67 	   &hammer_debug_general, 0, "");
68 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_locks, CTLFLAG_RW,
69 	   &hammer_debug_locks, 0, "");
70 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_btree, CTLFLAG_RW,
71 	   &hammer_debug_btree, 0, "");
72 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_tid, CTLFLAG_RW,
73 	   &hammer_debug_tid, 0, "");
74 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover, CTLFLAG_RW,
75 	   &hammer_debug_recover, 0, "");
76 SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover_faults, CTLFLAG_RW,
77 	   &hammer_debug_recover_faults, 0, "");
78 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_inodes, CTLFLAG_RD,
79 	   &hammer_count_inodes, 0, "");
80 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_records, CTLFLAG_RD,
81 	   &hammer_count_records, 0, "");
82 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_record_datas, CTLFLAG_RD,
83 	   &hammer_count_record_datas, 0, "");
84 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_volumes, CTLFLAG_RD,
85 	   &hammer_count_volumes, 0, "");
86 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_buffers, CTLFLAG_RD,
87 	   &hammer_count_buffers, 0, "");
88 SYSCTL_INT(_vfs_hammer, OID_AUTO, count_nodes, CTLFLAG_RD,
89 	   &hammer_count_nodes, 0, "");
90 SYSCTL_QUAD(_vfs_hammer, OID_AUTO, zone_limit, CTLFLAG_RW,
91 	   &hammer_zone_limit, 0, "");
92 SYSCTL_QUAD(_vfs_hammer, OID_AUTO, contention_count, CTLFLAG_RW,
93 	   &hammer_contention_count, 0, "");
94 
95 /*
96  * VFS ABI
97  */
98 static void	hammer_free_hmp(struct mount *mp);
99 
100 static int	hammer_vfs_mount(struct mount *mp, char *path, caddr_t data,
101 				struct ucred *cred);
102 static int	hammer_vfs_unmount(struct mount *mp, int mntflags);
103 static int	hammer_vfs_root(struct mount *mp, struct vnode **vpp);
104 static int	hammer_vfs_statfs(struct mount *mp, struct statfs *sbp,
105 				struct ucred *cred);
106 static int	hammer_vfs_sync(struct mount *mp, int waitfor);
107 static int	hammer_vfs_vget(struct mount *mp, ino_t ino,
108 				struct vnode **vpp);
109 static int	hammer_vfs_init(struct vfsconf *conf);
110 static int	hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp,
111 				struct vnode **vpp);
112 static int	hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp);
113 static int	hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
114 				int *exflagsp, struct ucred **credanonp);
115 
116 
117 static struct vfsops hammer_vfsops = {
118 	.vfs_mount	= hammer_vfs_mount,
119 	.vfs_unmount	= hammer_vfs_unmount,
120 	.vfs_root 	= hammer_vfs_root,
121 	.vfs_statfs	= hammer_vfs_statfs,
122 	.vfs_sync	= hammer_vfs_sync,
123 	.vfs_vget	= hammer_vfs_vget,
124 	.vfs_init	= hammer_vfs_init,
125 	.vfs_vptofh	= hammer_vfs_vptofh,
126 	.vfs_fhtovp	= hammer_vfs_fhtovp,
127 	.vfs_checkexp	= hammer_vfs_checkexp
128 };
129 
130 MALLOC_DEFINE(M_HAMMER, "hammer-mount", "hammer mount");
131 
132 VFS_SET(hammer_vfsops, hammer, 0);
133 MODULE_VERSION(hammer, 1);
134 
/*
 * Filesystem-type initialization hook (vfs_init).
 *
 * Nothing is set up here; the routine simply reports success.
 */
static int
hammer_vfs_init(struct vfsconf *conf)
{
	return (0);
}
141 
142 static int
143 hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data,
144 		 struct ucred *cred)
145 {
146 	struct hammer_mount_info info;
147 	hammer_mount_t hmp;
148 	hammer_volume_t rootvol;
149 	struct vnode *rootvp;
150 	const char *upath;	/* volume name in userspace */
151 	char *path;		/* volume name in system space */
152 	int error;
153 	int i;
154 
155 	if ((error = copyin(data, &info, sizeof(info))) != 0)
156 		return (error);
157 	if (info.nvolumes <= 0 || info.nvolumes >= 32768)
158 		return (EINVAL);
159 
160 	/*
161 	 * Interal mount data structure
162 	 */
163 	if (mp->mnt_flag & MNT_UPDATE) {
164 		hmp = (void *)mp->mnt_data;
165 		KKASSERT(hmp != NULL);
166 	} else {
167 		hmp = kmalloc(sizeof(*hmp), M_HAMMER, M_WAITOK | M_ZERO);
168 		mp->mnt_data = (qaddr_t)hmp;
169 		hmp->mp = mp;
170 		hmp->zbuf = kmalloc(HAMMER_BUFSIZE, M_HAMMER, M_WAITOK|M_ZERO);
171 		hmp->namekey_iterator = mycpu->gd_time_seconds;
172 		/*TAILQ_INIT(&hmp->recycle_list);*/
173 
174 		hmp->root_btree_beg.obj_id = -0x8000000000000000LL;
175 		hmp->root_btree_beg.key = -0x8000000000000000LL;
176 		hmp->root_btree_beg.create_tid = 1;
177 		hmp->root_btree_beg.delete_tid = 1;
178 		hmp->root_btree_beg.rec_type = 0;
179 		hmp->root_btree_beg.obj_type = 0;
180 
181 		hmp->root_btree_end.obj_id = 0x7FFFFFFFFFFFFFFFLL;
182 		hmp->root_btree_end.key = 0x7FFFFFFFFFFFFFFFLL;
183 		hmp->root_btree_end.create_tid = 0xFFFFFFFFFFFFFFFFULL;
184 		hmp->root_btree_end.delete_tid = 0;   /* special case */
185 		hmp->root_btree_end.rec_type = 0xFFFFU;
186 		hmp->root_btree_end.obj_type = 0;
187 		lockinit(&hmp->blockmap_lock, "blkmap", 0, 0);
188 
189 		TAILQ_INIT(&hmp->flush_list);
190 
191 		for (i = 0; i < HAMMER_MAX_ZONES; ++i) {
192 			hmp->zone_limits[i] =
193 				HAMMER_ZONE_ENCODE(i, HAMMER_ZONE_LIMIT);
194 			/*
195 			 * Sysctl override for debugging (force the zone
196 			 * the cycle more quickly then every 2^60 bytes).
197 			 */
198 			if (hammer_zone_limit) {
199 				hmp->zone_limits[i] =
200 				    HAMMER_ZONE_ENCODE(i, hammer_zone_limit);
201 			}
202 			hammer_init_holes(hmp, &hmp->holes[i]);
203 		}
204 	}
205 	hmp->hflags = info.hflags;
206 	if (info.asof) {
207 		mp->mnt_flag |= MNT_RDONLY;
208 		hmp->asof = info.asof;
209 	} else {
210 		hmp->asof = HAMMER_MAX_TID;
211 	}
212 
213 	/*
214 	 * Re-open read-write if originally read-only, or vise-versa XXX
215 	 */
216 	if (mp->mnt_flag & MNT_UPDATE) {
217 		if (hmp->ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
218 			kprintf("HAMMER read-write -> read-only XXX\n");
219 			hmp->ronly = 1;
220 		} else if (hmp->ronly && (mp->mnt_flag & MNT_RDONLY) == 0) {
221 			kprintf("HAMMER read-only -> read-write XXX\n");
222 			hmp->ronly = 0;
223 		}
224 		return(0);
225 	}
226 
227 	RB_INIT(&hmp->rb_vols_root);
228 	RB_INIT(&hmp->rb_inos_root);
229 	RB_INIT(&hmp->rb_nods_root);
230 	hmp->ronly = ((mp->mnt_flag & MNT_RDONLY) != 0);
231 
232 	/*
233 	 * Load volumes
234 	 */
235 	path = objcache_get(namei_oc, M_WAITOK);
236 	hmp->nvolumes = info.nvolumes;
237 	for (i = 0; i < info.nvolumes; ++i) {
238 		error = copyin(&info.volumes[i], &upath, sizeof(char *));
239 		if (error == 0)
240 			error = copyinstr(upath, path, MAXPATHLEN, NULL);
241 		if (error == 0)
242 			error = hammer_install_volume(hmp, path);
243 		if (error)
244 			break;
245 	}
246 	objcache_put(namei_oc, path);
247 
248 	/*
249 	 * Make sure we found a root volume
250 	 */
251 	if (error == 0 && hmp->rootvol == NULL) {
252 		kprintf("hammer_mount: No root volume found!\n");
253 		error = EINVAL;
254 	}
255 	if (error) {
256 		hammer_free_hmp(mp);
257 		return (error);
258 	}
259 
260 	/*
261 	 * No errors, setup enough of the mount point so we can lookup the
262 	 * root vnode.
263 	 */
264 	mp->mnt_iosize_max = MAXPHYS;
265 	mp->mnt_kern_flag |= MNTK_FSMID;
266 
267 	/*
268 	 * note: f_iosize is used by vnode_pager_haspage() when constructing
269 	 * its VOP_BMAP call.
270 	 */
271 	mp->mnt_stat.f_iosize = HAMMER_BUFSIZE;
272 	mp->mnt_stat.f_bsize = HAMMER_BUFSIZE;
273 	mp->mnt_maxsymlinklen = 255;
274 	mp->mnt_flag |= MNT_LOCAL;
275 
276 	vfs_add_vnodeops(mp, &hammer_vnode_vops, &mp->mnt_vn_norm_ops);
277 	vfs_add_vnodeops(mp, &hammer_spec_vops, &mp->mnt_vn_spec_ops);
278 	vfs_add_vnodeops(mp, &hammer_fifo_vops, &mp->mnt_vn_fifo_ops);
279 
280 	/*
281 	 * The root volume's ondisk pointer is only valid if we hold a
282 	 * reference to it.
283 	 */
284 	rootvol = hammer_get_root_volume(hmp, &error);
285 	if (error)
286 		goto done;
287 	ksnprintf(mp->mnt_stat.f_mntfromname,
288 		  sizeof(mp->mnt_stat.f_mntfromname), "%s",
289 		  rootvol->ondisk->vol_name);
290 	mp->mnt_stat.f_fsid.val[0] =
291 		crc32((char *)&rootvol->ondisk->vol_fsid + 0, 8);
292 	mp->mnt_stat.f_fsid.val[1] =
293 		crc32((char *)&rootvol->ondisk->vol_fsid + 8, 8);
294 	hammer_rel_volume(rootvol, 0);
295 
296 	hammer_flusher_create(hmp);
297 
298 	/*
299 	 * Locate the root directory using the root cluster's B-Tree as a
300 	 * starting point.  The root directory uses an obj_id of 1.
301 	 *
302 	 * FUTURE: Leave the root directory cached referenced but unlocked
303 	 * in hmp->rootvp (need to flush it on unmount).
304 	 */
305 	error = hammer_vfs_vget(mp, 1, &rootvp);
306 	if (error)
307 		goto done;
308 	vput(rootvp);
309 	/*vn_unlock(hmp->rootvp);*/
310 
311 done:
312 	/*
313 	 * Cleanup and return.
314 	 */
315 	if (error)
316 		hammer_free_hmp(mp);
317 	return (error);
318 }
319 
320 static int
321 hammer_vfs_unmount(struct mount *mp, int mntflags)
322 {
323 #if 0
324 	struct hammer_mount *hmp = (void *)mp->mnt_data;
325 #endif
326 	int flags;
327 	int error;
328 
329 	/*
330 	 * Clean out the vnodes
331 	 */
332 	flags = 0;
333 	if (mntflags & MNT_FORCE)
334 		flags |= FORCECLOSE;
335 	if ((error = vflush(mp, 0, flags)) != 0)
336 		return (error);
337 
338 	/*
339 	 * Clean up the internal mount structure and related entities.  This
340 	 * may issue I/O.
341 	 */
342 	hammer_free_hmp(mp);
343 	return(0);
344 }
345 
346 /*
347  * Clean up the internal mount structure and disassociate it from the mount.
348  * This may issue I/O.
349  */
350 static void
351 hammer_free_hmp(struct mount *mp)
352 {
353 	struct hammer_mount *hmp = (void *)mp->mnt_data;
354 	int i;
355 
356 #if 0
357 	/*
358 	 * Clean up the root vnode
359 	 */
360 	if (hmp->rootvp) {
361 		vrele(hmp->rootvp);
362 		hmp->rootvp = NULL;
363 	}
364 #endif
365 	hammer_flusher_sync(hmp);
366 	hammer_flusher_destroy(hmp);
367 
368 	/*
369 	 * Unload & flush inodes
370 	 */
371 	RB_SCAN(hammer_ino_rb_tree, &hmp->rb_inos_root, NULL,
372 		hammer_unload_inode, (void *)MNT_WAIT);
373 
374 	/*
375 	 * Unload & flush volumes
376 	 */
377 	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
378 		hammer_unload_volume, NULL);
379 
380 	mp->mnt_data = NULL;
381 	mp->mnt_flag &= ~MNT_LOCAL;
382 	hmp->mp = NULL;
383 	kfree(hmp->zbuf, M_HAMMER);
384 	lockuninit(&hmp->blockmap_lock);
385 
386 	for (i = 0; i < HAMMER_MAX_ZONES; ++i)
387 		hammer_free_holes(hmp, &hmp->holes[i]);
388 
389 	kfree(hmp, M_HAMMER);
390 }
391 
392 /*
393  * Obtain a vnode for the specified inode number.  An exclusively locked
394  * vnode is returned.
395  */
396 int
397 hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
398 {
399 	struct hammer_transaction trans;
400 	struct hammer_mount *hmp = (void *)mp->mnt_data;
401 	struct hammer_inode *ip;
402 	int error;
403 
404 	hammer_simple_transaction(&trans, hmp);
405 
406 	/*
407 	 * Lookup the requested HAMMER inode.  The structure must be
408 	 * left unlocked while we manipulate the related vnode to avoid
409 	 * a deadlock.
410 	 */
411 	ip = hammer_get_inode(&trans, NULL, ino, hmp->asof, 0, &error);
412 	if (ip == NULL) {
413 		*vpp = NULL;
414 		return(error);
415 	}
416 	error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
417 	hammer_rel_inode(ip, 0);
418 	hammer_commit_transaction(&trans);
419 	return (error);
420 }
421 
/*
 * Return the root vnode for the filesystem.
 *
 * The root directory is looked up by inode number 1 through
 * hammer_vfs_vget() on every call; no root vnode is cached in the
 * hammer_mount structure.
 *
 * mp	- mount whose root is wanted
 * vpp	- receives the root vnode (see hammer_vfs_vget() for its state)
 *
 * Returns whatever hammer_vfs_vget() returns.
 */
static int
hammer_vfs_root(struct mount *mp, struct vnode **vpp)
{
	return (hammer_vfs_vget(mp, 1, vpp));
}
439 
440 static int
441 hammer_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
442 {
443 	struct hammer_mount *hmp = (void *)mp->mnt_data;
444 	hammer_volume_t volume;
445 	hammer_volume_ondisk_t ondisk;
446 	int error;
447 	int64_t bfree;
448 
449 	volume = hammer_get_root_volume(hmp, &error);
450 	if (error)
451 		return(error);
452 	ondisk = volume->ondisk;
453 
454 	/*
455 	 * Basic stats
456 	 */
457 	mp->mnt_stat.f_files = ondisk->vol0_stat_inodes;
458 	bfree = ondisk->vol0_stat_freebigblocks * HAMMER_LARGEBLOCK_SIZE;
459 	hammer_rel_volume(volume, 0);
460 
461 	mp->mnt_stat.f_bfree = bfree / HAMMER_BUFSIZE;
462 	mp->mnt_stat.f_bavail = mp->mnt_stat.f_bfree;
463 	if (mp->mnt_stat.f_files < 0)
464 		mp->mnt_stat.f_files = 0;
465 
466 	*sbp = mp->mnt_stat;
467 	return(0);
468 }
469 
470 static int
471 hammer_vfs_sync(struct mount *mp, int waitfor)
472 {
473 	struct hammer_mount *hmp = (void *)mp->mnt_data;
474 	int error;
475 
476 	error = hammer_sync_hmp(hmp, waitfor);
477 	return(error);
478 }
479 
480 /*
481  * Convert a vnode to a file handle.
482  */
483 static int
484 hammer_vfs_vptofh(struct vnode *vp, struct fid *fhp)
485 {
486 	hammer_inode_t ip;
487 
488 	KKASSERT(MAXFIDSZ >= 16);
489 	ip = VTOI(vp);
490 	fhp->fid_len = offsetof(struct fid, fid_data[16]);
491 	fhp->fid_reserved = 0;
492 	bcopy(&ip->obj_id, fhp->fid_data + 0, sizeof(ip->obj_id));
493 	bcopy(&ip->obj_asof, fhp->fid_data + 8, sizeof(ip->obj_asof));
494 	return(0);
495 }
496 
497 
498 /*
499  * Convert a file handle back to a vnode.
500  */
501 static int
502 hammer_vfs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
503 {
504 	struct hammer_transaction trans;
505 	struct hammer_inode *ip;
506 	struct hammer_inode_info info;
507 	int error;
508 
509 	bcopy(fhp->fid_data + 0, &info.obj_id, sizeof(info.obj_id));
510 	bcopy(fhp->fid_data + 8, &info.obj_asof, sizeof(info.obj_asof));
511 
512 	hammer_simple_transaction(&trans, (void *)mp->mnt_data);
513 
514 	/*
515 	 * Get/allocate the hammer_inode structure.  The structure must be
516 	 * unlocked while we manipulate the related vnode to avoid a
517 	 * deadlock.
518 	 */
519 	ip = hammer_get_inode(&trans, NULL, info.obj_id, info.obj_asof,
520 			      0, &error);
521 	if (ip == NULL) {
522 		*vpp = NULL;
523 		return(error);
524 	}
525 	error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
526 	hammer_rel_inode(ip, 0);
527 	hammer_commit_transaction(&trans);
528 	return (error);
529 }
530 
531 static int
532 hammer_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
533 		    int *exflagsp, struct ucred **credanonp)
534 {
535 	hammer_mount_t hmp = (void *)mp->mnt_data;
536 	struct netcred *np;
537 	int error;
538 
539 	np = vfs_export_lookup(mp, &hmp->export, nam);
540 	if (np) {
541 		*exflagsp = np->netc_exflags;
542 		*credanonp = &np->netc_anon;
543 		error = 0;
544 	} else {
545 		error = EACCES;
546 	}
547 	return (error);
548 
549 }
550 
551 int
552 hammer_vfs_export(struct mount *mp, int op, const struct export_args *export)
553 {
554 	hammer_mount_t hmp = (void *)mp->mnt_data;
555 	int error;
556 
557 	switch(op) {
558 	case MOUNTCTL_SET_EXPORT:
559 		error = vfs_export(mp, &hmp->export, export);
560 		break;
561 	default:
562 		error = EOPNOTSUPP;
563 		break;
564 	}
565 	return(error);
566 }
567 
568