xref: /dflybsd-src/sys/vfs/hammer2/hammer2_inode.c (revision 874e15d007943a40fad40d5e25620e2bf00235a1)
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41 
42 #include "hammer2.h"
43 
44 #define INODE_DEBUG	0
45 
46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
47 					 hammer2_cluster_t **cparentp,
48 					 hammer2_cluster_t **clusterp,
49 					 hammer2_tid_t inum);
50 
51 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
52 	     hammer2_tid_t, inum);
53 
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57 	if (ip1->inum < ip2->inum)
58 		return(-1);
59 	if (ip1->inum > ip2->inum)
60 		return(1);
61 	return(0);
62 }
63 
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared locks and exclusive locks on inodes.
68  *
69  * The standard exclusive inode lock always resolves the inode meta-data,
70  * but there is a bypass version used by the vnode reclamation code that
71  * avoids the I/O.
72  *
73  * The inode locking function locks the inode itself, resolves any stale
74  * chains in the inode's cluster, and allocates a fresh copy of the
75  * cluster with 1 ref and all the underlying chains locked.  Duplication
76  * races are handled by this function.
77  *
78  * ip->cluster will be stable while the inode is locked.
79  *
80  * NOTE: We don't combine the inode/chain lock because putting away an
81  *       inode would otherwise confuse multiple lock holders of the inode.
82  *
83  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
84  *	 and never point to a hardlink pointer.
85  *
86  * NOTE: Caller must not passed HAMMER2_RESOLVE_NOREF because we use it
87  *	 internally and refs confusion will ensue.
88  *
89  * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
90  *	 will feel free to reduce the chain set in the cluster as an
91  *	 optimization.  It will still be validated against the quorum if
92  *	 appropriate, but the optimization might be able to reduce data
93  *	 accesses to one node.
94  */
95 hammer2_cluster_t *
96 hammer2_inode_lock_ex(hammer2_inode_t *ip)
97 {
98 	return hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_ALWAYS);
99 }
100 
hammer2_cluster_t *
hammer2_inode_lock_nex(hammer2_inode_t *ip, int how)
{
	hammer2_cluster_t *cluster;

	/*
	 * NOREF is reserved for our internal use when locking the cluster
	 * copy below; callers must not pass it (see block comment above).
	 */
	KKASSERT((how & HAMMER2_RESOLVE_NOREF) == 0);

	/* ref first so the inode cannot be freed while we block on the mtx */
	hammer2_inode_ref(ip);
	hammer2_mtx_ex(&ip->lock);

	/*
	 * Create a copy of ip->cluster and lock it.  Note that the copy
	 * will have a ref on the cluster AND its chains and we don't want
	 * a second ref to either when we lock it.
	 *
	 * The copy will not have a focus until it is locked.
	 *
	 * Exclusive inode locks set the template focus chain in (ip)
	 * as a hint.  Cluster locks can ALWAYS replace the focus in the
	 * working copy if the hint does not work out, so beware.
	 */
	cluster = hammer2_cluster_copy(&ip->cluster);
	hammer2_cluster_lock(cluster, how | HAMMER2_RESOLVE_NOREF);
	/* propagate the resolved focus back into the template as a hint */
	ip->cluster.focus = cluster->focus;

	/*
	 * Returned cluster must resolve hardlink pointers
	 */
	if ((how & HAMMER2_RESOLVE_MASK) == HAMMER2_RESOLVE_ALWAYS) {
		const hammer2_inode_data_t *ripdata;
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		/* in-memory inodes never point at a hardlink pointer entry */
		KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
#if 0
		if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
		    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
			error = hammer2_hardlink_find(ip->pip, NULL, &cluster);
			KKASSERT(error == 0);
		}
#endif
	}
	return (cluster);
}
143 
144 void
145 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
146 {
147 	if (cluster)
148 		hammer2_cluster_unlock(cluster);
149 	hammer2_mtx_unlock(&ip->lock);
150 	hammer2_inode_drop(ip);
151 }
152 
153 /*
154  * Standard shared inode lock always resolves the inode meta-data.
155  *
156  * This type of inode lock may be used only when the overall operation is
157  * non-modifying.  It will also optimize cluster accesses for non-modifying
158  * operations.
159  *
160  * NOTE: We don't combine the inode/chain lock because putting away an
161  *       inode would otherwise confuse multiple lock holders of the inode.
162  *
163  *	 Shared locks are especially sensitive to having too many shared
164  *	 lock counts (from the same thread) on certain paths which might
165  *	 need to upgrade them.  Only one count of a shared lock can be
166  *	 upgraded.
167  */
hammer2_cluster_t *
hammer2_inode_lock_sh(hammer2_inode_t *ip)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_cluster_t *cluster;

	/* ref first so the inode cannot be freed while we block on the mtx */
	hammer2_inode_ref(ip);
	hammer2_mtx_sh(&ip->lock);

	/*
	 * Create a copy of ip->cluster and lock it.  Note that the copy
	 * will have a ref on the cluster AND its chains and we don't want
	 * a second ref to either when we lock it.
	 *
	 * The copy will not have a focus until it is locked.
	 *
	 * Chains available in the cluster may be reduced once a quorum is
	 * acquired, and can be reduced further as an optimization due to
	 * RDONLY being set.
	 */
	cluster = hammer2_cluster_copy(&ip->cluster);
	hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS |
				      HAMMER2_RESOLVE_SHARED |
				      HAMMER2_RESOLVE_NOREF |
				      HAMMER2_RESOLVE_RDONLY);
	/* do not update ip->cluster.focus on a shared inode lock! */
	/*ip->cluster.focus = cluster->focus;*/

	/*
	 * Returned cluster must resolve hardlink pointers
	 */
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	/* in-memory inodes never point at a hardlink pointer entry */
	KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
#if 0
	if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
	    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
		error = hammer2_hardlink_find(ip->pip, NULL, &cluster);
		KKASSERT(error == 0);
	}
#endif

	return (cluster);
}
211 
212 void
213 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
214 {
215 	if (cluster)
216 		hammer2_cluster_unlock(cluster);
217 	hammer2_mtx_unlock(&ip->lock);
218 	hammer2_inode_drop(ip);
219 }
220 
221 /*
222  * Temporarily release a lock held shared or exclusive.  Caller must
223  * hold the lock shared or exclusive on call and lock will be released
224  * on return.
225  *
226  * Restore a lock that was temporarily released.
227  */
228 hammer2_mtx_state_t
229 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
230 {
231 	return hammer2_mtx_temp_release(&ip->lock);
232 }
233 
/*
 * Reacquire ip->lock in the state returned by a prior
 * hammer2_inode_lock_temp_release().
 */
void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
	hammer2_mtx_temp_restore(&ip->lock, ostate);
}
239 
240 /*
241  * Upgrade a shared inode lock to exclusive and return.  If the inode lock
242  * is already held exclusively this is a NOP.
243  *
244  * The caller MUST hold the inode lock either shared or exclusive on call
245  * and will own the lock exclusively on return.
246  *
247  * Returns non-zero if the lock was already exclusive prior to the upgrade.
248  */
249 int
250 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
251 {
252 	int wasexclusive;
253 
254 	if (mtx_islocked_ex(&ip->lock)) {
255 		wasexclusive = 1;
256 	} else {
257 		hammer2_mtx_unlock(&ip->lock);
258 		hammer2_mtx_ex(&ip->lock);
259 		wasexclusive = 0;
260 	}
261 	return wasexclusive;
262 }
263 
264 /*
265  * Downgrade an inode lock from exclusive to shared only if the inode
266  * lock was previously shared.  If the inode lock was previously exclusive,
267  * this is a NOP.
268  */
269 void
270 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
271 {
272 	if (wasexclusive == 0)
273 		mtx_downgrade(&ip->lock);
274 }
275 
276 /*
277  * Lookup an inode by inode number
278  */
279 hammer2_inode_t *
280 hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
281 {
282 	hammer2_inode_t *ip;
283 
284 	KKASSERT(pmp);
285 	if (pmp->spmp_hmp) {
286 		ip = NULL;
287 	} else {
288 		hammer2_spin_ex(&pmp->inum_spin);
289 		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
290 		if (ip)
291 			hammer2_inode_ref(ip);
292 		hammer2_spin_unex(&pmp->inum_spin);
293 	}
294 	return(ip);
295 }
296 
297 /*
298  * Adding a ref to an inode is only legal if the inode already has at least
299  * one ref.
300  *
301  * (can be called with spinlock held)
302  */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	/* legal only when the caller already holds a ref (refs > 0) */
	atomic_add_int(&ip->refs, 1);
}
308 
309 /*
310  * Drop an inode reference, freeing the inode when the last reference goes
311  * away.
312  */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *pip;
	u_int refs;

	/*
	 * Loop so the parent (pip) can be dropped iteratively rather than
	 * recursively when our implied ref from ip->pip goes away.
	 */
	while (ip) {
		refs = ip->refs;
		cpu_ccfence();	/* force a re-read of refs each pass */
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 *
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				/* at 0 refs no lock holders may remain */
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
				}
				hammer2_spin_unex(&pmp->inum_spin);

				pip = ip->pip;
				ip->pip = NULL;
				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				ip = pip;
				/* continue with pip (can be NULL) */
			} else {
				/* lost the cmpset race, retry the loop */
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
376 
377 /*
378  * Get the vnode associated with the given inode, allocating the vnode if
379  * necessary.  The vnode will be returned exclusively locked.
380  *
381  * The caller must lock the inode (shared or exclusive).
382  *
383  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
384  * races.
385  */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;

	/* cparent is locked by the caller so ripdata stays valid throughout */
	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		int wasexclusive;

		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			hammer2_mtx_state_t ostate;

			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				/* lost a reclaim race, retry from the top */
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				/* association changed while unlocked, retry */
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.  This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 */
		wasexclusive = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			/* lost the allocation race, toss our vnode, retry */
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, wasexclusive);
			continue;
		}

		/* initialize the vnode type/ops from the inode meta-data */
		switch (ripdata->type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			vp->v_type = VREG;
			vinitvmio(vp, ripdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 */
			vp->v_type = VLNK;
			vinitvmio(vp, ripdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ripdata->type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp, ripdata->rmajor, ripdata->rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		default:
			panic("hammer2: unhandled objtype %d", ripdata->type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, wasexclusive);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}
522 
523 /*
524  * Returns the inode associated with the passed-in cluster, creating the
525  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
526  *
527  * The passed-in cluster must be locked and will remain locked on return.
528  * The returned inode will be locked and the caller may dispose of both
529  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
530  * a hardlink it must ref/unlock/relock/drop the inode.
531  *
532  * The hammer2_inode structure regulates the interface between the high level
533  * kernel VNOPS API and the filesystem backend (the chains).
534  *
535  * On return the inode is locked with the supplied cluster.
536  */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
		  hammer2_cluster_t *cluster)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(cluster == NULL ||
		 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 *
	 * Cluster can be NULL during the initial pfs allocation.
	 */
again:
	while (cluster) {
		iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->inum);
		if (nip == NULL)
			break;		/* not cached, allocate below */

		hammer2_mtx_ex(&nip->lock);

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			/* raced a drop/removal, retry the lookup */
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			continue;
		}
		/* resynchronize the cached inode to the passed-in cluster */
		hammer2_inode_repoint(nip, NULL, cluster);

		return nip;	/* returned ref'd and exclusively locked */
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster.  A cluster is provided for normal
	 * inodes but typically not for the super-root or PFS inodes.
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	if (cluster) {
		nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
		nip->inum = nipdata->inum;
		nip->size = nipdata->size;
		nip->mtime = nipdata->mtime;
		hammer2_inode_repoint(nip, NULL, cluster);
	} else {
		nip->inum = 1;			/* PFS inum is always 1 XXX */
		/* mtime will be updated when a cluster is available */
	}

	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);	/* ref dip for nip->pip */

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock_ex() call.
	 */
	nip->refs = 1;
	hammer2_mtx_init(&nip->lock, "h2inode");
	hammer2_mtx_ex(&nip->lock);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		hammer2_spin_ex(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			/* collision: another thread inserted first */
			hammer2_spin_unex(&pmp->inum_spin);
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		hammer2_spin_unex(&pmp->inum_spin);
	}

	return (nip);	/* returned ref'd and exclusively locked */
}
641 
642 /*
643  * Create a new inode in the specified directory using the vattr to
644  * figure out the type of inode.
645  *
646  * If no error occurs the new inode with its cluster locked is returned in
647  * *nipp, otherwise an error is returned and *nipp is set to NULL.
648  *
649  * If vap and/or cred are NULL the related fields are not set and the
650  * inode type defaults to a directory.  This is used when creating PFSs
651  * under the super-root, so the inode number is set to 1 in this case.
652  *
653  * dip is not locked on entry.
654  *
655  * NOTE: When used to create a snapshot, the inode is temporarily associated
656  *	 with the super-root spmp. XXX should pass new pmp for snapshot.
657  */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len,
		     hammer2_cluster_t **clusterp,
		     int flags, int *errorp)
{
	const hammer2_inode_data_t *dipdata;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	hammer2_inode_t *nip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int error;
	uid_t xuid;
	uuid_t dip_uid;
	uuid_t dip_gid;
	uint32_t dip_mode;
	uint8_t dip_comp_algo;
	uint8_t dip_check_algo;

	lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 */
retry:
	cparent = hammer2_inode_lock_ex(dip);
	dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
	/*
	 * Snapshot inheritable fields from the directory now; dipdata
	 * becomes stale once the directory lock is released below.
	 */
	dip_uid = dipdata->uid;
	dip_gid = dipdata->gid;
	dip_mode = dipdata->mode;
	dip_comp_algo = dipdata->comp_algo;
	dip_check_algo = dipdata->check_algo;

	error = 0;
	while (error == 0) {
		cluster = hammer2_cluster_lookup(cparent, &key_dummy,
						 lhc, lhc, 0);
		if (cluster == NULL)
			break;		/* found a free key, use it */
		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
			error = ENOSPC;
		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
			error = ENOSPC;	/* iterator space exhausted */
		hammer2_cluster_unlock(cluster);
		cluster = NULL;
		++lhc;			/* iterate to the next candidate key */
	}

	if (error == 0) {
		error = hammer2_cluster_create(trans, cparent, &cluster,
					     lhc, 0,
					     HAMMER2_BREF_TYPE_INODE,
					     HAMMER2_INODE_BYTES,
					     flags);
	}
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s chain=%p\n",
		(int)name_len, (int)name_len, name,
		(cluster ? cluster->focus : NULL));
#endif

	/*
	 * Cleanup and handle retries.
	 */
	if (error == EAGAIN) {
		/* hold cparent across the wait, then retry from scratch */
		hammer2_cluster_ref(cparent);
		hammer2_inode_unlock_ex(dip, cparent);
		hammer2_cluster_wait(cparent);
		hammer2_cluster_drop(cparent);
		goto retry;
	}
	hammer2_inode_unlock_ex(dip, cparent);
	cparent = NULL;

	if (error) {
		KKASSERT(cluster == NULL);
		*errorp = error;
		return (NULL);
	}

	/*
	 * Set up the new inode.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *	 transaction.  If the need arises we can adjust
	 *	 hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	nipdata->inum = trans->inode_tid;
	hammer2_cluster_modsync(cluster);
	nip = hammer2_inode_get(dip->pmp, dip, cluster);
	/* re-resolve wdata, inode_get may have repointed the cluster */
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;

	if (vap) {
		KKASSERT(trans->inodes_created == 0);
		nipdata->type = hammer2_get_obj_type(vap->va_type);
		nipdata->inum = trans->inode_tid;
		++trans->inodes_created;

		switch (nipdata->type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			nipdata->rmajor = vap->va_rmajor;
			nipdata->rminor = vap->va_rminor;
			break;
		default:
			break;
		}
	} else {
		/* no vattr: PFS/super-root style directory, inum 1 */
		nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
		nipdata->inum = 1;
	}

	/* Inherit parent's inode compression mode. */
	nip->comp_heuristic = 0;
	nipdata->comp_algo = dip_comp_algo;
	nipdata->check_algo = dip_check_algo;
	nipdata->version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&nipdata->ctime);
	nipdata->mtime = nipdata->ctime;
	if (vap)
		nipdata->mode = vap->va_mode;
	nipdata->nlinks = 1;
	if (vap) {
		/*
		 * Resolve ownership from the vattr, the credentials, and
		 * the parent directory (setgid-style inheritance via the
		 * vop helper).
		 */
		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			nipdata->uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&nipdata->uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			nipdata->gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
		else if (dip)
			nipdata->gid = dip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
	    nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
		nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}

	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
	bcopy(name, nipdata->filename, name_len);
	nipdata->name_key = lhc;
	nipdata->name_len = name_len;
	hammer2_cluster_modsync(cluster);
	*clusterp = cluster;	/* returned locked to the caller */

	return (nip);
}
840 
841 /*
842  * The cluster has been removed from the original directory and replaced
843  * with a hardlink pointer.  Move the cluster to the specified parent
844  * directory, change the filename to "0xINODENUMBER", and adjust the key.
845  * The cluster becomes our invisible hardlink target.
846  *
847  * The original cluster must be deleted on entry.
848  */
static
void
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
			hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
			int nlinks, int *errorp)
{
	const hammer2_inode_data_t *iptmp;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *xcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	hammer2_blockref_t bref;

	/* the hidden hardlink target is keyed by its inode number */
	iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = iptmp->inum;
	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  lhc represents the inode number so there is
	 * no collision iteration.
	 *
	 * There should be no key collisions with invisible inode keys.
	 *
	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
	 *	    dip->cluster cache.
	 */
	*errorp = 0;
	xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
				      lhc, lhc, 0);
	if (xcluster) {
		/* unexpected collision on an invisible key */
		kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
			xcluster->focus, dip, dcluster->focus,
			dip->cluster.focus);
		hammer2_cluster_unlock(xcluster);
		xcluster = NULL;
		*errorp = ENOSPC;
#if 0
		Debugger("X3");
#endif
	}

	/*
	 * Handle the error case
	 */
	if (*errorp) {
		panic("error2");
		KKASSERT(xcluster == NULL);
		return;
	}

	/*
	 * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
	 * same target bref as xcluster and then delete xcluster.  The
	 * duplication occurs after xcluster in flush order even though
	 * xcluster is deleted after the duplication. XXX
	 *
	 * WARNING! Duplications (to a different parent) can cause indirect
	 *	    blocks to be inserted, refactor xcluster.
	 *
	 * WARNING! Only key and keybits is extracted from a passed-in bref.
	 */
	hammer2_cluster_bref(cluster, &bref);
	bref.key = lhc;			/* invisible dir entry key */
	bref.keybits = 0;
	hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);

	/*
	 * cluster is now 'live' again.. adjust the filename.
	 *
	 * Directory entries are inodes but this is a hidden hardlink
	 * target.  The name isn't used but to ease debugging give it
	 * a name after its inode number.
	 */
	hammer2_cluster_modify(trans, cluster, 0);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
		  "0x%016jx", (intmax_t)nipdata->inum);
	nipdata->name_len = strlen(nipdata->filename);
	nipdata->name_key = lhc;
	nipdata->nlinks += nlinks;	/* caller-supplied link adjustment */
	hammer2_cluster_modsync(cluster);
}
932 
933 /*
934  * Connect the target inode represented by (cluster) to the media topology
935  * at (dip, name, len).  The caller can pass a rough *chainp, this function
936  * will issue lookup()s to position the parent chain properly for the
937  * chain insertion.
938  *
939  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
940  * entry instead of connecting (cluster).
941  *
942  * If hlink is FALSE this function expects (cluster) to be unparented.
943  */
944 int
945 hammer2_inode_connect(hammer2_trans_t *trans,
946 		      hammer2_cluster_t **clusterp, int hlink,
947 		      hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
948 		      const uint8_t *name, size_t name_len,
949 		      hammer2_key_t lhc)
950 {
951 	hammer2_inode_data_t *wipdata;
952 	hammer2_cluster_t *ocluster;
953 	hammer2_cluster_t *ncluster;
954 	hammer2_key_t key_dummy;
955 	int error;
956 
957 	/*
958 	 * Since ocluster is either disconnected from the topology or
959 	 * represents a hardlink terminus which is always a parent of or
960 	 * equal to dip, we should be able to safely lock dip->chain for
961 	 * our setup.
962 	 *
963 	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
964 	 *	    dip->cluster.
965 	 *
966 	 * If name is non-NULL we calculate lhc, else we use the passed-in
967 	 * lhc.
968 	 */
969 	ocluster = *clusterp;
970 
971 	if (name) {
972 		lhc = hammer2_dirhash(name, name_len);
973 
974 		/*
975 		 * Locate the inode or indirect block to create the new
976 		 * entry in.  At the same time check for key collisions
977 		 * and iterate until we don't get one.
978 		 */
979 		error = 0;
980 		while (error == 0) {
981 			ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
982 						      lhc, lhc, 0);
983 			if (ncluster == NULL)
984 				break;
985 			if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
986 			    HAMMER2_DIRHASH_LOMASK) {
987 				error = ENOSPC;
988 			}
989 			hammer2_cluster_unlock(ncluster);
990 			ncluster = NULL;
991 			++lhc;
992 		}
993 	} else {
994 		/*
995 		 * Reconnect to specific key (used when moving
996 		 * unlinked-but-open files into the hidden directory).
997 		 */
998 		ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
999 						  lhc, lhc, 0);
1000 		KKASSERT(ncluster == NULL);
1001 	}
1002 
1003 	if (error == 0) {
1004 		if (hlink) {
1005 			/*
1006 			 * Hardlink pointer needed, create totally fresh
1007 			 * directory entry.
1008 			 *
1009 			 * We must refactor ocluster because it might have
1010 			 * been shifted into an indirect cluster by the
1011 			 * create.
1012 			 */
1013 			KKASSERT(ncluster == NULL);
1014 			error = hammer2_cluster_create(trans,
1015 						       dcluster, &ncluster,
1016 						       lhc, 0,
1017 						       HAMMER2_BREF_TYPE_INODE,
1018 						       HAMMER2_INODE_BYTES,
1019 						       0);
1020 		} else {
1021 			/*
1022 			 * Reconnect the original cluster under the new name.
1023 			 * Original cluster must have already been deleted by
1024 			 * teh caller.
1025 			 *
1026 			 * WARNING! Can cause held-over clusters to require a
1027 			 *	    refactor.  Fortunately we have none (our
1028 			 *	    locked clusters are passed into and
1029 			 *	    modified by the call).
1030 			 */
1031 			ncluster = ocluster;
1032 			ocluster = NULL;
1033 			error = hammer2_cluster_create(trans,
1034 						       dcluster, &ncluster,
1035 						       lhc, 0,
1036 						       HAMMER2_BREF_TYPE_INODE,
1037 						       HAMMER2_INODE_BYTES,
1038 						       0);
1039 		}
1040 	}
1041 
1042 	/*
1043 	 * Unlock stuff.
1044 	 */
1045 	KKASSERT(error != EAGAIN);
1046 
1047 	/*
1048 	 * ncluster should be NULL on error, leave ocluster
1049 	 * (ocluster == *clusterp) alone.
1050 	 */
1051 	if (error) {
1052 		KKASSERT(ncluster == NULL);
1053 		return (error);
1054 	}
1055 
1056 	/*
1057 	 * Directory entries are inodes so if the name has changed we have
1058 	 * to update the inode.
1059 	 *
1060 	 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
1061 	 * cluster, the caller will access the hardlink via the actual hardlink
1062 	 * target file and not the hardlink pointer entry, so we must still
1063 	 * return ocluster.
1064 	 */
1065 	if (hlink && hammer2_hardlink_enable >= 0) {
1066 		/*
1067 		 * Create the HARDLINK pointer.  oip represents the hardlink
1068 		 * target in this situation.
1069 		 *
1070 		 * We will return ocluster (the hardlink target).
1071 		 */
1072 		hammer2_cluster_modify(trans, ncluster, 0);
1073 		hammer2_cluster_clr_chainflags(ncluster,
1074 					       HAMMER2_CHAIN_UNLINKED);
1075 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1076 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1077 		bcopy(name, wipdata->filename, name_len);
1078 		wipdata->name_key = lhc;
1079 		wipdata->name_len = name_len;
1080 		wipdata->target_type =
1081 				hammer2_cluster_rdata(ocluster)->ipdata.type;
1082 		wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1083 		wipdata->inum = hammer2_cluster_rdata(ocluster)->ipdata.inum;
1084 		wipdata->version = HAMMER2_INODE_VERSION_ONE;
1085 		wipdata->nlinks = 1;
1086 		wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1087 		hammer2_cluster_modsync(ncluster);
1088 		hammer2_cluster_unlock(ncluster);
1089 		ncluster = ocluster;
1090 		ocluster = NULL;
1091 	} else {
1092 		/*
1093 		 * ncluster is a duplicate of ocluster at the new location.
1094 		 * We must fixup the name stored in the inode data.
1095 		 * The bref key has already been adjusted by inode_connect().
1096 		 */
1097 		hammer2_cluster_modify(trans, ncluster, 0);
1098 		hammer2_cluster_clr_chainflags(ncluster,
1099 					       HAMMER2_CHAIN_UNLINKED);
1100 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1101 
1102 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1103 		bcopy(name, wipdata->filename, name_len);
1104 		wipdata->name_key = lhc;
1105 		wipdata->name_len = name_len;
1106 		wipdata->nlinks = 1;
1107 		hammer2_cluster_modsync(ncluster);
1108 	}
1109 
1110 	/*
1111 	 * We are replacing ocluster with ncluster, unlock ocluster.  In the
1112 	 * case where ocluster is left unchanged the code above sets
1113 	 * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1114 	 */
1115 	if (ocluster)
1116 		hammer2_cluster_unlock(ocluster);
1117 	*clusterp = ncluster;
1118 
1119 	return (0);
1120 }
1121 
1122 /*
1123  * Repoint ip->cluster's chains to cluster's chains and fixup the default
1124  * focus.
1125  *
 * Caller must hold the inode exclusively locked; the cluster, if not
 * NULL, must also be exclusively locked.
1128  *
1129  * Cluster may be NULL to clean out any chains in ip->cluster.
1130  */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_cluster_t *cluster)
{
	hammer2_chain_t *ochain;
	hammer2_chain_t *nchain;
	hammer2_inode_t *opip;
	int i;

	/*
	 * Replace chains in ip->cluster with chains from cluster and
	 * adjust the focus if necessary.
	 *
	 * NOTE: nchain and/or ochain can be NULL due to gaps
	 *	 in the cluster arrays.
	 */
	for (i = 0; cluster && i < cluster->nchains; ++i) {
		nchain = cluster->array[i].chain;
		if (i < ip->cluster.nchains) {
			ochain = ip->cluster.array[i].chain;
			if (ochain == nchain)
				continue;	/* already pointing at it */
		} else {
			ochain = NULL;
		}

		/*
		 * Make adjustments: install nchain (with a new ref) and
		 * drop the ref on the replaced ochain.
		 */
		ip->cluster.array[i].chain = nchain;
		if (nchain)
			hammer2_chain_ref(nchain);
		if (ochain)
			hammer2_chain_drop(ochain);
	}

	/*
	 * Release any left-over chains in ip->cluster.  (i carries over
	 * from the loop above; if cluster was NULL, i is 0 and every
	 * chain is released).
	 */
	while (i < ip->cluster.nchains) {
		nchain = ip->cluster.array[i].chain;
		if (nchain) {
			ip->cluster.array[i].chain = NULL;
			hammer2_chain_drop(nchain);
		}
		++i;
	}

	/*
	 * Fixup fields.  Note that the inode-embedded cluster is never
	 * directly locked.
	 */
	if (cluster) {
		ip->cluster.nchains = cluster->nchains;
		ip->cluster.focus = cluster->focus;
		ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED;
	} else {
		ip->cluster.nchains = 0;
		ip->cluster.focus = NULL;
		ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS;
	}

	/*
	 * Repoint ip->pip if requested (non-NULL pip).  Ref the new
	 * parent before dropping the old one.
	 */
	if (pip && ip->pip != pip) {
		opip = ip->pip;
		hammer2_inode_ref(pip);
		ip->pip = pip;
		if (opip)
			hammer2_inode_drop(opip);
	}
}
1204 
1205 /*
1206  * Unlink the file from the specified directory inode.  The directory inode
1207  * does not need to be locked.
1208  *
1209  * isdir determines whether a directory/non-directory check should be made.
1210  * No check is made if isdir is set to -1.
1211  *
 * The nch namecache handle, if non-NULL, is used to detect the
 * unlink-with-open-descriptor case (via cache_isopen(nch)).  If nch is
 * NULL no open-descriptor check is made.
 *
 * If the file is still open and nlinks drops to 0 this function must
 * move the chain to a special hidden directory until last-close occurs
 * on the file.
1219  *
1220  * NOTE!  The underlying file can still be active with open descriptors
1221  *	  or if the chain is being manually held (e.g. for rename).
1222  *
1223  *	  The caller is responsible for fixing up ip->chain if e.g. a
1224  *	  rename occurs (see chain_duplicate()).
1225  *
1226  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1227  *	  but otherwise will be deleted.
1228  */
int
hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
		    const uint8_t *name, size_t name_len,
		    int isdir, int *hlinkp, struct nchandle *nch,
		    int nlinks)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *hcluster;	/* hardlink target, if any */
	hammer2_cluster_t *hparent;	/* parent of hardlink target */
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *dcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	hammer2_key_t lhc;
	int error;
	int hlink;
	uint8_t type;

	error = 0;
	hlink = 0;
	hcluster = NULL;
	hparent = NULL;
	lhc = hammer2_dirhash(name, name_len);

again:
	/*
	 * Search for the filename in the directory.  The dirhash gives
	 * us the base key; iterate the collision space comparing the
	 * actual stored name.
	 */
	cparent = hammer2_inode_lock_ex(dip);
	cluster = hammer2_cluster_lookup(cparent, &key_next,
				     lhc, lhc + HAMMER2_DIRHASH_LOMASK, 0);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			if (ripdata->name_len == name_len &&
			    bcmp(ripdata->filename, name, name_len) == 0) {
				break;
			}
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK,
					       0);
	}
	hammer2_inode_unlock_ex(dip, NULL);	/* retain cparent */

	/*
	 * Not found or wrong type (isdir < 0 disables the type check).
	 * If a hardlink pointer, type checks use the hardlink target.
	 */
	if (cluster == NULL) {
		error = ENOENT;
		goto done;
	}
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	type = ripdata->type;
	if (type == HAMMER2_OBJTYPE_HARDLINK) {
		hlink = 1;
		type = ripdata->target_type;
	}

	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
		error = ENOTDIR;
		goto done;
	}
	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
		error = EISDIR;
		goto done;
	}

	/*
	 * Hardlink must be resolved.  We can't hold the parent locked
	 * while we do this or we could deadlock.  The physical file will
	 * be located at or above the current directory.
	 *
	 * We loop to reacquire the hardlink origination.
	 *
	 * NOTE: hammer2_hardlink_find() will locate the hardlink target,
	 *	 returning a modified hparent and hcluster.
	 */
	if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
		if (hcluster == NULL) {
			hcluster = cluster;
			cluster = NULL;	/* safety */
			hammer2_cluster_unlock(cparent);
			cparent = NULL; /* safety */
			ripdata = NULL;	/* safety (associated w/cparent) */
			error = hammer2_hardlink_find(dip, &hparent, &hcluster);

			/*
			 * If we couldn't find the hardlink target then some
			 * parent directory containing the hardlink pointer
			 * probably got renamed to above the original target,
			 * a case not yet handled by H2.
			 */
			if (error) {
				kprintf("H2 unlink_file: hardlink target for "
					"\"%s\" not found\n",
					name);
				kprintf("(likely due to known directory "
					"rename bug)\n");
				goto done;
			}
			goto again;
		}
	}

	/*
	 * If this is a directory the directory must be empty.  However, if
	 * isdir < 0 we are doing a rename and the directory does not have
	 * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
	 * and the directory does not have to be empty.
	 *
	 * NOTE: We check the full key range here which covers both visible
	 *	 and invisible entries.  Theoretically there should be no
	 *	 invisible (hardlink target) entries if there are no visible
	 *	 entries.
	 */
	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
		dparent = hammer2_cluster_lookup_init(cluster, 0);
		dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
					          0, (hammer2_key_t)-1,
					          HAMMER2_LOOKUP_NODATA);
		if (dcluster) {
			/* at least one entry exists: not empty */
			hammer2_cluster_unlock(dcluster);
			hammer2_cluster_lookup_done(dparent);
			error = ENOTEMPTY;
			goto done;
		}
		hammer2_cluster_lookup_done(dparent);
		dparent = NULL;
		/* dcluster NULL */
	}

	/*
	 * If this was a hardlink then (cparent, cluster) is the hardlink
	 * pointer, which we can simply destroy outright.  Discard the
	 * clusters and replace with the hardlink target.
	 */
	if (hcluster) {
		hammer2_cluster_delete(trans, cparent, cluster,
				       HAMMER2_DELETE_PERMANENT);
		hammer2_cluster_unlock(cparent);
		hammer2_cluster_unlock(cluster);
		cparent = hparent;
		cluster = hcluster;
		hparent = NULL;
		hcluster = NULL;
	}

	/*
	 * This leaves us with the hardlink target or non-hardlinked file
	 * or directory in (cparent, cluster).
	 *
	 * Delete the target when nlinks reaches 0 with special handling
	 * if (isopen) is set.
	 *
	 * NOTE! In DragonFly the vnops function calls cache_unlink() after
	 *	 calling us here to clean out the namecache association,
	 *	 (which does not represent a ref for the open-test), and to
	 *	 force finalization of the vnode if/when the last ref gets
	 *	 dropped.
	 *
	 * NOTE! Files are unlinked by rename and then relinked.  nch will be
	 *	 passed as NULL in this situation.  hammer2_inode_connect()
	 *	 will bump nlinks.
	 */
	KKASSERT(cluster != NULL);
	hammer2_cluster_modify(trans, cluster, 0);
	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ripdata = wipdata;	/* ripdata now tracks the writable copy */
	/* nlinks is a signed adjustment supplied by the caller (0 = rename) */
	wipdata->nlinks += nlinks;
	if ((int64_t)wipdata->nlinks < 0) {	/* XXX debugging */
		wipdata->nlinks = 0;
	}
	hammer2_cluster_modsync(cluster);

	if (wipdata->nlinks == 0) {
		/*
		 * Target nlinks has reached 0, file now unlinked (but may
		 * still be open).
		 */
		/* XXX need interlock if mounted
		if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
		    cluster->pmp) {
			error = EINVAL;
			kprintf("hammer2: PFS \"%s\" cannot be deleted "
				"while still mounted\n",
				wipdata->filename);
			goto done;
		}
		*/
		hammer2_cluster_set_chainflags(cluster, HAMMER2_CHAIN_UNLINKED);
		if (nch && cache_isopen(nch)) {
			/* still open: park under hidden dir until last-close */
			hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
						     wipdata->inum);
		} else {
			/*
			 * This won't get everything if a vnode is still
			 * present, but the cache_unlink() call the caller
			 * makes will.
			 */
			hammer2_cluster_delete(trans, cparent, cluster,
					       HAMMER2_DELETE_PERMANENT);
		}
	} else if (hlink == 0) {
		/*
		 * In this situation a normal non-hardlinked file (which can
		 * only have nlinks == 1) still has a non-zero nlinks, the
		 * caller must be doing a RENAME operation and so is passing
		 * a nlinks adjustment of 0, and only wishes to remove file
		 * in order to be able to reconnect it under a different name.
		 *
		 * In this situation we do a non-permanent deletion of the
		 * chain in order to allow the file to be reconnected in
		 * a different location.
		 */
		KKASSERT(nlinks == 0);
		hammer2_cluster_delete(trans, cparent, cluster, 0);
	}
	error = 0;
done:
	/* common exit: release whatever clusters are still held */
	if (cparent)
		hammer2_cluster_unlock(cparent);
	if (cluster)
		hammer2_cluster_unlock(cluster);
	if (hparent)
		hammer2_cluster_unlock(hparent);
	if (hcluster)
		hammer2_cluster_unlock(hcluster);
	if (hlinkp)
		*hlinkp = hlink;

	return error;
}
1467 
1468 /*
1469  * This is called from the mount code to initialize pmp->ihidden
1470  */
void
hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
{
	hammer2_trans_t trans;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *scan;
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	int error;
	int count;
	int dip_check_algo;
	int dip_comp_algo;

	if (pmp->ihidden)
		return;		/* already installed */

	/*
	 * Find the hidden directory
	 */
	bzero(&key_dummy, sizeof(key_dummy));
	hammer2_trans_init(&trans, pmp, 0);

	/*
	 * Setup for lookup, retrieve iroot's check and compression
	 * algorithm request which was likely generated by newfs_hammer2.
	 *
	 * The check/comp fields will probably never be used since inodes
	 * are renamed into the hidden directory and not created relative to
	 * the hidden directory, chain creation inherits from bref.methods,
	 * and data chains inherit from their respective file inode *_algo
	 * fields.
	 */
	cparent = hammer2_inode_lock_ex(pmp->iroot);
	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
	dip_check_algo = ripdata->check_algo;
	dip_comp_algo = ripdata->comp_algo;
	ripdata = NULL;

	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
					 HAMMER2_INODE_HIDDENDIR,
					 HAMMER2_INODE_HIDDENDIR,
					 0);
	if (cluster) {
		/* hidden dir exists, instantiate the in-memory inode */
		pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
		hammer2_inode_ref(pmp->ihidden);

		/*
		 * Remove any unlinked files which were left open as-of
		 * any system crash.
		 *
		 * Don't pass NODATA, we need the inode data so the delete
		 * can do proper statistics updates.
		 */
		count = 0;
		scan = hammer2_cluster_lookup(cluster, &key_next,
					      0, HAMMER2_TID_MAX, 0);
		while (scan) {
			if (hammer2_cluster_type(scan) ==
			    HAMMER2_BREF_TYPE_INODE) {
				hammer2_cluster_delete(&trans, cluster, scan,
						   HAMMER2_DELETE_PERMANENT);
				++count;
			}
			scan = hammer2_cluster_next(cluster, scan, &key_next,
						    0, HAMMER2_TID_MAX, 0);
		}

		hammer2_inode_unlock_ex(pmp->ihidden, cluster);
		hammer2_inode_unlock_ex(pmp->iroot, cparent);
		hammer2_trans_done(&trans);
		kprintf("hammer2: PFS loaded hidden dir, "
			"removed %d dead entries\n", count);
		return;
	}

	/*
	 * Create the hidden directory
	 *
	 * NOTE(review): the error return from hammer2_cluster_create()
	 * is assigned but never checked — confirm creation cannot fail
	 * here (the cluster is dereferenced unconditionally below).
	 */
	error = hammer2_cluster_create(&trans, cparent, &cluster,
				       HAMMER2_INODE_HIDDENDIR, 0,
				       HAMMER2_BREF_TYPE_INODE,
				       HAMMER2_INODE_BYTES,
				       0);
	hammer2_inode_unlock_ex(pmp->iroot, cparent);

	hammer2_cluster_modify(&trans, cluster, 0);
	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
	wipdata->inum = HAMMER2_INODE_HIDDENDIR;
	wipdata->nlinks = 1;
	wipdata->comp_algo = dip_comp_algo;
	wipdata->check_algo = dip_check_algo;
	hammer2_cluster_modsync(cluster);
	kprintf("hammer2: PFS root missing hidden directory, creating\n");

	pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
	hammer2_inode_ref(pmp->ihidden);
	hammer2_inode_unlock_ex(pmp->ihidden, cluster);
	hammer2_trans_done(&trans);
}
1574 
1575 /*
1576  * If an open file is unlinked H2 needs to retain the file in the topology
1577  * to ensure that its backing store is not recovered by the bulk free scan.
1578  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1579  *
1580  * To do this the file is moved to a hidden directory in the PFS root and
1581  * renamed.  The hidden directory must be created if it does not exist.
1582  */
1583 static
1584 void
1585 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1586 			     hammer2_cluster_t **cparentp,
1587 			     hammer2_cluster_t **clusterp,
1588 			     hammer2_tid_t inum)
1589 {
1590 	hammer2_cluster_t *dcluster;
1591 	hammer2_pfs_t *pmp;
1592 	int error;
1593 
1594 	pmp = (*clusterp)->pmp;
1595 	KKASSERT(pmp != NULL);
1596 	KKASSERT(pmp->ihidden != NULL);
1597 
1598 	hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1599 	dcluster = hammer2_inode_lock_ex(pmp->ihidden);
1600 	error = hammer2_inode_connect(trans, clusterp, 0,
1601 				      pmp->ihidden, dcluster,
1602 				      NULL, 0, inum);
1603 	hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
1604 	KKASSERT(error == 0);
1605 }
1606 
1607 /*
1608  * Given an exclusively locked inode and cluster we consolidate the cluster
1609  * for hardlink creation, adding (nlinks) to the file's link count and
1610  * potentially relocating the inode to (cdip) which is a parent directory
1611  * common to both the current location of the inode and the intended new
1612  * hardlink.
1613  *
1614  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1615  * and returning a new locked cluster.
1616  *
1617  * NOTE!  This function will also replace ip->cluster.
1618  */
1619 int
1620 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1621 			     hammer2_inode_t *ip,
1622 			     hammer2_cluster_t **clusterp,
1623 			     hammer2_inode_t *cdip,
1624 			     hammer2_cluster_t *cdcluster,
1625 			     int nlinks)
1626 {
1627 	const hammer2_inode_data_t *ripdata;
1628 	hammer2_inode_data_t *wipdata;
1629 	hammer2_cluster_t *cluster;
1630 	hammer2_cluster_t *cparent;
1631 	int error;
1632 
1633 	cluster = *clusterp;
1634 	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1635 	if (nlinks == 0 &&			/* no hardlink needed */
1636 	    (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1637 		return (0);
1638 	}
1639 
1640 	if (hammer2_hardlink_enable == 0) {	/* disallow hardlinks */
1641 		hammer2_cluster_unlock(cluster);
1642 		*clusterp = NULL;
1643 		return (ENOTSUP);
1644 	}
1645 
1646 	cparent = NULL;
1647 
1648 	/*
1649 	 * If no change in the hardlink's target directory is required and
1650 	 * this is already a hardlink target, all we need to do is adjust
1651 	 * the link count.
1652 	 */
1653 	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1654 	if (cdip == ip->pip &&
1655 	    (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1656 		if (nlinks) {
1657 			hammer2_cluster_modify(trans, cluster, 0);
1658 			wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1659 			wipdata->nlinks += nlinks;
1660 			hammer2_cluster_modsync(cluster);
1661 			ripdata = wipdata;
1662 		}
1663 		error = 0;
1664 		goto done;
1665 	}
1666 
1667 	/*
1668 	 * Cluster is the real inode.  The originating directory is locked
1669 	 * by the caller so we can manipulate it without worrying about races
1670 	 * against other lookups.
1671 	 *
1672 	 * If cluster is visible we need to delete it from the current
1673 	 * location and create a hardlink pointer in its place.  If it is
1674 	 * not visible we need only delete it.  Then later cluster will be
1675 	 * renamed to a parent directory and converted (if necessary) to
1676 	 * a hidden inode (via shiftup).
1677 	 *
1678 	 * NOTE! We must hold cparent locked through the delete/create/rename
1679 	 *	 operation to ensure that other threads block resolving to
1680 	 *	 the same hardlink, otherwise the other threads may not see
1681 	 *	 the hardlink.
1682 	 */
1683 	KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1684 	cparent = hammer2_cluster_parent(cluster);
1685 
1686 	hammer2_cluster_delete(trans, cparent, cluster, 0);
1687 
1688 	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1689 	KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1690 	if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1691 		hammer2_cluster_t *ncluster;
1692 		hammer2_key_t lhc;
1693 
1694 		ncluster = NULL;
1695 		lhc = cluster->focus->bref.key;
1696 		error = hammer2_cluster_create(trans, cparent, &ncluster,
1697 					     lhc, 0,
1698 					     HAMMER2_BREF_TYPE_INODE,
1699 					     HAMMER2_INODE_BYTES,
1700 					     0);
1701 		hammer2_cluster_modify(trans, ncluster, 0);
1702 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1703 
1704 		/* wipdata->comp_algo = ripdata->comp_algo; */
1705 		wipdata->comp_algo = 0;
1706 		wipdata->check_algo = 0;
1707 		wipdata->version = HAMMER2_INODE_VERSION_ONE;
1708 		wipdata->inum = ripdata->inum;
1709 		wipdata->target_type = ripdata->type;
1710 		wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1711 		wipdata->uflags = 0;
1712 		wipdata->rmajor = 0;
1713 		wipdata->rminor = 0;
1714 		wipdata->ctime = 0;
1715 		wipdata->mtime = 0;
1716 		wipdata->atime = 0;
1717 		wipdata->btime = 0;
1718 		bzero(&wipdata->uid, sizeof(wipdata->uid));
1719 		bzero(&wipdata->gid, sizeof(wipdata->gid));
1720 		wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1721 		wipdata->cap_flags = 0;
1722 		wipdata->mode = 0;
1723 		wipdata->size = 0;
1724 		wipdata->nlinks = 1;
1725 		wipdata->iparent = 0;	/* XXX */
1726 		wipdata->pfs_type = 0;
1727 		wipdata->pfs_inum = 0;
1728 		bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1729 		bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1730 		wipdata->data_quota = 0;
1731 		wipdata->data_count = 0;
1732 		wipdata->inode_quota = 0;
1733 		wipdata->inode_count = 0;
1734 		wipdata->attr_tid = 0;
1735 		wipdata->dirent_tid = 0;
1736 		bzero(&wipdata->u, sizeof(wipdata->u));
1737 		bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1738 		wipdata->name_key = ncluster->focus->bref.key;
1739 		wipdata->name_len = ripdata->name_len;
1740 		/* XXX transaction ids */
1741 		hammer2_cluster_modsync(ncluster);
1742 		hammer2_cluster_unlock(ncluster);
1743 	}
1744 	ripdata = wipdata;
1745 
1746 	/*
1747 	 * cluster represents the hardlink target and is now flagged deleted.
1748 	 * duplicate it to the parent directory and adjust nlinks.
1749 	 *
1750 	 * WARNING! The shiftup() call can cause ncluster to be moved into
1751 	 *	    an indirect block, and our ncluster will wind up pointing
1752 	 *	    to the older/original version.
1753 	 */
1754 	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1755 	hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1756 				 nlinks, &error);
1757 
1758 	if (error == 0)
1759 		hammer2_inode_repoint(ip, cdip, cluster);
1760 
1761 done:
1762 	/*
1763 	 * Cleanup, cluster/ncluster already dealt with.
1764 	 *
1765 	 * Return the shifted cluster in *clusterp.
1766 	 */
1767 	if (cparent)
1768 		hammer2_cluster_unlock(cparent);
1769 	*clusterp = cluster;
1770 
1771 	return (error);
1772 }
1773 
1774 /*
1775  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1776  * inode while (*chainp) points to the resolved (hidden hardlink
1777  * target) inode.  In this situation when nlinks is 1 we wish to
1778  * deconsolidate the hardlink, moving it back to the directory that now
1779  * represents the only remaining link.
1780  */
1781 int
1782 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1783 			       hammer2_inode_t *dip,
1784 			       hammer2_chain_t **chainp,
1785 			       hammer2_chain_t **ochainp)
1786 {
1787 	if (*ochainp == NULL)
1788 		return (0);
1789 	/* XXX */
1790 	return (0);
1791 }
1792 
1793 /*
1794  * The caller presents a locked cluster with an obj_type of
1795  * HAMMER2_OBJTYPE_HARDLINK in (*clusterp).  This routine will locate
1796  * the inode and replace (*clusterp) with a new locked cluster containing
1797  * the target hardlink, also locked.  The original cluster will be
1798  * unlocked and released.
1799  *
1800  * If cparentp is not NULL a locked cluster representing the hardlink's
1801  * parent is also returned.
1802  *
1803  * If we are unable to locate the hardlink target EIO is returned,
1804  * (*cparentp) is set to NULL, the original passed-in (*clusterp)
1805  * will be unlocked and released and (*clusterp) will be set to NULL
1806  * as well.
1807  */
int
hammer2_hardlink_find(hammer2_inode_t *dip,
		      hammer2_cluster_t **cparentp,
		      hammer2_cluster_t **clusterp)
{
	const hammer2_inode_data_t *ipdata;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *rcluster;
	hammer2_inode_t *ip;
	hammer2_inode_t *pip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;

	cluster = *clusterp;
	pip = dip;
	hammer2_inode_ref(pip);		/* for loop */

	/*
	 * Locate the hardlink.  pip is referenced and not locked.
	 * Unlock and release (*clusterp) after extracting the needed
	 * data.  The hardlink target is keyed by the pointer's inum.
	 */
	ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = ipdata->inum;
	ipdata = NULL;			/* safety */
	hammer2_cluster_unlock(cluster);
	*clusterp = NULL;		/* safety */

	rcluster = NULL;
	cparent = NULL;

	/*
	 * Walk upward from dip, looking up the target key (lhc) in each
	 * ancestor directory until found or the chain of parents ends.
	 */
	while ((ip = pip) != NULL) {
		cparent = hammer2_inode_lock_ex(ip);
		hammer2_inode_drop(ip);			/* loop */
		KKASSERT(hammer2_cluster_type(cparent) ==
			 HAMMER2_BREF_TYPE_INODE);
		rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
					     lhc, lhc, 0);
		if (rcluster)
			break;
		hammer2_cluster_lookup_done(cparent);	/* discard parent */
		cparent = NULL;				/* safety */
		pip = ip->pip;		/* safe, ip held locked */
		if (pip)
			hammer2_inode_ref(pip);		/* loop */
		hammer2_inode_unlock_ex(ip, NULL);
	}

	/*
	 * chain is locked, ip is locked.  Unlock ip, return the locked
	 * chain.  *ipp is already set w/a ref count and not locked.
	 *
	 * (cparent is already unlocked).
	 *
	 * On failure (rcluster == NULL) return EIO with *clusterp and
	 * *cparentp (if provided) both NULL, per the contract above.
	 */
	*clusterp = rcluster;
	if (rcluster) {
		if (cparentp) {
			*cparentp = cparent;
			hammer2_inode_unlock_ex(ip, NULL);
		} else {
			hammer2_inode_unlock_ex(ip, cparent);
		}
		return (0);
	} else {
		if (cparentp)
			*cparentp = NULL;
		if (ip)
			hammer2_inode_unlock_ex(ip, cparent);
		return (EIO);
	}
}
1880 
1881 /*
1882  * Find the directory common to both fdip and tdip.
1883  *
1884  * Returns a held but not locked inode.  Caller typically locks the inode,
1885  * and when through unlocks AND drops it.
1886  */
1887 hammer2_inode_t *
1888 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1889 {
1890 	hammer2_inode_t *scan1;
1891 	hammer2_inode_t *scan2;
1892 
1893 	/*
1894 	 * We used to have a depth field but it complicated matters too
1895 	 * much for directory renames.  So now its ugly.  Check for
1896 	 * simple cases before giving up and doing it the expensive way.
1897 	 *
1898 	 * XXX need a bottom-up topology stability lock
1899 	 */
1900 	if (fdip == tdip || fdip == tdip->pip) {
1901 		hammer2_inode_ref(fdip);
1902 		return(fdip);
1903 	}
1904 	if (fdip->pip == tdip) {
1905 		hammer2_inode_ref(tdip);
1906 		return(tdip);
1907 	}
1908 
1909 	/*
1910 	 * XXX not MPSAFE
1911 	 */
1912 	for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1913 		scan2 = tdip;
1914 		while (scan2->pmp == tdip->pmp) {
1915 			if (scan1 == scan2) {
1916 				hammer2_inode_ref(scan1);
1917 				return(scan1);
1918 			}
1919 			scan2 = scan2->pip;
1920 			if (scan2 == NULL)
1921 				break;
1922 		}
1923 	}
1924 	panic("hammer2_inode_common_parent: no common parent %p %p\n",
1925 	      fdip, tdip);
1926 	/* NOT REACHED */
1927 	return(NULL);
1928 }
1929 
1930 /*
1931  * Synchronize the inode's frontend state with the chain state prior
1932  * to any explicit flush of the inode or any strategy write call.
1933  *
1934  * Called with a locked inode.
1935  */
void
hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
		    hammer2_cluster_t *cparent)
{
	const hammer2_inode_data_t *ripdata;	/* read view of inode data */
	hammer2_inode_data_t *wipdata;		/* writable view after modify */
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t lbase;
	hammer2_key_t key_next;
	int dosync = 0;		/* set when wipdata was dirtied; triggers modsync */

	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */

	/*
	 * Propagate a cached frontend mtime into the media inode.  The
	 * flag is cleared before copying; after modify_ip the read
	 * pointer is re-aimed at the writable copy so later checks see
	 * the updated fields.
	 */
	if (ip->flags & HAMMER2_INODE_MTIME) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
		wipdata->mtime = ip->mtime;
		dosync = 1;
		ripdata = wipdata;
	}
	/*
	 * File was truncated: shrink the media size and prune data
	 * chains now beyond the new EOF.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		dosync = 1;
		ripdata = wipdata;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * We must delete any chains beyond the EOF.  The chain
		 * straddling the EOF will be pending in the bioq.
		 */
		/* lbase = new size rounded up to the next PBUF boundary */
		lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
			~HAMMER2_PBUFMASK64;
		dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
		cluster = hammer2_cluster_lookup(dparent, &key_next,
					         lbase, (hammer2_key_t)-1,
						 HAMMER2_LOOKUP_NODATA);
		while (cluster) {
			/*
			 * Degenerate embedded case, nothing to loop on
			 */
			switch (hammer2_cluster_type(cluster)) {
			case HAMMER2_BREF_TYPE_INODE:
				/* data embedded in the inode itself; stop */
				hammer2_cluster_unlock(cluster);
				cluster = NULL;
				break;
			case HAMMER2_BREF_TYPE_DATA:
				/* data block past EOF: delete, then advance */
				hammer2_cluster_delete(trans, dparent, cluster,
						   HAMMER2_DELETE_PERMANENT);
				/* fall through */
			default:
				cluster = hammer2_cluster_next(dparent, cluster,
						   &key_next,
						   key_next, (hammer2_key_t)-1,
						   HAMMER2_LOOKUP_NODATA);
				break;
			}
		}
		hammer2_cluster_lookup_done(dparent);
	} else
	/*
	 * File was extended: grow the media size.  No chains need to
	 * be deleted in this direction.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * When resizing larger we may not have any direct-data
		 * available.
		 */
		if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
		    ip->size > HAMMER2_EMBEDDED_BYTES) {
			/* switch from embedded data to a blockset */
			wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
			bzero(&wipdata->u.blockset,
			      sizeof(wipdata->u.blockset));
		}
		dosync = 1;
		ripdata = wipdata;
	}
	/* push any modifications made above out through the cluster */
	if (dosync)
		hammer2_cluster_modsync(cparent);
}
2018