xref: /dflybsd-src/sys/vfs/hammer2/hammer2_inode.c (revision 1448a966161a9420da0adf26a910473e9202cbbc)
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41 
42 #include "hammer2.h"
43 
44 #define INODE_DEBUG	0
45 
46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
47 					 hammer2_cluster_t **cparentp,
48 					 hammer2_cluster_t **clusterp,
49 					 hammer2_tid_t inum);
50 
51 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
52 	     hammer2_tid_t, inum);
53 
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57 	if (ip1->inum < ip2->inum)
58 		return(-1);
59 	if (ip1->inum > ip2->inum)
60 		return(1);
61 	return(0);
62 }
63 
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared locks and exclusive locks on inodes.
68  *
69  * The standard exclusive inode lock always resolves the inode meta-data,
70  * but there is a bypass version used by the vnode reclamation code that
71  * avoids the I/O.
72  *
73  * The inode locking function locks the inode itself, resolves any stale
74  * chains in the inode's cluster, and allocates a fresh copy of the
75  * cluster with 1 ref and all the underlying chains locked.  Duplication
76  * races are handled by this function.
77  *
78  * ip->cluster will be stable while the inode is locked.
79  *
80  * NOTE: We don't combine the inode/chain lock because putting away an
81  *       inode would otherwise confuse multiple lock holders of the inode.
82  *
83  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
84  *	 and never point to a hardlink pointer.
85  */
86 hammer2_cluster_t *
87 hammer2_inode_lock_ex(hammer2_inode_t *ip)
88 {
89 	return hammer2_inode_lock_nex(ip, HAMMER2_RESOLVE_ALWAYS);
90 }
91 
/*
 * Exclusively lock the inode with the caller-specified meta-data
 * resolution mode (how), returning a fresh copy of the inode's cluster
 * with one ref and all the underlying chains locked.
 *
 * ip->cluster is stable while the lock is held.  The focus of both the
 * returned cluster and ip->cluster is reset to the first usable chain.
 */
hammer2_cluster_t *
hammer2_inode_lock_nex(hammer2_inode_t *ip, int how)
{
	hammer2_cluster_t *cluster;
	hammer2_chain_t *chain;
	int i;

	/* ref + topo lock first, then snapshot the cluster structure */
	hammer2_inode_ref(ip);
	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
	cluster = hammer2_cluster_copy(&ip->cluster,
				       HAMMER2_CLUSTER_COPY_NOCHAINS);

	/* recompute the focus of both clusters while locking the chains */
	ip->cluster.focus = NULL;
	cluster->focus = NULL;

	for (i = 0; i < cluster->nchains; ++i) {
		chain = ip->cluster.array[i];
		if (chain == NULL) {
			kprintf("inode_lock: %p: missing chain\n", ip);
			continue;
		}

		hammer2_chain_lock(chain, how);
		cluster->array[i] = chain;
		if (cluster->focus == NULL)
			cluster->focus = chain;
		if (ip->cluster.focus == NULL)
			ip->cluster.focus = chain;
	}

	/*
	 * Returned cluster must resolve hardlink pointers
	 */
	if ((how & HAMMER2_RESOLVE_MASK) == HAMMER2_RESOLVE_ALWAYS) {
		const hammer2_inode_data_t *ripdata;
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		/* in-memory inodes must never point at a hardlink pointer */
		KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
		/*
		if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
		    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
			error = hammer2_hardlink_find(ip->pip, NULL, cluster);
			KKASSERT(error == 0);
		}
		*/
	}
	return (cluster);
}
139 
140 void
141 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
142 {
143 	if (cluster)
144 		hammer2_cluster_unlock(cluster);
145 	ccms_thread_unlock(&ip->topo_cst);
146 	hammer2_inode_drop(ip);
147 }
148 
149 /*
150  * Standard shared inode lock always resolves the inode meta-data.
151  *
152  * NOTE: We don't combine the inode/chain lock because putting away an
153  *       inode would otherwise confuse multiple lock holders of the inode.
154  *
155  *	 Shared locks are especially sensitive to having too many shared
156  *	 lock counts (from the same thread) on certain paths which might
157  *	 need to upgrade them.  Only one count of a shared lock can be
158  *	 upgraded.
159  */
160 hammer2_cluster_t *
161 hammer2_inode_lock_sh(hammer2_inode_t *ip)
162 {
163 	const hammer2_inode_data_t *ripdata;
164 	hammer2_cluster_t *cluster;
165 	hammer2_chain_t *chain;
166 	int i;
167 
168 	hammer2_inode_ref(ip);
169 	cluster = hammer2_cluster_copy(&ip->cluster,
170 				       HAMMER2_CLUSTER_COPY_NOCHAINS);
171 	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
172 
173 	cluster->focus = NULL;
174 
175 	for (i = 0; i < cluster->nchains; ++i) {
176 		chain = ip->cluster.array[i];
177 
178 		if (chain == NULL) {
179 			kprintf("inode_lock: %p: missing chain\n", ip);
180 			continue;
181 		}
182 
183 		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
184 					  HAMMER2_RESOLVE_SHARED);
185 		cluster->array[i] = chain;
186 		if (cluster->focus == NULL)
187 			cluster->focus = chain;
188 	}
189 
190 	/*
191 	 * Returned cluster must resolve hardlink pointers
192 	 */
193 	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
194 	KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
195 	/*
196 	if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK &&
197 	    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
198 		error = hammer2_hardlink_find(ip->pip, NULL, cluster);
199 		KKASSERT(error == 0);
200 	}
201 	*/
202 
203 	return (cluster);
204 }
205 
206 void
207 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
208 {
209 	if (cluster)
210 		hammer2_cluster_unlock(cluster);
211 	ccms_thread_unlock(&ip->topo_cst);
212 	hammer2_inode_drop(ip);
213 }
214 
215 ccms_state_t
216 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
217 {
218 	return(ccms_thread_lock_temp_release(&ip->topo_cst));
219 }
220 
/*
 * Restore the inode topology lock to the state returned by a prior
 * hammer2_inode_lock_temp_release().
 */
void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate)
{
	ccms_thread_lock_temp_restore(&ip->topo_cst, ostate);
}
226 
227 ccms_state_t
228 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
229 {
230 	return(ccms_thread_lock_upgrade(&ip->topo_cst));
231 }
232 
/*
 * Downgrade the inode topology lock back to the state returned by a
 * prior hammer2_inode_lock_upgrade().
 */
void
hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate)
{
	ccms_thread_lock_downgrade(&ip->topo_cst, ostate);
}
238 
239 /*
240  * Lookup an inode by inode number
241  */
242 hammer2_inode_t *
243 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
244 {
245 	hammer2_inode_t *ip;
246 
247 	KKASSERT(pmp);
248 	if (pmp->spmp_hmp) {
249 		ip = NULL;
250 	} else {
251 		spin_lock(&pmp->inum_spin);
252 		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
253 		if (ip)
254 			hammer2_inode_ref(ip);
255 		spin_unlock(&pmp->inum_spin);
256 	}
257 	return(ip);
258 }
259 
260 /*
261  * Adding a ref to an inode is only legal if the inode already has at least
262  * one ref.
263  *
264  * (can be called with spinlock held)
265  */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	/* atomic bump; the caller must already hold at least one ref */
	atomic_add_int(&ip->refs, 1);
}
271 
272 /*
273  * Drop an inode reference, freeing the inode when the last reference goes
274  * away.
275  */
276 void
277 hammer2_inode_drop(hammer2_inode_t *ip)
278 {
279 	hammer2_pfsmount_t *pmp;
280 	hammer2_inode_t *pip;
281 	u_int refs;
282 
283 	while (ip) {
284 		refs = ip->refs;
285 		cpu_ccfence();
286 		if (refs == 1) {
287 			/*
288 			 * Transition to zero, must interlock with
289 			 * the inode inumber lookup tree (if applicable).
290 			 */
291 			pmp = ip->pmp;
292 			KKASSERT(pmp);
293 			spin_lock(&pmp->inum_spin);
294 
295 			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
296 				KKASSERT(ip->topo_cst.count == 0);
297 				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
298 					atomic_clear_int(&ip->flags,
299 						     HAMMER2_INODE_ONRBTREE);
300 					RB_REMOVE(hammer2_inode_tree,
301 						  &pmp->inum_tree, ip);
302 				}
303 				spin_unlock(&pmp->inum_spin);
304 
305 				pip = ip->pip;
306 				ip->pip = NULL;
307 				ip->pmp = NULL;
308 
309 				/*
310 				 * Cleaning out ip->cluster isn't entirely
311 				 * trivial.
312 				 */
313 				hammer2_inode_repoint(ip, NULL, NULL);
314 
315 				/*
316 				 * We have to drop pip (if non-NULL) to
317 				 * dispose of our implied reference from
318 				 * ip->pip.  We can simply loop on it.
319 				 */
320 				kfree(ip, pmp->minode);
321 				atomic_add_long(&pmp->inmem_inodes, -1);
322 				ip = pip;
323 				/* continue with pip (can be NULL) */
324 			} else {
325 				spin_unlock(&ip->pmp->inum_spin);
326 			}
327 		} else {
328 			/*
329 			 * Non zero transition
330 			 */
331 			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
332 				break;
333 		}
334 	}
335 }
336 
337 /*
338  * Get the vnode associated with the given inode, allocating the vnode if
339  * necessary.  The vnode will be returned exclusively locked.
340  *
341  * The caller must lock the inode (shared or exclusive).
342  *
343  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
344  * races.
345  */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_pfsmount_t *pmp;
	struct vnode *vp;
	ccms_state_t ostate;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;

	/* cparent is held locked by the caller; ripdata points into it */
	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				/* lost the reclaim race, retry from the top */
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				/* ip->vp changed while unlocked, start over */
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.  This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 */
		ostate = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			/* another thread won; discard our new vnode, retry */
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, ostate);
			continue;
		}

		/* initialize vnode type/ops from the inode meta-data */
		switch (ripdata->type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			vp->v_type = VREG;
			vinitvmio(vp, ripdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 */
			vp->v_type = VLNK;
			vinitvmio(vp, ripdata->size,
				  HAMMER2_LBUFSIZE,
				  (int)ripdata->size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ripdata->type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp, ripdata->rmajor, ripdata->rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		default:
			panic("hammer2: unhandled objtype %d", ripdata->type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		/* cross-associate vnode and inode; the vp holds an ip ref */
		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, ostate);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}
479 
480 /*
481  * Returns the inode associated with the passed-in cluster, creating the
482  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
483  *
484  * The passed-in chain must be locked and will remain locked on return.
485  * The returned inode will be locked and the caller may dispose of both
486  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
487  * a hardlink it must ref/unlock/relock/drop the inode.
488  *
489  * The hammer2_inode structure regulates the interface between the high level
490  * kernel VNOPS API and the filesystem backend (the chains).
491  */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
		  hammer2_cluster_t *cluster)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 */
again:
	for (;;) {
		iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->inum);
		if (nip == NULL)
			break;

		/* lock the found inode before validating it */
		ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			/* raced a concurrent removal, retry the lookup */
			ccms_thread_unlock(&nip->topo_cst);
			hammer2_inode_drop(nip);
			continue;
		}
		/* synchronize the existing inode to the passed-in cluster */
		hammer2_inode_repoint(nip, NULL, cluster);
		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	hammer2_cluster_replace(&nip->cluster, cluster);

	/* copy the frequently-accessed meta-data fields into the inode */
	nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
	nip->inum = nipdata->inum;
	nip->size = nipdata->size;
	nip->mtime = nipdata->mtime;
	hammer2_inode_repoint(nip, NULL, cluster);
	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);	/* ref dip for nip->pip */

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock_ex() call.
	 */
	nip->refs = 1;
	ccms_cst_init(&nip->topo_cst, &nip->cluster);
	ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		spin_lock(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			spin_unlock(&pmp->inum_spin);
			ccms_thread_unlock(&nip->topo_cst);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		spin_unlock(&pmp->inum_spin);
	}

	return (nip);
}
587 
588 /*
589  * Create a new inode in the specified directory using the vattr to
590  * figure out the type of inode.
591  *
592  * If no error occurs the new inode with its cluster locked is returned in
593  * *nipp, otherwise an error is returned and *nipp is set to NULL.
594  *
595  * If vap and/or cred are NULL the related fields are not set and the
596  * inode type defaults to a directory.  This is used when creating PFSs
597  * under the super-root, so the inode number is set to 1 in this case.
598  *
599  * dip is not locked on entry.
600  *
601  * NOTE: When used to create a snapshot, the inode is temporarily associated
602  *	 with the super-root spmp. XXX should pass new pmp for snapshot.
603  */
hammer2_inode_t *
hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len,
		     hammer2_cluster_t **clusterp, int *errorp)
{
	const hammer2_inode_data_t *dipdata;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;
	hammer2_inode_t *nip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int error;
	uid_t xuid;
	uuid_t dip_uid;
	uuid_t dip_gid;
	uint32_t dip_mode;
	uint8_t dip_comp_algo;
	uint8_t dip_check_algo;
	int ddflag;

	lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 */
retry:
	cparent = hammer2_inode_lock_ex(dip);
	dipdata = &hammer2_cluster_rdata(cparent)->ipdata;
	/* snapshot inheritable fields before the parent can be unlocked */
	dip_uid = dipdata->uid;
	dip_gid = dipdata->gid;
	dip_mode = dipdata->mode;
	dip_comp_algo = dipdata->comp_algo;
	dip_check_algo = dipdata->check_algo;

	/* iterate lhc past key collisions within the dirhash low bits */
	error = 0;
	while (error == 0) {
		cluster = hammer2_cluster_lookup(cparent, &key_dummy,
						 lhc, lhc, 0, &ddflag);
		if (cluster == NULL)
			break;
		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
			error = ENOSPC;
		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
			error = ENOSPC;
		hammer2_cluster_unlock(cluster);
		cluster = NULL;
		++lhc;
	}

	if (error == 0) {
		error = hammer2_cluster_create(trans, cparent, &cluster,
					     lhc, 0,
					     HAMMER2_BREF_TYPE_INODE,
					     HAMMER2_INODE_BYTES,
					     0);
	}
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s chain=%p\n",
		(int)name_len, (int)name_len, name,
		(cluster ? cluster->focus : NULL));
#endif

	/*
	 * Cleanup and handle retries.
	 */
	if (error == EAGAIN) {
		/* hold a ref across the unlock so cparent stays valid */
		hammer2_cluster_ref(cparent);
		hammer2_inode_unlock_ex(dip, cparent);
		hammer2_cluster_wait(cparent);
		hammer2_cluster_drop(cparent);
		goto retry;
	}
	hammer2_inode_unlock_ex(dip, cparent);
	cparent = NULL;

	if (error) {
		KKASSERT(cluster == NULL);
		*errorp = error;
		return (NULL);
	}

	/*
	 * Set up the new inode.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *	 transaction.  If the need arises we can adjust
	 *	 hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	nipdata->inum = trans->inode_tid;
	hammer2_cluster_modsync(cluster);
	nip = hammer2_inode_get(dip->pmp, dip, cluster);
	/* re-resolve nipdata; inode_get may have adjusted the cluster */
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;

	if (vap) {
		KKASSERT(trans->inodes_created == 0);
		nipdata->type = hammer2_get_obj_type(vap->va_type);
		nipdata->inum = trans->inode_tid;
		++trans->inodes_created;

		switch (nipdata->type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			nipdata->rmajor = vap->va_rmajor;
			nipdata->rminor = vap->va_rminor;
			break;
		default:
			break;
		}
	} else {
		/* no vattr: PFS creation under the super-root, inum 1 */
		nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
		nipdata->inum = 1;
	}

	/* Inherit parent's inode compression mode. */
	nip->comp_heuristic = 0;
	nipdata->comp_algo = dip_comp_algo;
	nipdata->check_algo = dip_check_algo;
	nipdata->version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&nipdata->ctime);
	nipdata->mtime = nipdata->ctime;
	if (vap)
		nipdata->mode = vap->va_mode;
	nipdata->nlinks = 1;
	if (vap) {
		/* derive uid/gid from the vattr, falling back to the parent */
		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			nipdata->uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&nipdata->uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			nipdata->gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
		else if (dip)
			nipdata->gid = dip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
	    nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
		nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}

	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
	bcopy(name, nipdata->filename, name_len);
	nipdata->name_key = lhc;
	nipdata->name_len = name_len;
	hammer2_cluster_modsync(cluster);
	*clusterp = cluster;

	return (nip);
}
786 
787 /*
788  * The cluster has been removed from the original directory and replaced
789  * with a hardlink pointer.  Move the cluster to the specified parent
790  * directory, change the filename to "0xINODENUMBER", and adjust the key.
791  * The cluster becomes our invisible hardlink target.
792  *
793  * The original cluster must be deleted on entry.
794  */
static
void
hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
			hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
			int nlinks, int *errorp)
{
	const hammer2_inode_data_t *iptmp;
	hammer2_inode_data_t *nipdata;
	hammer2_cluster_t *xcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	hammer2_blockref_t bref;
	int ddflag;

	/* the inode number becomes the (invisible) directory entry key */
	iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = iptmp->inum;
	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  lhc represents the inode number so there is
	 * no collision iteration.
	 *
	 * There should be no key collisions with invisible inode keys.
	 *
	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
	 *	    dip->cluster cache.
	 */
	*errorp = 0;
	xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
				      lhc, lhc, 0, &ddflag);
	if (xcluster) {
		/* unexpected collision on an inode-number key */
		kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
			xcluster->focus, dip, dcluster->focus,
			dip->cluster.focus);
		hammer2_cluster_unlock(xcluster);
		xcluster = NULL;
		*errorp = ENOSPC;
#if 0
		Debugger("X3");
#endif
	}

	/*
	 * Handle the error case
	 */
	if (*errorp) {
		panic("error2");
		KKASSERT(xcluster == NULL);
		return;
	}

	/*
	 * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
	 * same target bref as xcluster and then delete xcluster.  The
	 * duplication occurs after xcluster in flush order even though
	 * xcluster is deleted after the duplication. XXX
	 *
	 * WARNING! Duplications (to a different parent) can cause indirect
	 *	    blocks to be inserted, refactor xcluster.
	 *
	 * WARNING! Only key and keybits is extracted from a passed-in bref.
	 */
	hammer2_cluster_bref(cluster, &bref);
	bref.key = lhc;			/* invisible dir entry key */
	bref.keybits = 0;
	hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);

	/*
	 * cluster is now 'live' again.. adjust the filename.
	 *
	 * Directory entries are inodes but this is a hidden hardlink
	 * target.  The name isn't used but to ease debugging give it
	 * a name after its inode number.
	 */
	hammer2_cluster_modify(trans, cluster, 0);
	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
		  "0x%016jx", (intmax_t)nipdata->inum);
	nipdata->name_len = strlen(nipdata->filename);
	nipdata->name_key = lhc;
	/* accumulate the caller-supplied link count adjustment */
	nipdata->nlinks += nlinks;
	hammer2_cluster_modsync(cluster);
}
879 
880 /*
881  * Connect the target inode represented by (cluster) to the media topology
882  * at (dip, name, len).  The caller can pass a rough *chainp, this function
883  * will issue lookup()s to position the parent chain properly for the
884  * chain insertion.
885  *
886  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
887  * entry instead of connecting (cluster).
888  *
889  * If hlink is FALSE this function expects (cluster) to be unparented.
890  */
891 int
892 hammer2_inode_connect(hammer2_trans_t *trans,
893 		      hammer2_cluster_t **clusterp, int hlink,
894 		      hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
895 		      const uint8_t *name, size_t name_len,
896 		      hammer2_key_t lhc)
897 {
898 	hammer2_inode_data_t *wipdata;
899 	hammer2_cluster_t *ocluster;
900 	hammer2_cluster_t *ncluster;
901 	hammer2_key_t key_dummy;
902 	int ddflag;
903 	int error;
904 
905 	/*
906 	 * Since ocluster is either disconnected from the topology or
907 	 * represents a hardlink terminus which is always a parent of or
908 	 * equal to dip, we should be able to safely lock dip->chain for
909 	 * our setup.
910 	 *
911 	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
912 	 *	    dip->cluster.
913 	 *
914 	 * If name is non-NULL we calculate lhc, else we use the passed-in
915 	 * lhc.
916 	 */
917 	ocluster = *clusterp;
918 
919 	if (name) {
920 		lhc = hammer2_dirhash(name, name_len);
921 
922 		/*
923 		 * Locate the inode or indirect block to create the new
924 		 * entry in.  At the same time check for key collisions
925 		 * and iterate until we don't get one.
926 		 */
927 		error = 0;
928 		while (error == 0) {
929 			ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
930 						      lhc, lhc,
931 						      0, &ddflag);
932 			if (ncluster == NULL)
933 				break;
934 			if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
935 			    HAMMER2_DIRHASH_LOMASK) {
936 				error = ENOSPC;
937 			}
938 			hammer2_cluster_unlock(ncluster);
939 			ncluster = NULL;
940 			++lhc;
941 		}
942 	} else {
943 		/*
944 		 * Reconnect to specific key (used when moving
945 		 * unlinked-but-open files into the hidden directory).
946 		 */
947 		ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
948 						  lhc, lhc,
949 						  0, &ddflag);
950 		KKASSERT(ncluster == NULL);
951 	}
952 
953 	if (error == 0) {
954 		if (hlink) {
955 			/*
956 			 * Hardlink pointer needed, create totally fresh
957 			 * directory entry.
958 			 *
959 			 * We must refactor ocluster because it might have
960 			 * been shifted into an indirect cluster by the
961 			 * create.
962 			 */
963 			KKASSERT(ncluster == NULL);
964 			error = hammer2_cluster_create(trans,
965 						       dcluster, &ncluster,
966 						       lhc, 0,
967 						       HAMMER2_BREF_TYPE_INODE,
968 						       HAMMER2_INODE_BYTES,
969 						       0);
970 		} else {
971 			/*
972 			 * Reconnect the original cluster under the new name.
973 			 * Original cluster must have already been deleted by
974 			 * teh caller.
975 			 *
976 			 * WARNING! Can cause held-over clusters to require a
977 			 *	    refactor.  Fortunately we have none (our
978 			 *	    locked clusters are passed into and
979 			 *	    modified by the call).
980 			 */
981 			ncluster = ocluster;
982 			ocluster = NULL;
983 			error = hammer2_cluster_create(trans,
984 						       dcluster, &ncluster,
985 						       lhc, 0,
986 						       HAMMER2_BREF_TYPE_INODE,
987 						       HAMMER2_INODE_BYTES,
988 						       0);
989 		}
990 	}
991 
992 	/*
993 	 * Unlock stuff.
994 	 */
995 	KKASSERT(error != EAGAIN);
996 
997 	/*
998 	 * ncluster should be NULL on error, leave ocluster
999 	 * (ocluster == *clusterp) alone.
1000 	 */
1001 	if (error) {
1002 		KKASSERT(ncluster == NULL);
1003 		return (error);
1004 	}
1005 
1006 	/*
1007 	 * Directory entries are inodes so if the name has changed we have
1008 	 * to update the inode.
1009 	 *
1010 	 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
1011 	 * cluster, the caller will access the hardlink via the actual hardlink
1012 	 * target file and not the hardlink pointer entry, so we must still
1013 	 * return ocluster.
1014 	 */
1015 	if (hlink && hammer2_hardlink_enable >= 0) {
1016 		/*
1017 		 * Create the HARDLINK pointer.  oip represents the hardlink
1018 		 * target in this situation.
1019 		 *
1020 		 * We will return ocluster (the hardlink target).
1021 		 */
1022 		hammer2_cluster_modify(trans, ncluster, 0);
1023 		hammer2_cluster_clr_chainflags(ncluster,
1024 					       HAMMER2_CHAIN_UNLINKED);
1025 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1026 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1027 		bcopy(name, wipdata->filename, name_len);
1028 		wipdata->name_key = lhc;
1029 		wipdata->name_len = name_len;
1030 		wipdata->target_type =
1031 				hammer2_cluster_rdata(ocluster)->ipdata.type;
1032 		wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1033 		wipdata->inum = hammer2_cluster_rdata(ocluster)->ipdata.inum;
1034 		wipdata->version = HAMMER2_INODE_VERSION_ONE;
1035 		wipdata->nlinks = 1;
1036 		wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1037 		hammer2_cluster_modsync(ncluster);
1038 		hammer2_cluster_unlock(ncluster);
1039 		ncluster = ocluster;
1040 		ocluster = NULL;
1041 	} else {
1042 		/*
1043 		 * ncluster is a duplicate of ocluster at the new location.
1044 		 * We must fixup the name stored in the inode data.
1045 		 * The bref key has already been adjusted by inode_connect().
1046 		 */
1047 		hammer2_cluster_modify(trans, ncluster, 0);
1048 		hammer2_cluster_clr_chainflags(ncluster,
1049 					       HAMMER2_CHAIN_UNLINKED);
1050 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1051 
1052 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1053 		bcopy(name, wipdata->filename, name_len);
1054 		wipdata->name_key = lhc;
1055 		wipdata->name_len = name_len;
1056 		wipdata->nlinks = 1;
1057 		hammer2_cluster_modsync(ncluster);
1058 	}
1059 
1060 	/*
1061 	 * We are replacing ocluster with ncluster, unlock ocluster.  In the
1062 	 * case where ocluster is left unchanged the code above sets
1063 	 * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1064 	 */
1065 	if (ocluster)
1066 		hammer2_cluster_unlock(ocluster);
1067 	*clusterp = ncluster;
1068 
1069 	return (0);
1070 }
1071 
1072 /*
1073  * Repoint ip->cluster's chains to cluster's chains.  Caller must hold
1074  * the inode exclusively locked.  cluster may be NULL to clean out any
1075  * chains in ip->cluster.
1076  */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_cluster_t *cluster)
{
	hammer2_chain_t *ochain;	/* chain being replaced */
	hammer2_chain_t *nchain;	/* incoming replacement chain */
	hammer2_inode_t *opip;		/* previous parent inode, if any */
	int i;

	/*
	 * Replace chains in ip->cluster with chains from cluster and
	 * adjust the focus if necessary.
	 *
	 * NOTE: nchain and/or ochain can be NULL due to gaps
	 *	 in the cluster arrays.
	 */
	ip->cluster.focus = NULL;
	for (i = 0; cluster && i < cluster->nchains; ++i) {
		nchain = cluster->array[i];
		if (i < ip->cluster.nchains) {
			ochain = ip->cluster.array[i];
			if (ochain == nchain) {
				/*
				 * Element unchanged, no ref/drop needed,
				 * just (re)establish the focus.
				 */
				if (ip->cluster.focus == NULL)
					ip->cluster.focus = nchain;
				continue;
			}
		} else {
			ochain = NULL;
		}

		/*
		 * Make adjustments: install the new chain, pick up a
		 * ref on it, and drop the ref held on the old chain.
		 * The focus becomes the first non-NULL installed chain.
		 */
		ip->cluster.array[i] = nchain;
		if (ip->cluster.focus == NULL)
			ip->cluster.focus = nchain;
		if (nchain)
			hammer2_chain_ref(nchain);
		if (ochain)
			hammer2_chain_drop(ochain);
	}

	/*
	 * Release any left-over chains in ip->cluster when the new
	 * cluster is smaller (or NULL).
	 */
	while (i < ip->cluster.nchains) {
		nchain = ip->cluster.array[i];
		if (nchain) {
			ip->cluster.array[i] = NULL;
			hammer2_chain_drop(nchain);
		}
		++i;
	}
	ip->cluster.nchains = cluster ? cluster->nchains : 0;

	/*
	 * Repoint ip->pip if requested (non-NULL pip), transferring the
	 * inode ref from the old parent to the new parent.
	 */
	if (pip && ip->pip != pip) {
		opip = ip->pip;
		hammer2_inode_ref(pip);
		ip->pip = pip;
		if (opip)
			hammer2_inode_drop(opip);
	}
}
1143 
1144 /*
1145  * Unlink the file from the specified directory inode.  The directory inode
1146  * does not need to be locked.
1147  *
1148  * isdir determines whether a directory/non-directory check should be made.
1149  * No check is made if isdir is set to -1.
1150  *
 * The (nch) namecache handle enables special unlink-with-open-descriptor
 * handling.  nch may be passed as NULL (e.g. during a rename, where the
 * file is unlinked and then immediately relinked).
 *
 * If the file is still open (cache_isopen(nch)) and nlinks drops to 0 this
 * function must move the chain to a special hidden directory until
 * last-close occurs on the file.
1158  *
1159  * NOTE!  The underlying file can still be active with open descriptors
1160  *	  or if the chain is being manually held (e.g. for rename).
1161  *
1162  *	  The caller is responsible for fixing up ip->chain if e.g. a
1163  *	  rename occurs (see chain_duplicate()).
1164  *
1165  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1166  *	  but otherwise will be deleted.
1167  */
int
hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
		    const uint8_t *name, size_t name_len,
		    int isdir, int *hlinkp, struct nchandle *nch,
		    int nlinks)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *hcluster;	/* hardlink target, if any */
	hammer2_cluster_t *hparent;	/* hardlink target's parent */
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *dcluster;
	hammer2_key_t key_dummy;
	hammer2_key_t key_next;
	hammer2_key_t lhc;
	int error;
	int ddflag;
	int hlink;
	uint8_t type;

	error = 0;
	hlink = 0;
	hcluster = NULL;
	hparent = NULL;
	lhc = hammer2_dirhash(name, name_len);

again:
	/*
	 * Search for the filename in the directory.  The dirhash only
	 * covers a prefix of the name so we iterate the hash collision
	 * space and compare the full filename on each candidate.
	 */
	cparent = hammer2_inode_lock_ex(dip);
	cluster = hammer2_cluster_lookup(cparent, &key_next,
				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				     0, &ddflag);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			if (ripdata->name_len == name_len &&
			    bcmp(ripdata->filename, name, name_len) == 0) {
				break;
			}
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK,
					       0);
	}
	hammer2_inode_unlock_ex(dip, NULL);	/* retain cparent */

	/*
	 * Not found or wrong type (isdir < 0 disables the type check).
	 * If a hardlink pointer, type checks use the hardlink target.
	 */
	if (cluster == NULL) {
		error = ENOENT;
		goto done;
	}
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	type = ripdata->type;
	if (type == HAMMER2_OBJTYPE_HARDLINK) {
		hlink = 1;
		type = ripdata->target_type;
	}

	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
		error = ENOTDIR;
		goto done;
	}
	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
		error = EISDIR;
		goto done;
	}

	/*
	 * Hardlink must be resolved.  We can't hold the parent locked
	 * while we do this or we could deadlock.  The physical file will
	 * be located at or above the current directory.
	 *
	 * We loop to reacquire the hardlink origination.
	 *
	 * NOTE: hammer2_hardlink_find() will locate the hardlink target,
	 *	 returning a modified hparent and hcluster.
	 */
	if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
		if (hcluster == NULL) {
			hcluster = cluster;
			cluster = NULL;	/* safety */
			hammer2_cluster_unlock(cparent);
			cparent = NULL; /* safety */
			ripdata = NULL;	/* safety (associated w/cparent) */
			error = hammer2_hardlink_find(dip, &hparent, hcluster);

			/*
			 * If we couldn't find the hardlink target then some
			 * parent directory containing the hardlink pointer
			 * probably got renamed to above the original target,
			 * a case not yet handled by H2.
			 */
			if (error) {
				kprintf("H2 unlink_file: hardlink target for "
					"\"%s\" not found\n",
					name);
				kprintf("(likely due to known directory "
					"rename bug)\n");
				goto done;
			}
			goto again;
		}
	}

	/*
	 * If this is a directory the directory must be empty.  However, if
	 * isdir < 0 we are doing a rename and the directory does not have
	 * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
	 * and the directory does not have to be empty.
	 *
	 * NOTE: We check the full key range here which covers both visible
	 *	 and invisible entries.  Theoretically there should be no
	 *	 invisible (hardlink target) entries if there are no visible
	 *	 entries.
	 */
	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
		dparent = hammer2_cluster_lookup_init(cluster, 0);
		dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
					          0, (hammer2_key_t)-1,
					          HAMMER2_LOOKUP_NODATA,
						  &ddflag);
		if (dcluster) {
			/* any entry at all means the directory isn't empty */
			hammer2_cluster_unlock(dcluster);
			hammer2_cluster_lookup_done(dparent);
			error = ENOTEMPTY;
			goto done;
		}
		hammer2_cluster_lookup_done(dparent);
		dparent = NULL;
		/* dcluster NULL */
	}

	/*
	 * If this was a hardlink then (cparent, cluster) is the hardlink
	 * pointer, which we can simply destroy outright.  Discard the
	 * clusters and replace with the hardlink target.
	 */
	if (hcluster) {
		hammer2_cluster_delete(trans, cparent, cluster,
				       HAMMER2_DELETE_PERMANENT);
		hammer2_cluster_unlock(cparent);
		hammer2_cluster_unlock(cluster);
		/* swap in the hardlink target located earlier */
		cparent = hparent;
		cluster = hcluster;
		hparent = NULL;
		hcluster = NULL;
	}

	/*
	 * This leaves us with the hardlink target or non-hardlinked file
	 * or directory in (cparent, cluster).
	 *
	 * Delete the target when nlinks reaches 0 with special handling
	 * if (isopen) is set.
	 *
	 * NOTE! In DragonFly the vnops function calls cache_unlink() after
	 *	 calling us here to clean out the namecache association,
	 *	 (which does not represent a ref for the open-test), and to
	 *	 force finalization of the vnode if/when the last ref gets
	 *	 dropped.
	 *
	 * NOTE! Files are unlinked by rename and then relinked.  nch will be
	 *	 passed as NULL in this situation.  hammer2_inode_connect()
	 *	 will bump nlinks.
	 */
	KKASSERT(cluster != NULL);
	hammer2_cluster_modify(trans, cluster, 0);
	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
	ripdata = wipdata;
	wipdata->nlinks += nlinks;	/* signed adjustment; 0 for rename */
	if ((int64_t)wipdata->nlinks < 0) {	/* XXX debugging */
		wipdata->nlinks = 0;
	}
	hammer2_cluster_modsync(cluster);

	if (wipdata->nlinks == 0) {
		/*
		 * Target nlinks has reached 0, file now unlinked (but may
		 * still be open).
		 */
		/* XXX need interlock if mounted
		if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
		    cluster->pmp) {
			error = EINVAL;
			kprintf("hammer2: PFS \"%s\" cannot be deleted "
				"while still mounted\n",
				wipdata->filename);
			goto done;
		}
		*/
		hammer2_cluster_set_chainflags(cluster, HAMMER2_CHAIN_UNLINKED);
		if (nch && cache_isopen(nch)) {
			/* keep the backing store alive until last-close */
			hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
						     wipdata->inum);
		} else {
			/*
			 * This won't get everything if a vnode is still
			 * present, but the cache_unlink() call the caller
			 * makes will.
			 */
			hammer2_cluster_delete(trans, cparent, cluster,
					       HAMMER2_DELETE_PERMANENT);
		}
	} else if (hlink == 0) {
		/*
		 * In this situation a normal non-hardlinked file (which can
		 * only have nlinks == 1) still has a non-zero nlinks, the
		 * caller must be doing a RENAME operation and so is passing
		 * a nlinks adjustment of 0, and only wishes to remove file
		 * in order to be able to reconnect it under a different name.
		 *
		 * In this situation we do a non-permanent deletion of the
		 * chain in order to allow the file to be reconnected in
		 * a different location.
		 */
		KKASSERT(nlinks == 0);
		hammer2_cluster_delete(trans, cparent, cluster, 0);
	}
	error = 0;
done:
	/*
	 * Unlock everything still held; report via *hlinkp whether a
	 * hardlink pointer was involved.
	 */
	if (cparent)
		hammer2_cluster_unlock(cparent);
	if (cluster)
		hammer2_cluster_unlock(cluster);
	if (hparent)
		hammer2_cluster_unlock(hparent);
	if (hcluster)
		hammer2_cluster_unlock(hcluster);
	if (hlinkp)
		*hlinkp = hlink;

	return error;
}
1409 
1410 /*
1411  * This is called from the mount code to initialize pmp->ihidden
1412  */
1413 void
1414 hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
1415 {
1416 	hammer2_trans_t trans;
1417 	hammer2_cluster_t *cparent;
1418 	hammer2_cluster_t *cluster;
1419 	hammer2_cluster_t *scan;
1420 	const hammer2_inode_data_t *ripdata;
1421 	hammer2_inode_data_t *wipdata;
1422 	hammer2_key_t key_dummy;
1423 	hammer2_key_t key_next;
1424 	int ddflag;
1425 	int error;
1426 	int count;
1427 	int dip_check_algo;
1428 	int dip_comp_algo;
1429 
1430 	if (pmp->ihidden)
1431 		return;
1432 
1433 	/*
1434 	 * Find the hidden directory
1435 	 */
1436 	bzero(&key_dummy, sizeof(key_dummy));
1437 	hammer2_trans_init(&trans, pmp, 0);
1438 
1439 	/*
1440 	 * Setup for lookup, retrieve iroot's check and compression
1441 	 * algorithm request which was likely generated by newfs_hammer2.
1442 	 *
1443 	 * The check/comp fields will probably never be used since inodes
1444 	 * are renamed into the hidden directory and not created relative to
1445 	 * the hidden directory, chain creation inherits from bref.methods,
1446 	 * and data chains inherit from their respective file inode *_algo
1447 	 * fields.
1448 	 */
1449 	cparent = hammer2_inode_lock_ex(pmp->iroot);
1450 	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1451 	dip_check_algo = ripdata->check_algo;
1452 	dip_comp_algo = ripdata->comp_algo;
1453 	ripdata = NULL;
1454 
1455 	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1456 					 HAMMER2_INODE_HIDDENDIR,
1457 					 HAMMER2_INODE_HIDDENDIR,
1458 					 0, &ddflag);
1459 	if (cluster) {
1460 		pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1461 		hammer2_inode_ref(pmp->ihidden);
1462 
1463 		/*
1464 		 * Remove any unlinked files which were left open as-of
1465 		 * any system crash.
1466 		 *
1467 		 * Don't pass NODATA, we need the inode data so the delete
1468 		 * can do proper statistics updates.
1469 		 */
1470 		count = 0;
1471 		scan = hammer2_cluster_lookup(cluster, &key_next,
1472 					      0, HAMMER2_TID_MAX,
1473 					      0, &ddflag);
1474 		while (scan) {
1475 			if (hammer2_cluster_type(scan) ==
1476 			    HAMMER2_BREF_TYPE_INODE) {
1477 				hammer2_cluster_delete(&trans, cluster, scan,
1478 						   HAMMER2_DELETE_PERMANENT);
1479 				++count;
1480 			}
1481 			scan = hammer2_cluster_next(cluster, scan, &key_next,
1482 						    0, HAMMER2_TID_MAX, 0);
1483 		}
1484 
1485 		hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1486 		hammer2_inode_unlock_ex(pmp->iroot, cparent);
1487 		hammer2_trans_done(&trans);
1488 		kprintf("hammer2: PFS loaded hidden dir, "
1489 			"removed %d dead entries\n", count);
1490 		return;
1491 	}
1492 
1493 	/*
1494 	 * Create the hidden directory
1495 	 */
1496 	error = hammer2_cluster_create(&trans, cparent, &cluster,
1497 				       HAMMER2_INODE_HIDDENDIR, 0,
1498 				       HAMMER2_BREF_TYPE_INODE,
1499 				       HAMMER2_INODE_BYTES,
1500 				       0);
1501 	hammer2_inode_unlock_ex(pmp->iroot, cparent);
1502 
1503 	hammer2_cluster_modify(&trans, cluster, 0);
1504 	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1505 	wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
1506 	wipdata->inum = HAMMER2_INODE_HIDDENDIR;
1507 	wipdata->nlinks = 1;
1508 	wipdata->comp_algo = dip_comp_algo;
1509 	wipdata->check_algo = dip_check_algo;
1510 	hammer2_cluster_modsync(cluster);
1511 	kprintf("hammer2: PFS root missing hidden directory, creating\n");
1512 
1513 	pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1514 	hammer2_inode_ref(pmp->ihidden);
1515 	hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1516 	hammer2_trans_done(&trans);
1517 }
1518 
1519 /*
1520  * If an open file is unlinked H2 needs to retain the file in the topology
1521  * to ensure that its backing store is not recovered by the bulk free scan.
1522  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1523  *
1524  * To do this the file is moved to a hidden directory in the PFS root and
1525  * renamed.  The hidden directory must be created if it does not exist.
1526  */
1527 static
1528 void
1529 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1530 			     hammer2_cluster_t **cparentp,
1531 			     hammer2_cluster_t **clusterp,
1532 			     hammer2_tid_t inum)
1533 {
1534 	hammer2_cluster_t *dcluster;
1535 	hammer2_pfsmount_t *pmp;
1536 	int error;
1537 
1538 	pmp = (*clusterp)->pmp;
1539 	KKASSERT(pmp != NULL);
1540 	KKASSERT(pmp->ihidden != NULL);
1541 
1542 	hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1543 	dcluster = hammer2_inode_lock_ex(pmp->ihidden);
1544 	error = hammer2_inode_connect(trans, clusterp, 0,
1545 				      pmp->ihidden, dcluster,
1546 				      NULL, 0, inum);
1547 	hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
1548 	KKASSERT(error == 0);
1549 }
1550 
1551 /*
1552  * Given an exclusively locked inode and cluster we consolidate the cluster
1553  * for hardlink creation, adding (nlinks) to the file's link count and
1554  * potentially relocating the inode to (cdip) which is a parent directory
1555  * common to both the current location of the inode and the intended new
1556  * hardlink.
1557  *
1558  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1559  * and returning a new locked cluster.
1560  *
1561  * NOTE!  This function will also replace ip->cluster.
1562  */
1563 int
1564 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1565 			     hammer2_inode_t *ip,
1566 			     hammer2_cluster_t **clusterp,
1567 			     hammer2_inode_t *cdip,
1568 			     hammer2_cluster_t *cdcluster,
1569 			     int nlinks)
1570 {
1571 	const hammer2_inode_data_t *ripdata;
1572 	hammer2_inode_data_t *wipdata;
1573 	hammer2_cluster_t *cluster;
1574 	hammer2_cluster_t *cparent;
1575 	int error;
1576 
1577 	cluster = *clusterp;
1578 	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1579 	if (nlinks == 0 &&			/* no hardlink needed */
1580 	    (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1581 		return (0);
1582 	}
1583 
1584 	if (hammer2_hardlink_enable == 0) {	/* disallow hardlinks */
1585 		hammer2_cluster_unlock(cluster);
1586 		*clusterp = NULL;
1587 		return (ENOTSUP);
1588 	}
1589 
1590 	cparent = NULL;
1591 
1592 	/*
1593 	 * If no change in the hardlink's target directory is required and
1594 	 * this is already a hardlink target, all we need to do is adjust
1595 	 * the link count.
1596 	 */
1597 	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1598 	if (cdip == ip->pip &&
1599 	    (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1600 		if (nlinks) {
1601 			hammer2_cluster_modify(trans, cluster, 0);
1602 			wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1603 			wipdata->nlinks += nlinks;
1604 			hammer2_cluster_modsync(cluster);
1605 			ripdata = wipdata;
1606 		}
1607 		error = 0;
1608 		goto done;
1609 	}
1610 
1611 	/*
1612 	 * Cluster is the real inode.  The originating directory is locked
1613 	 * by the caller so we can manipulate it without worrying about races
1614 	 * against other lookups.
1615 	 *
1616 	 * If cluster is visible we need to delete it from the current
1617 	 * location and create a hardlink pointer in its place.  If it is
1618 	 * not visible we need only delete it.  Then later cluster will be
1619 	 * renamed to a parent directory and converted (if necessary) to
1620 	 * a hidden inode (via shiftup).
1621 	 *
1622 	 * NOTE! We must hold cparent locked through the delete/create/rename
1623 	 *	 operation to ensure that other threads block resolving to
1624 	 *	 the same hardlink, otherwise the other threads may not see
1625 	 *	 the hardlink.
1626 	 */
1627 	KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1628 	cparent = hammer2_cluster_parent(cluster);
1629 
1630 	hammer2_cluster_delete(trans, cparent, cluster, 0);
1631 
1632 	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1633 	KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1634 	if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1635 		hammer2_cluster_t *ncluster;
1636 		hammer2_key_t lhc;
1637 
1638 		ncluster = NULL;
1639 		lhc = cluster->focus->bref.key;
1640 		error = hammer2_cluster_create(trans, cparent, &ncluster,
1641 					     lhc, 0,
1642 					     HAMMER2_BREF_TYPE_INODE,
1643 					     HAMMER2_INODE_BYTES,
1644 					     0);
1645 		hammer2_cluster_modify(trans, ncluster, 0);
1646 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1647 
1648 		/* wipdata->comp_algo = ripdata->comp_algo; */
1649 		wipdata->comp_algo = 0;
1650 		wipdata->check_algo = 0;
1651 		wipdata->version = HAMMER2_INODE_VERSION_ONE;
1652 		wipdata->inum = ripdata->inum;
1653 		wipdata->target_type = ripdata->type;
1654 		wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1655 		wipdata->uflags = 0;
1656 		wipdata->rmajor = 0;
1657 		wipdata->rminor = 0;
1658 		wipdata->ctime = 0;
1659 		wipdata->mtime = 0;
1660 		wipdata->atime = 0;
1661 		wipdata->btime = 0;
1662 		bzero(&wipdata->uid, sizeof(wipdata->uid));
1663 		bzero(&wipdata->gid, sizeof(wipdata->gid));
1664 		wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1665 		wipdata->cap_flags = 0;
1666 		wipdata->mode = 0;
1667 		wipdata->size = 0;
1668 		wipdata->nlinks = 1;
1669 		wipdata->iparent = 0;	/* XXX */
1670 		wipdata->pfs_type = 0;
1671 		wipdata->pfs_inum = 0;
1672 		bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1673 		bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1674 		wipdata->data_quota = 0;
1675 		wipdata->data_count = 0;
1676 		wipdata->inode_quota = 0;
1677 		wipdata->inode_count = 0;
1678 		wipdata->attr_tid = 0;
1679 		wipdata->dirent_tid = 0;
1680 		bzero(&wipdata->u, sizeof(wipdata->u));
1681 		bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1682 		wipdata->name_key = ncluster->focus->bref.key;
1683 		wipdata->name_len = ripdata->name_len;
1684 		/* XXX transaction ids */
1685 		hammer2_cluster_modsync(ncluster);
1686 		hammer2_cluster_unlock(ncluster);
1687 	}
1688 	ripdata = wipdata;
1689 
1690 	/*
1691 	 * cluster represents the hardlink target and is now flagged deleted.
1692 	 * duplicate it to the parent directory and adjust nlinks.
1693 	 *
1694 	 * WARNING! The shiftup() call can cause ncluster to be moved into
1695 	 *	    an indirect block, and our ncluster will wind up pointing
1696 	 *	    to the older/original version.
1697 	 */
1698 	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1699 	hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1700 				 nlinks, &error);
1701 
1702 	if (error == 0)
1703 		hammer2_inode_repoint(ip, cdip, cluster);
1704 
1705 done:
1706 	/*
1707 	 * Cleanup, cluster/ncluster already dealt with.
1708 	 *
1709 	 * Return the shifted cluster in *clusterp.
1710 	 */
1711 	if (cparent)
1712 		hammer2_cluster_unlock(cparent);
1713 	*clusterp = cluster;
1714 
1715 	return (error);
1716 }
1717 
1718 /*
1719  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1720  * inode while (*chainp) points to the resolved (hidden hardlink
1721  * target) inode.  In this situation when nlinks is 1 we wish to
1722  * deconsolidate the hardlink, moving it back to the directory that now
1723  * represents the only remaining link.
1724  */
1725 int
1726 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1727 			       hammer2_inode_t *dip,
1728 			       hammer2_chain_t **chainp,
1729 			       hammer2_chain_t **ochainp)
1730 {
1731 	if (*ochainp == NULL)
1732 		return (0);
1733 	/* XXX */
1734 	return (0);
1735 }
1736 
1737 /*
1738  * The caller presents a locked cluster with an obj_type of
1739  * HAMMER2_OBJTYPE_HARDLINK.  This routine will locate and replace the
1740  * cluster with the target hardlink, also locked.
1741  *
1742  * If cparentp is not NULL a locked cluster representing the hardlink's
1743  * parent is also returned.
1744  *
1745  * If we are unable to locate the hardlink target EIO is returned and
1746  * (*cparentp) is set to NULL.  The passed-in cluster still needs to be
1747  * unlocked by the caller but will be degenerate... not have any chains.
1748  */
int
hammer2_hardlink_find(hammer2_inode_t *dip,
		      hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster)
{
	const hammer2_inode_data_t *ipdata;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *rcluster;
	hammer2_inode_t *ip;
	hammer2_inode_t *pip;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int ddflag;

	pip = dip;
	hammer2_inode_ref(pip);		/* for loop */

	/*
	 * Locate the hardlink.  pip is referenced and not locked.
	 * The hardlink pointer's inum field keys the target lookup.
	 */
	ipdata = &hammer2_cluster_rdata(cluster)->ipdata;
	lhc = ipdata->inum;

	/*
	 * We don't need the cluster's chains, but we need to retain the
	 * cluster structure itself so we can load the hardlink search
	 * result into it.
	 */
	KKASSERT(cluster->refs == 1);
	atomic_add_int(&cluster->refs, 1);
	hammer2_cluster_unlock(cluster);	/* hack */
	cluster->nchains = 0;			/* hack */
	/*
	 * NOTE: the two hacks above empty the passed-in cluster while
	 *	 the extra ref keeps the structure alive across the
	 *	 unlock; the search result is loaded back into it via
	 *	 hammer2_cluster_replace() below.
	 */

	rcluster = NULL;
	cparent = NULL;

	/*
	 * Walk up the directory topology from dip, looking in each
	 * directory for an entry keyed by the target's inode number.
	 */
	while ((ip = pip) != NULL) {
		cparent = hammer2_inode_lock_ex(ip);
		hammer2_inode_drop(ip);			/* loop */
		KKASSERT(hammer2_cluster_type(cparent) ==
			 HAMMER2_BREF_TYPE_INODE);
		rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
					     lhc, lhc, 0, &ddflag);
		if (rcluster)
			break;
		hammer2_cluster_lookup_done(cparent);	/* discard parent */
		cparent = NULL;				/* safety */
		pip = ip->pip;		/* safe, ip held locked */
		if (pip)
			hammer2_inode_ref(pip);		/* loop */
		hammer2_inode_unlock_ex(ip, NULL);
	}

	/*
	 * chain is locked, ip is locked.  Unlock ip, return the locked
	 * chain.  *ipp is already set w/a ref count and not locked.
	 *
	 * (cparent is already unlocked).
	 */
	if (rcluster) {
		/* load the search result into the caller's cluster */
		hammer2_cluster_replace(cluster, rcluster);
		hammer2_cluster_drop(rcluster);
		if (cparentp) {
			*cparentp = cparent;
			hammer2_inode_unlock_ex(ip, NULL);
		} else {
			hammer2_inode_unlock_ex(ip, cparent);
		}
		return (0);
	} else {
		/* target not found; caller's cluster is left degenerate */
		if (cparentp)
			*cparentp = NULL;
		if (ip)
			hammer2_inode_unlock_ex(ip, cparent);
		return (EIO);
	}
}
1825 
1826 /*
1827  * Find the directory common to both fdip and tdip.
1828  *
1829  * Returns a held but not locked inode.  Caller typically locks the inode,
1830  * and when through unlocks AND drops it.
1831  */
1832 hammer2_inode_t *
1833 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1834 {
1835 	hammer2_inode_t *scan1;
1836 	hammer2_inode_t *scan2;
1837 
1838 	/*
1839 	 * We used to have a depth field but it complicated matters too
1840 	 * much for directory renames.  So now its ugly.  Check for
1841 	 * simple cases before giving up and doing it the expensive way.
1842 	 *
1843 	 * XXX need a bottom-up topology stability lock
1844 	 */
1845 	if (fdip == tdip || fdip == tdip->pip) {
1846 		hammer2_inode_ref(fdip);
1847 		return(fdip);
1848 	}
1849 	if (fdip->pip == tdip) {
1850 		hammer2_inode_ref(tdip);
1851 		return(tdip);
1852 	}
1853 
1854 	/*
1855 	 * XXX not MPSAFE
1856 	 */
1857 	for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1858 		scan2 = tdip;
1859 		while (scan2->pmp == tdip->pmp) {
1860 			if (scan1 == scan2) {
1861 				hammer2_inode_ref(scan1);
1862 				return(scan1);
1863 			}
1864 			scan2 = scan2->pip;
1865 			if (scan2 == NULL)
1866 				break;
1867 		}
1868 	}
1869 	panic("hammer2_inode_common_parent: no common parent %p %p\n",
1870 	      fdip, tdip);
1871 	/* NOT REACHED */
1872 	return(NULL);
1873 }
1874 
1875 /*
1876  * Synchronize the inode's frontend state with the chain state prior
1877  * to any explicit flush of the inode or any strategy write call.
1878  *
1879  * Called with a locked inode.
1880  */
void
hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
		    hammer2_cluster_t *cparent)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_cluster_t *dparent;
	hammer2_cluster_t *cluster;
	hammer2_key_t lbase;
	hammer2_key_t key_next;
	int dosync = 0;		/* set when the media inode was modified */
	int ddflag;

	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;    /* target file */

	/*
	 * Propagate a cached frontend mtime update into the media inode.
	 */
	if (ip->flags & HAMMER2_INODE_MTIME) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
		wipdata->mtime = ip->mtime;
		dosync = 1;
		ripdata = wipdata;
	}
	/*
	 * File shrank: record the new size and destroy any data chains
	 * entirely beyond the new EOF.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		dosync = 1;
		ripdata = wipdata;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * We must delete any chains beyond the EOF.  The chain
		 * straddling the EOF will be pending in the bioq.
		 */
		lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
			~HAMMER2_PBUFMASK64;
		dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
		cluster = hammer2_cluster_lookup(dparent, &key_next,
					         lbase, (hammer2_key_t)-1,
						 HAMMER2_LOOKUP_NODATA,
						 &ddflag);
		while (cluster) {
			/*
			 * Degenerate embedded case, nothing to loop on
			 */
			switch (hammer2_cluster_type(cluster)) {
			case HAMMER2_BREF_TYPE_INODE:
				hammer2_cluster_unlock(cluster);
				cluster = NULL;
				break;
			case HAMMER2_BREF_TYPE_DATA:
				hammer2_cluster_delete(trans, dparent, cluster,
						   HAMMER2_DELETE_PERMANENT);
				/* fall through */
			default:
				cluster = hammer2_cluster_next(dparent, cluster,
						   &key_next,
						   key_next, (hammer2_key_t)-1,
						   HAMMER2_LOOKUP_NODATA);
				break;
			}
		}
		hammer2_cluster_lookup_done(dparent);
	} else
	/*
	 * File grew: record the new size; if the file outgrew its
	 * embedded direct data the DIRECTDATA mode must be cleared.
	 */
	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
		wipdata->size = ip->size;
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);

		/*
		 * When resizing larger we may not have any direct-data
		 * available.
		 */
		if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
		    ip->size > HAMMER2_EMBEDDED_BYTES) {
			wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
			bzero(&wipdata->u.blockset,
			      sizeof(wipdata->u.blockset));
		}
		dosync = 1;
		ripdata = wipdata;
	}
	if (dosync)
		hammer2_cluster_modsync(cparent);
}
1965