xref: /dflybsd-src/sys/vfs/hammer/hammer_vnops.c (revision c4bf625e67439f34b29bfd33c4e2555ffea63ce9)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.37 2008/04/22 19:00:15 dillon Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/namecache.h>
42 #include <sys/vnode.h>
43 #include <sys/lockf.h>
44 #include <sys/event.h>
45 #include <sys/stat.h>
46 #include <sys/dirent.h>
47 #include <vm/vm_extern.h>
48 #include <vfs/fifofs/fifo.h>
49 #include "hammer.h"
50 
51 /*
52  * USERFS VNOPS
53  */
54 /*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
55 static int hammer_vop_fsync(struct vop_fsync_args *);
56 static int hammer_vop_read(struct vop_read_args *);
57 static int hammer_vop_write(struct vop_write_args *);
58 static int hammer_vop_access(struct vop_access_args *);
59 static int hammer_vop_advlock(struct vop_advlock_args *);
60 static int hammer_vop_close(struct vop_close_args *);
61 static int hammer_vop_ncreate(struct vop_ncreate_args *);
62 static int hammer_vop_getattr(struct vop_getattr_args *);
63 static int hammer_vop_nresolve(struct vop_nresolve_args *);
64 static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
65 static int hammer_vop_nlink(struct vop_nlink_args *);
66 static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
67 static int hammer_vop_nmknod(struct vop_nmknod_args *);
68 static int hammer_vop_open(struct vop_open_args *);
69 static int hammer_vop_pathconf(struct vop_pathconf_args *);
70 static int hammer_vop_print(struct vop_print_args *);
71 static int hammer_vop_readdir(struct vop_readdir_args *);
72 static int hammer_vop_readlink(struct vop_readlink_args *);
73 static int hammer_vop_nremove(struct vop_nremove_args *);
74 static int hammer_vop_nrename(struct vop_nrename_args *);
75 static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
76 static int hammer_vop_setattr(struct vop_setattr_args *);
77 static int hammer_vop_strategy(struct vop_strategy_args *);
78 static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
79 static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
80 static int hammer_vop_ioctl(struct vop_ioctl_args *);
81 static int hammer_vop_mountctl(struct vop_mountctl_args *);
82 
83 static int hammer_vop_fifoclose (struct vop_close_args *);
84 static int hammer_vop_fiforead (struct vop_read_args *);
85 static int hammer_vop_fifowrite (struct vop_write_args *);
86 
87 static int hammer_vop_specclose (struct vop_close_args *);
88 static int hammer_vop_specread (struct vop_read_args *);
89 static int hammer_vop_specwrite (struct vop_write_args *);
90 
/*
 * Vnode operations vector for regular HAMMER files and directories.
 * Operations not listed here fall through to vop_defaultop.
 */
struct vop_ops hammer_vnode_vops = {
	.vop_default =		vop_defaultop,
	.vop_fsync =		hammer_vop_fsync,
	.vop_getpages =		vop_stdgetpages,
	.vop_putpages =		vop_stdputpages,
	.vop_read =		hammer_vop_read,
	.vop_write =		hammer_vop_write,
	.vop_access =		hammer_vop_access,
	.vop_advlock =		hammer_vop_advlock,
	.vop_close =		hammer_vop_close,
	.vop_ncreate =		hammer_vop_ncreate,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_nresolve =		hammer_vop_nresolve,
	.vop_nlookupdotdot =	hammer_vop_nlookupdotdot,
	.vop_nlink =		hammer_vop_nlink,
	.vop_nmkdir =		hammer_vop_nmkdir,
	.vop_nmknod =		hammer_vop_nmknod,
	.vop_open =		hammer_vop_open,
	.vop_pathconf =		hammer_vop_pathconf,
	.vop_print =		hammer_vop_print,
	.vop_readdir =		hammer_vop_readdir,
	.vop_readlink =		hammer_vop_readlink,
	.vop_nremove =		hammer_vop_nremove,
	.vop_nrename =		hammer_vop_nrename,
	.vop_nrmdir =		hammer_vop_nrmdir,
	.vop_setattr =		hammer_vop_setattr,
	.vop_strategy =		hammer_vop_strategy,
	.vop_nsymlink =		hammer_vop_nsymlink,
	.vop_nwhiteout =	hammer_vop_nwhiteout,
	.vop_ioctl =		hammer_vop_ioctl,
	.vop_mountctl =		hammer_vop_mountctl
};
125 
/*
 * Vnode operations vector for character/block device nodes stored on
 * HAMMER.  Unlisted operations fall through to spec_vnoperate.
 */
struct vop_ops hammer_spec_vops = {
	.vop_default =		spec_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_specread,
	.vop_write =		hammer_vop_specwrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_specclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};
138 
/*
 * Vnode operations vector for fifos stored on HAMMER.  Unlisted
 * operations fall through to fifo_vnoperate.
 */
struct vop_ops hammer_fifo_vops = {
	.vop_default =		fifo_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_fiforead,
	.vop_write =		hammer_vop_fifowrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_fifoclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};
151 
152 static int hammer_dounlink(struct nchandle *nch, struct vnode *dvp,
153 			   struct ucred *cred, int flags);
154 static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
155 static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
156 
#if 0
/*
 * Generic passthrough that routes any VOP through the standard HAMMER
 * vnode vector.  Currently unused (vop_defaultop is wired in directly).
 *
 * Fixed: the parameter declarator was missing its name ('ap') even
 * though the body references 'ap' -- this would not compile if the
 * #if 0 guard were ever removed.
 */
static
int
hammer_vop_vnoperate(struct vop_generic_args *ap)
{
	return (VOCALL(&hammer_vnode_vops, ap));
}
#endif
165 
166 /*
167  * hammer_vop_fsync { vp, waitfor }
168  */
/*
 * hammer_vop_fsync { vp, waitfor }
 *
 * Queue the inode to the HAMMER flusher (if not already queued) and
 * kick the flusher asynchronously.  For MNT_WAIT syncs, sleep until
 * the inode has been taken off the flush queue.  Returns any error
 * previously recorded on the inode.
 */
static
int
hammer_vop_fsync(struct vop_fsync_args *ap)
{
	hammer_inode_t ip;

	ip = VTOI(ap->a_vp);
	if ((ip->flags & HAMMER_INODE_FLUSHQ) == 0) {
		/*
		 * Hold an extra ref while the inode sits on the flush
		 * queue (presumably dropped by the flusher when it
		 * dequeues the inode -- confirm in the flusher code).
		 */
		++ip->lock.refs;
		ip->flags |= HAMMER_INODE_FLUSHQ;
		TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
		hammer_flusher_async(ip->hmp);
	}
	if (ap->a_waitfor == MNT_WAIT) {
		/*
		 * FLUSHW requests a wakeup(ip) when FLUSHQ is cleared.
		 * Loop because the wakeup may be spurious relative to
		 * this inode's state.
		 */
		while (ip->flags & HAMMER_INODE_FLUSHQ) {
			ip->flags |= HAMMER_INODE_FLUSHW;
			tsleep(ip, 0, "hmrifl", 0);
		}
	}
	return (ip->error);
}
190 
191 /*
192  * hammer_vop_read { vp, uio, ioflag, cred }
193  */
/*
 * hammer_vop_read { vp, uio, ioflag, cred }
 *
 * Read from a regular file through the buffer cache in HAMMER_BUFSIZE
 * blocks.  Marks atime dirty as a side effect unless the inode is a
 * read-only (as-of) view or the mount is NOATIME.
 */
static
int
hammer_vop_read(struct vop_read_args *ap)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	off_t offset;		/* byte offset within the current buffer */
	struct buf *bp;
	struct uio *uio;
	int error;
	int n;			/* bytes to copy out of the current buffer */
	int seqcount;

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	error = 0;
	seqcount = ap->a_ioflag >> 16;	/* sequential-access hint from ioflag */

	hammer_start_transaction(&trans, ip->hmp);

	/*
	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
	 */
	uio = ap->a_uio;
	while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_rec.ino_size) {
		offset = uio->uio_offset & HAMMER_BUFMASK;
#if 0
		error = cluster_read(ap->a_vp, ip->ino_rec.ino_size,
				     uio->uio_offset - offset, HAMMER_BUFSIZE,
				     MAXBSIZE, seqcount, &bp);
#endif
		error = bread(ap->a_vp, uio->uio_offset - offset,
			      HAMMER_BUFSIZE, &bp);
		if (error) {
			brelse(bp);
			break;
		}
		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		/*
		 * Clip the copy length to the buffer boundary, the
		 * remaining request size, and EOF.
		 */
		n = HAMMER_BUFSIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > ip->ino_rec.ino_size - uio->uio_offset)
			n = (int)(ip->ino_rec.ino_size - uio->uio_offset);
		error = uiomove((char *)bp->b_data + offset, n, uio);
		if (error) {
			bqrelse(bp);
			break;
		}
		/*
		 * Lazily update atime (stamped with the transaction tid)
		 * unless suppressed by RO inode or NOATIME mount.
		 */
		if ((ip->flags & HAMMER_INODE_RO) == 0 &&
		    (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
			ip->ino_rec.ino_atime = trans.tid;
			hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
		}
		bqrelse(bp);
	}
	hammer_commit_transaction(&trans);
	return (error);
}
253 
254 /*
255  * hammer_vop_write { vp, uio, ioflag, cred }
256  */
/*
 * hammer_vop_write { vp, uio, ioflag, cred }
 *
 * Write to a regular file through the buffer cache in HAMMER_BUFSIZE
 * blocks, extending the file/VM object size as needed.  The entire
 * write is covered by one transaction: aborted on error, committed
 * otherwise.
 */
static
int
hammer_vop_write(struct vop_write_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct uio *uio;
	off_t offset;		/* byte offset within the current buffer */
	struct buf *bp;
	int error;
	int n;			/* bytes to copy into the current buffer */
	int flags;		/* inode modify flags for this iteration */
	int count;		/* buffers written, for periodic bwillwrite */

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	error = 0;

	/* as-of (historical) inodes can never be written */
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, ip->hmp);
	uio = ap->a_uio;

	/*
	 * Check append mode
	 */
	if (ap->a_ioflag & IO_APPEND)
		uio->uio_offset = ip->ino_rec.ino_size;

	/*
	 * Check for illegal write offsets.  Valid range is 0...2^63-1
	 *
	 * NOTE(review): uio_offset + uio_resid is a signed addition and
	 * could in principle overflow before this test fires -- confirm
	 * upper layers bound uio_resid.
	 */
	if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) {
		hammer_commit_transaction(&trans);
		return (EFBIG);
	}

	/*
	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
	 */
	count = 0;
	while (uio->uio_resid > 0) {
		int fixsize = 0;	/* VM size was extended this pass */

		/*
		 * Do not allow huge writes to deadlock the buffer cache:
		 * every 16 buffers drop the vnode lock and give the
		 * buffer daemon a chance to catch up.
		 */
		if ((++count & 15) == 0) {
			vn_unlock(ap->a_vp);
			if ((ap->a_ioflag & IO_NOBWILL) == 0)
				bwillwrite();
			vn_lock(ap->a_vp, LK_EXCLUSIVE|LK_RETRY);
		}

		offset = uio->uio_offset & HAMMER_BUFMASK;
		n = HAMMER_BUFSIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (uio->uio_offset + n > ip->ino_rec.ino_size) {
			/* grow the VM object before copying data in */
			vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
			fixsize = 1;
		}

		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ap->a_vp,
					      uio->uio_offset - offset,
					      HAMMER_BUFSIZE, &bp);
			}
		} else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
			/*
			 * entirely overwrite the buffer
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
		} else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) {
			/*
			 * XXX - buffer-aligned write at or past EOF;
			 * nothing to read back so just clear the buffer.
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 */
			error = bread(ap->a_vp, uio->uio_offset - offset,
				      HAMMER_BUFSIZE, &bp);
			if (error == 0)
				bheavy(bp);
		}
		if (error == 0)
			error = uiomove((char *)bp->b_data + offset, n, uio);

		/*
		 * If we screwed up we have to undo any VM size changes we
		 * made.
		 */
		if (error) {
			brelse(bp);
			if (fixsize) {
				vtruncbuf(ap->a_vp, ip->ino_rec.ino_size,
					  HAMMER_BUFSIZE);
			}
			break;
		}
		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		if (ip->ino_rec.ino_size < uio->uio_offset) {
			/* the write extended the file */
			ip->ino_rec.ino_size = uio->uio_offset;
			flags = HAMMER_INODE_RDIRTY;
			vnode_pager_setsize(ap->a_vp, ip->ino_rec.ino_size);
		} else {
			flags = 0;
		}
		ip->ino_rec.ino_mtime = trans.tid;
		flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
		hammer_modify_inode(&trans, ip, flags);

		/*
		 * Buffer disposition: synchronous for IO_SYNC, async
		 * for IO_DIRECT, delayed write otherwise.
		 */
		if (ap->a_ioflag & IO_SYNC) {
			bwrite(bp);
		} else if (ap->a_ioflag & IO_DIRECT) {
			bawrite(bp);
#if 0
		} else if ((ap->a_ioflag >> 16) == IO_SEQMAX &&
			   (uio->uio_offset & HAMMER_BUFMASK) == 0) {
			/*
			 * XXX HAMMER can only fsync the whole inode,
			 * doing it on every buffer would be a bad idea.
			 */
			/*
			 * If seqcount indicates sequential operation and
			 * we just finished filling a buffer, push it out
			 * now to prevent the buffer cache from becoming
			 * too full, which would trigger non-optimal
			 * flushes.
			 */
			bdwrite(bp);
#endif
		} else {
			bdwrite(bp);
		}
	}
	if (error)
		hammer_abort_transaction(&trans);
	else
		hammer_commit_transaction(&trans);
	return (error);
}
421 
422 /*
423  * hammer_vop_access { vp, mode, cred }
424  */
425 static
426 int
427 hammer_vop_access(struct vop_access_args *ap)
428 {
429 	struct hammer_inode *ip = VTOI(ap->a_vp);
430 	uid_t uid;
431 	gid_t gid;
432 	int error;
433 
434 	uid = hammer_to_unix_xid(&ip->ino_data.uid);
435 	gid = hammer_to_unix_xid(&ip->ino_data.gid);
436 
437 	error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
438 				  ip->ino_data.uflags);
439 	return (error);
440 }
441 
442 /*
443  * hammer_vop_advlock { vp, id, op, fl, flags }
444  */
445 static
446 int
447 hammer_vop_advlock(struct vop_advlock_args *ap)
448 {
449 	struct hammer_inode *ip = VTOI(ap->a_vp);
450 
451 	return (lf_advlock(ap, &ip->advlock, ip->ino_rec.ino_size));
452 }
453 
454 /*
455  * hammer_vop_close { vp, fflag }
456  */
/*
 * hammer_vop_close { vp, fflag }
 *
 * Nothing HAMMER-specific to do on close; defer to the standard
 * implementation.
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
	int error;

	error = vop_stdclose(ap);
	return (error);
}
463 
464 /*
465  * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
466  *
467  * The operating system has already ensured that the directory entry
468  * does not exist and done all appropriate namespace locking.
469  */
470 static
471 int
472 hammer_vop_ncreate(struct vop_ncreate_args *ap)
473 {
474 	struct hammer_transaction trans;
475 	struct hammer_inode *dip;
476 	struct hammer_inode *nip;
477 	struct nchandle *nch;
478 	int error;
479 
480 	nch = ap->a_nch;
481 	dip = VTOI(ap->a_dvp);
482 
483 	if (dip->flags & HAMMER_INODE_RO)
484 		return (EROFS);
485 
486 	/*
487 	 * Create a transaction to cover the operations we perform.
488 	 */
489 	hammer_start_transaction(&trans, dip->hmp);
490 
491 	/*
492 	 * Create a new filesystem object of the requested type.  The
493 	 * returned inode will be referenced but not locked.
494 	 */
495 
496 	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
497 	if (error)
498 		kprintf("hammer_create_inode error %d\n", error);
499 	if (error) {
500 		hammer_abort_transaction(&trans);
501 		*ap->a_vpp = NULL;
502 		return (error);
503 	}
504 
505 	/*
506 	 * Add the new filesystem object to the directory.  This will also
507 	 * bump the inode's link count.
508 	 */
509 	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
510 	if (error)
511 		kprintf("hammer_ip_add_directory error %d\n", error);
512 
513 	/*
514 	 * Finish up.
515 	 */
516 	if (error) {
517 		hammer_rel_inode(nip, 0);
518 		hammer_abort_transaction(&trans);
519 		*ap->a_vpp = NULL;
520 	} else {
521 		hammer_commit_transaction(&trans);
522 		error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
523 		hammer_rel_inode(nip, 0);
524 		if (error == 0) {
525 			cache_setunresolved(ap->a_nch);
526 			cache_setvp(ap->a_nch, *ap->a_vpp);
527 		}
528 	}
529 	return (error);
530 }
531 
532 /*
533  * hammer_vop_getattr { vp, vap }
534  */
/*
 * hammer_vop_getattr { vp, vap }
 *
 * Fill in *a_vap from the in-memory inode record and inode data.
 * Always succeeds.
 */
static
int
hammer_vop_getattr(struct vop_getattr_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);
	struct vattr *vap = ap->a_vap;

#if 0
	if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
	    ip->obj_asof == XXX
	) {
		/* LAZYMOD XXX */
	}
	hammer_itimes(ap->a_vp);
#endif

	vap->va_fsid = ip->hmp->fsid_udev;
	vap->va_fileid = ip->ino_rec.base.base.obj_id;
	vap->va_mode = ip->ino_data.mode;
	vap->va_nlink = ip->ino_rec.ino_nlinks;
	vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
	vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->ino_rec.ino_size;
	hammer_to_timespec(ip->ino_rec.ino_atime, &vap->va_atime);
	hammer_to_timespec(ip->ino_rec.ino_mtime, &vap->va_mtime);
	hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
	vap->va_flags = ip->ino_data.uflags;
	vap->va_gen = 1;	/* hammer inums are unique for all time */
	vap->va_blocksize = HAMMER_BUFSIZE;
	/* report size rounded up to a 64-byte boundary */
	vap->va_bytes = (ip->ino_rec.ino_size + 63) & ~63;
	vap->va_type = hammer_get_vnode_type(ip->ino_rec.base.base.obj_type);
	vap->va_filerev = 0; 	/* XXX */
	/* mtime uniquely identifies any adjustments made to the file */
	vap->va_fsmid = ip->ino_rec.ino_mtime;
	vap->va_uid_uuid = ip->ino_data.uid;
	vap->va_gid_uuid = ip->ino_data.gid;
	vap->va_fsid_uuid = ip->hmp->fsid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	/* device nodes additionally report their major/minor numbers */
	switch (ip->ino_rec.base.base.obj_type) {
	case HAMMER_OBJTYPE_CDEV:
	case HAMMER_OBJTYPE_BDEV:
		vap->va_rmajor = ip->ino_data.rmajor;
		vap->va_rminor = ip->ino_data.rminor;
		break;
	default:
		break;
	}

	return(0);
}
590 
591 /*
592  * hammer_vop_nresolve { nch, dvp, cred }
593  *
594  * Locate the requested directory entry.
595  */
/*
 * hammer_vop_nresolve { nch, dvp, cred }
 *
 * Locate the requested directory entry.  Supports '@@<tid>' name
 * extensions which resolve the entry as-of the given transaction id
 * (such inodes are forced read-only).
 */
static
int
hammer_vop_nresolve(struct vop_nresolve_args *ap)
{
	struct hammer_transaction trans;
	struct namecache *ncp;
	hammer_inode_t dip;
	hammer_inode_t ip;
	hammer_tid_t asof;
	struct hammer_cursor cursor;
	union hammer_record_ondisk *rec;
	struct vnode *vp;
	int64_t namekey;
	int error;
	int i;
	int nlen;
	int flags;
	u_int64_t obj_id;

	/*
	 * Misc initialization, plus handle as-of name extensions.  Look for
	 * the '@@' extension.  Note that as-of files and directories cannot
	 * be modified.
	 */
	dip = VTOI(ap->a_dvp);
	ncp = ap->a_nch->ncp;
	asof = dip->obj_asof;
	nlen = ncp->nc_nlen;
	flags = dip->flags;

	hammer_simple_transaction(&trans, dip->hmp);

	/*
	 * NOTE(review): when i == nlen-1 this reads nc_name[i+1], one
	 * byte past the component length; safe only if nc_name is
	 * NUL-terminated -- confirm.
	 */
	for (i = 0; i < nlen; ++i) {
		if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
			asof = hammer_str_to_tid(ncp->nc_name + i + 2);
			flags |= HAMMER_INODE_RO;
			break;
		}
	}
	nlen = i;	/* truncate the name at the '@@', if any */

	/*
	 * If there is no path component the time extension is relative to
	 * dip.
	 */
	if (nlen == 0) {
		ip = hammer_get_inode(&trans, &dip->cache[1], dip->obj_id,
				      asof, flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
		goto done;
	}

	/*
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(ncp->nc_name, nlen);

	error = hammer_init_cursor(&trans, &cursor, &dip->cache[0]);
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	/* scan the whole 32-bit "chain" under this namekey */
	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	if (error == 0)
		error = hammer_ip_first(&cursor, dip);

	rec = NULL;
	obj_id = 0;

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		rec = cursor.record;
		/* exact name match required -- hash collisions share a chain */
		if (nlen == rec->entry.base.data_len &&
		    bcmp(ncp->nc_name, cursor.data, nlen) == 0) {
			obj_id = rec->entry.obj_id;
			break;
		}
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);
	if (error == 0) {
		ip = hammer_get_inode(&trans, &dip->cache[1],
				      obj_id, asof, flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
	} else if (error == ENOENT) {
		/* negative cache hit */
		cache_setvp(ap->a_nch, NULL);
	}
done:
	hammer_commit_transaction(&trans);
	return (error);
}
727 
728 /*
729  * hammer_vop_nlookupdotdot { dvp, vpp, cred }
730  *
731  * Locate the parent directory of a directory vnode.
732  *
733  * dvp is referenced but not locked.  *vpp must be returned referenced and
734  * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
735  * at the root, instead it could indicate that the directory we were in was
736  * removed.
737  *
738  * NOTE: as-of sequences are not linked into the directory structure.  If
739  * we are at the root with a different asof then the mount point, reload
740  * the same directory with the mount point's asof.   I'm not sure what this
741  * will do to NFS.  We encode ASOF stamps in NFS file handles so it might not
742  * get confused, but it hasn't been tested.
743  */
/*
 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Locate the parent directory of a directory vnode.
 *
 * dvp is referenced but not locked.  *vpp must be returned referenced and
 * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
 * at the root, instead it could indicate that the directory we were in was
 * removed.
 *
 * NOTE: as-of sequences are not linked into the directory structure.  If
 * we are at the root with a different asof then the mount point, reload
 * the same directory with the mount point's asof.   I'm not sure what this
 * will do to NFS.  We encode ASOF stamps in NFS file handles so it might not
 * get confused, but it hasn't been tested.
 */
static
int
hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *ip;
	int64_t parent_obj_id;
	hammer_tid_t asof;
	int error;

	dip = VTOI(ap->a_dvp);
	asof = dip->obj_asof;
	parent_obj_id = dip->ino_data.parent_obj_id;

	if (parent_obj_id == 0) {
		if (dip->obj_id == HAMMER_OBJID_ROOT &&
		   asof != dip->hmp->asof) {
			/*
			 * As-of root: re-lookup the root itself at the
			 * mount's asof and fake the '..' name as the
			 * "0x%016llx" tid (18 chars + NUL = 19 bytes).
			 */
			parent_obj_id = dip->obj_id;
			asof = dip->hmp->asof;
			*ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
			ksnprintf(*ap->a_fakename, 19, "0x%016llx",
				   dip->obj_asof);
		} else {
			/* no parent (root, or directory was removed) */
			*ap->a_vpp = NULL;
			return ENOENT;
		}
	}

	hammer_simple_transaction(&trans, dip->hmp);

	ip = hammer_get_inode(&trans, &dip->cache[1], parent_obj_id,
			      asof, dip->flags, &error);
	if (ip) {
		error = hammer_get_vnode(ip, LK_EXCLUSIVE, ap->a_vpp);
		hammer_rel_inode(ip, 0);
	} else {
		*ap->a_vpp = NULL;
	}
	hammer_commit_transaction(&trans);
	return (error);
}
786 
787 /*
788  * hammer_vop_nlink { nch, dvp, vp, cred }
789  */
790 static
791 int
792 hammer_vop_nlink(struct vop_nlink_args *ap)
793 {
794 	struct hammer_transaction trans;
795 	struct hammer_inode *dip;
796 	struct hammer_inode *ip;
797 	struct nchandle *nch;
798 	int error;
799 
800 	nch = ap->a_nch;
801 	dip = VTOI(ap->a_dvp);
802 	ip = VTOI(ap->a_vp);
803 
804 	if (dip->flags & HAMMER_INODE_RO)
805 		return (EROFS);
806 	if (ip->flags & HAMMER_INODE_RO)
807 		return (EROFS);
808 
809 	/*
810 	 * Create a transaction to cover the operations we perform.
811 	 */
812 	hammer_start_transaction(&trans, dip->hmp);
813 
814 	/*
815 	 * Add the filesystem object to the directory.  Note that neither
816 	 * dip nor ip are referenced or locked, but their vnodes are
817 	 * referenced.  This function will bump the inode's link count.
818 	 */
819 	error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);
820 
821 	/*
822 	 * Finish up.
823 	 */
824 	if (error) {
825 		hammer_abort_transaction(&trans);
826 	} else {
827 		cache_setunresolved(nch);
828 		cache_setvp(nch, ap->a_vp);
829 		hammer_commit_transaction(&trans);
830 	}
831 	return (error);
832 }
833 
834 /*
835  * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
836  *
837  * The operating system has already ensured that the directory entry
838  * does not exist and done all appropriate namespace locking.
839  */
840 static
841 int
842 hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
843 {
844 	struct hammer_transaction trans;
845 	struct hammer_inode *dip;
846 	struct hammer_inode *nip;
847 	struct nchandle *nch;
848 	int error;
849 
850 	nch = ap->a_nch;
851 	dip = VTOI(ap->a_dvp);
852 
853 	if (dip->flags & HAMMER_INODE_RO)
854 		return (EROFS);
855 
856 	/*
857 	 * Create a transaction to cover the operations we perform.
858 	 */
859 	hammer_start_transaction(&trans, dip->hmp);
860 
861 	/*
862 	 * Create a new filesystem object of the requested type.  The
863 	 * returned inode will be referenced but not locked.
864 	 */
865 	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
866 	if (error)
867 		kprintf("hammer_mkdir error %d\n", error);
868 	if (error) {
869 		hammer_abort_transaction(&trans);
870 		*ap->a_vpp = NULL;
871 		return (error);
872 	}
873 
874 	/*
875 	 * Add the new filesystem object to the directory.  This will also
876 	 * bump the inode's link count.
877 	 */
878 	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
879 	if (error)
880 		kprintf("hammer_mkdir (add) error %d\n", error);
881 
882 	/*
883 	 * Finish up.
884 	 */
885 	if (error) {
886 		hammer_rel_inode(nip, 0);
887 		hammer_abort_transaction(&trans);
888 		*ap->a_vpp = NULL;
889 	} else {
890 		hammer_commit_transaction(&trans);
891 		error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
892 		hammer_rel_inode(nip, 0);
893 		if (error == 0) {
894 			cache_setunresolved(ap->a_nch);
895 			cache_setvp(ap->a_nch, *ap->a_vpp);
896 		}
897 	}
898 	return (error);
899 }
900 
901 /*
902  * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
903  *
904  * The operating system has already ensured that the directory entry
905  * does not exist and done all appropriate namespace locking.
906  */
907 static
908 int
909 hammer_vop_nmknod(struct vop_nmknod_args *ap)
910 {
911 	struct hammer_transaction trans;
912 	struct hammer_inode *dip;
913 	struct hammer_inode *nip;
914 	struct nchandle *nch;
915 	int error;
916 
917 	nch = ap->a_nch;
918 	dip = VTOI(ap->a_dvp);
919 
920 	if (dip->flags & HAMMER_INODE_RO)
921 		return (EROFS);
922 
923 	/*
924 	 * Create a transaction to cover the operations we perform.
925 	 */
926 	hammer_start_transaction(&trans, dip->hmp);
927 
928 	/*
929 	 * Create a new filesystem object of the requested type.  The
930 	 * returned inode will be referenced but not locked.
931 	 */
932 	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
933 	if (error) {
934 		hammer_abort_transaction(&trans);
935 		*ap->a_vpp = NULL;
936 		return (error);
937 	}
938 
939 	/*
940 	 * Add the new filesystem object to the directory.  This will also
941 	 * bump the inode's link count.
942 	 */
943 	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
944 
945 	/*
946 	 * Finish up.
947 	 */
948 	if (error) {
949 		hammer_rel_inode(nip, 0);
950 		hammer_abort_transaction(&trans);
951 		*ap->a_vpp = NULL;
952 	} else {
953 		hammer_commit_transaction(&trans);
954 		error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
955 		hammer_rel_inode(nip, 0);
956 		if (error == 0) {
957 			cache_setunresolved(ap->a_nch);
958 			cache_setvp(ap->a_nch, *ap->a_vpp);
959 		}
960 	}
961 	return (error);
962 }
963 
964 /*
965  * hammer_vop_open { vp, mode, cred, fp }
966  */
967 static
968 int
969 hammer_vop_open(struct vop_open_args *ap)
970 {
971 	if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO))
972 		return (EROFS);
973 
974 	return(vop_stdopen(ap));
975 }
976 
977 /*
978  * hammer_vop_pathconf { vp, name, retval }
979  */
/*
 * hammer_vop_pathconf { vp, name, retval }
 *
 * Not implemented yet; all pathconf queries return EOPNOTSUPP.
 */
static
int
hammer_vop_pathconf(struct vop_pathconf_args *ap)
{
	return EOPNOTSUPP;
}
986 
987 /*
988  * hammer_vop_print { vp }
989  */
/*
 * hammer_vop_print { vp }
 *
 * Not implemented yet; vnode state printing returns EOPNOTSUPP.
 */
static
int
hammer_vop_print(struct vop_print_args *ap)
{
	return EOPNOTSUPP;
}
996 
997 /*
998  * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
999  */
/*
 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
 *
 * Emit directory entries.  Offsets 0 and 1 are the artificial "." and
 * ".." entries; real entries are scanned from the B-Tree where the
 * directory record key doubles as the 64-bit seek position.  NFS-style
 * seek cookies are returned when requested.
 */
static
int
hammer_vop_readdir(struct vop_readdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	struct uio *uio;
	hammer_record_ondisk_t rec;
	hammer_base_elm_t base;
	int error;
	int cookie_index;	/* number of cookies emitted so far */
	int ncookies;		/* capacity of the cookie array (-1 = none) */
	off_t *cookies;
	off_t saveoff;		/* current/next seek position */
	int r;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	if (ap->a_ncookies) {
		/*
		 * Size the cookie array from the request; 16 bytes is
		 * used as a rough minimum dirent size, capped at 1024.
		 */
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
		cookie_index = 0;
	} else {
		ncookies = -1;
		cookies = NULL;
		cookie_index = 0;
	}

	hammer_simple_transaction(&trans, ip->hmp);

	/*
	 * Handle artificial entries
	 */
	error = 0;
	if (saveoff == 0) {
		r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}
	if (saveoff == 1) {
		/* ".." falls back to the directory itself at the root */
		if (ip->ino_data.parent_obj_id) {
			r = vop_write_dirent(&error, uio,
					     ip->ino_data.parent_obj_id,
					     DT_DIR, 2, "..");
		} else {
			r = vop_write_dirent(&error, uio,
					     ip->obj_id, DT_DIR, 2, "..");
		}
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	/*
	 * Key range (begin and end inclusive) to scan.  Directory keys
	 * directly translate to a 64 bit 'seek' position.
	 */
	hammer_init_cursor(&trans, &cursor, &ip->cache[0]);
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = saveoff;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	error = hammer_ip_first(&cursor, ip);

	while (error == 0) {
		error = hammer_ip_resolve_record_and_data(&cursor);
		if (error)
			break;
		rec = cursor.record;
		base = &rec->base.base;
		saveoff = base->key;

		if (base->obj_id != ip->obj_id)
			panic("readdir: bad record at %p", cursor.node);

		r = vop_write_dirent(
			     &error, uio, rec->entry.obj_id,
			     hammer_get_dtype(rec->entry.base.base.obj_type),
			     rec->entry.base.data_len,
			     (void *)cursor.data);
		if (r)
			break;
		/* advance the seek position past the emitted entry */
		++saveoff;
		if (cookies)
			cookies[cookie_index] = base->key;
		++cookie_index;
		if (cookie_index == ncookies)
			break;
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);

done:
	hammer_commit_transaction(&trans);

	/* ENOENT from the scan means we ran off the end: that is EOF */
	if (ap->a_eofflag)
		*ap->a_eofflag = (error == ENOENT);
	uio->uio_offset = saveoff;
	if (error && cookie_index == 0) {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	return(error);
}
1140 
1141 /*
1142  * hammer_vop_readlink { vp, uio, cred }
1143  */
static
int
hammer_vop_readlink(struct vop_readlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(ap->a_vp);

	hammer_simple_transaction(&trans, ip->hmp);

	hammer_init_cursor(&trans, &cursor, &ip->cache[0]);

	/*
	 * The symlink target is stored as the data payload of a single
	 * HAMMER_RECTYPE_FIX record keyed by HAMMER_FIXKEY_SYMLINK, so a
	 * direct lookup (rather than a range scan) locates it.
	 */
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	error = hammer_ip_lookup(&cursor, ip);
	if (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error == 0) {
			/*
			 * Link data is not NUL terminated; copy exactly
			 * data_len bytes into the caller's uio.
			 */
			error = uiomove((char *)cursor.data,
					cursor.record->base.data_len,
					ap->a_uio);
		}
	}
	hammer_done_cursor(&cursor);
	hammer_commit_transaction(&trans);
	return(error);
}
1185 
1186 /*
1187  * hammer_vop_nremove { nch, dvp, cred }
1188  */
1189 static
1190 int
1191 hammer_vop_nremove(struct vop_nremove_args *ap)
1192 {
1193 	return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
1194 }
1195 
1196 /*
1197  * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1198  */
1199 static
1200 int
1201 hammer_vop_nrename(struct vop_nrename_args *ap)
1202 {
1203 	struct hammer_transaction trans;
1204 	struct namecache *fncp;
1205 	struct namecache *tncp;
1206 	struct hammer_inode *fdip;
1207 	struct hammer_inode *tdip;
1208 	struct hammer_inode *ip;
1209 	struct hammer_cursor cursor;
1210 	union hammer_record_ondisk *rec;
1211 	int64_t namekey;
1212 	int error;
1213 
1214 	fdip = VTOI(ap->a_fdvp);
1215 	tdip = VTOI(ap->a_tdvp);
1216 	fncp = ap->a_fnch->ncp;
1217 	tncp = ap->a_tnch->ncp;
1218 	ip = VTOI(fncp->nc_vp);
1219 	KKASSERT(ip != NULL);
1220 
1221 	if (fdip->flags & HAMMER_INODE_RO)
1222 		return (EROFS);
1223 	if (tdip->flags & HAMMER_INODE_RO)
1224 		return (EROFS);
1225 	if (ip->flags & HAMMER_INODE_RO)
1226 		return (EROFS);
1227 
1228 	hammer_start_transaction(&trans, fdip->hmp);
1229 
1230 	/*
1231 	 * Remove tncp from the target directory and then link ip as
1232 	 * tncp. XXX pass trans to dounlink
1233 	 *
1234 	 * Force the inode sync-time to match the transaction so it is
1235 	 * in-sync with the creation of the target directory entry.
1236 	 */
1237 	error = hammer_dounlink(ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
1238 	if (error == 0 || error == ENOENT) {
1239 		error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
1240 		if (error == 0) {
1241 			ip->ino_data.parent_obj_id = tdip->obj_id;
1242 			hammer_modify_inode(&trans, ip,
1243 				HAMMER_INODE_DDIRTY | HAMMER_INODE_TIDLOCKED);
1244 		}
1245 	}
1246 	if (error)
1247 		goto failed; /* XXX */
1248 
1249 	/*
1250 	 * Locate the record in the originating directory and remove it.
1251 	 *
1252 	 * Calculate the namekey and setup the key range for the scan.  This
1253 	 * works kinda like a chained hash table where the lower 32 bits
1254 	 * of the namekey synthesize the chain.
1255 	 *
1256 	 * The key range is inclusive of both key_beg and key_end.
1257 	 */
1258 	namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
1259 retry:
1260 	hammer_init_cursor(&trans, &cursor, &fdip->cache[0]);
1261         cursor.key_beg.obj_id = fdip->obj_id;
1262 	cursor.key_beg.key = namekey;
1263         cursor.key_beg.create_tid = 0;
1264         cursor.key_beg.delete_tid = 0;
1265         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1266         cursor.key_beg.obj_type = 0;
1267 
1268 	cursor.key_end = cursor.key_beg;
1269 	cursor.key_end.key |= 0xFFFFFFFFULL;
1270 	cursor.asof = fdip->obj_asof;
1271 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1272 
1273 	/*
1274 	 * Scan all matching records (the chain), locate the one matching
1275 	 * the requested path component.
1276 	 *
1277 	 * The hammer_ip_*() functions merge in-memory records with on-disk
1278 	 * records for the purposes of the search.
1279 	 */
1280 	error = hammer_ip_first(&cursor, fdip);
1281 	while (error == 0) {
1282 		if (hammer_ip_resolve_data(&cursor) != 0)
1283 			break;
1284 		rec = cursor.record;
1285 		if (fncp->nc_nlen == rec->entry.base.data_len &&
1286 		    bcmp(fncp->nc_name, cursor.data, fncp->nc_nlen) == 0) {
1287 			break;
1288 		}
1289 		error = hammer_ip_next(&cursor);
1290 	}
1291 
1292 	/*
1293 	 * If all is ok we have to get the inode so we can adjust nlinks.
1294 	 *
1295 	 * WARNING: hammer_ip_del_directory() may have to terminate the
1296 	 * cursor to avoid a recursion.  It's ok to call hammer_done_cursor()
1297 	 * twice.
1298 	 */
1299 	if (error == 0)
1300 		error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
1301         hammer_done_cursor(&cursor);
1302 	if (error == 0)
1303 		cache_rename(ap->a_fnch, ap->a_tnch);
1304 	if (error == EDEADLK)
1305 		goto retry;
1306 failed:
1307 	if (error == 0) {
1308 		hammer_commit_transaction(&trans);
1309 	} else {
1310 		hammer_abort_transaction(&trans);
1311 	}
1312 	return (error);
1313 }
1314 
1315 /*
1316  * hammer_vop_nrmdir { nch, dvp, cred }
1317  */
1318 static
1319 int
1320 hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
1321 {
1322 	return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, 0));
1323 }
1324 
1325 /*
1326  * hammer_vop_setattr { vp, vap, cred }
1327  */
static
int
hammer_vop_setattr(struct vop_setattr_args *ap)
{
	struct hammer_transaction trans;
	struct vattr *vap;
	struct hammer_inode *ip;
	int modflags;		/* accumulated HAMMER_INODE_* dirty flags */
	int error;
	int truncating;
	int64_t aligned_size;
	u_int32_t flags;
	uuid_t uuid;

	vap = ap->a_vap;
	ip = ap->a_vp->v_data;
	modflags = 0;

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return(EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	hammer_start_transaction(&trans, ip->hmp);
	error = 0;

	/*
	 * chflags.  NOTE: when flags are specified no other attributes are
	 * processed in this call - both arms exit via 'done'.
	 */
	if (vap->va_flags != VNOVAL) {
		flags = ip->ino_data.uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					 hammer_to_unix_xid(&ip->ino_data.uid),
					 ap->a_cred);
		if (error == 0) {
			if (ip->ino_data.uflags != flags) {
				ip->ino_data.uflags = flags;
				modflags |= HAMMER_INODE_DDIRTY;
			}
			if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	/* chown */
	if (vap->va_uid != (uid_t)VNOVAL) {
		hammer_guid_to_uuid(&uuid, vap->va_uid);
		if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) != 0) {
			ip->ino_data.uid = uuid;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
	/*
	 * chgrp.  NOTE(review): the cast is uid_t rather than gid_t --
	 * presumably both are the same width so the VNOVAL comparison is
	 * unaffected; confirm against sys/types.h.
	 */
	if (vap->va_gid != (uid_t)VNOVAL) {
		hammer_guid_to_uuid(&uuid, vap->va_gid);
		if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) != 0) {
			ip->ino_data.gid = uuid;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
	/*
	 * truncate/extend.  The 'while' is used as a breakable scope and
	 * executes at most once (note the unconditional break at the end).
	 */
	while (vap->va_size != VNOVAL && ip->ino_rec.ino_size != vap->va_size) {
		switch(ap->a_vp->v_type) {
		case VREG:
			if (vap->va_size == ip->ino_rec.ino_size)
				break;
			if (vap->va_size < ip->ino_rec.ino_size) {
				vtruncbuf(ap->a_vp, vap->va_size,
					  HAMMER_BUFSIZE);
				truncating = 1;
			} else {
				vnode_pager_setsize(ap->a_vp, vap->va_size);
				truncating = 0;
			}
			ip->ino_rec.ino_size = vap->va_size;
			modflags |= HAMMER_INODE_RDIRTY;
			/* round the new size up to a HAMMER buffer boundary */
			aligned_size = (vap->va_size + HAMMER_BUFMASK) &
					~(int64_t)HAMMER_BUFMASK;

			if (truncating) {
				error = hammer_ip_delete_range(&trans, ip,
						    aligned_size,
						    0x7FFFFFFFFFFFFFFFLL);
			}
			/*
			 * If truncating we have to clean out a portion of
			 * the last block on-disk.
			 */
			if (truncating && error == 0 &&
			    vap->va_size < aligned_size) {
				struct buf *bp;
				int offset;

				offset = vap->va_size & HAMMER_BUFMASK;
				error = bread(ap->a_vp,
					      aligned_size - HAMMER_BUFSIZE,
					      HAMMER_BUFSIZE, &bp);
				if (error == 0) {
					/* zero from new EOF to block end */
					bzero(bp->b_data + offset,
					      HAMMER_BUFSIZE - offset);
					bdwrite(bp);
				} else {
					brelse(bp);
				}
			}
			break;
		case VDATABASE:
			error = hammer_ip_delete_range(&trans, ip,
						    vap->va_size,
						    0x7FFFFFFFFFFFFFFFLL);
			ip->ino_rec.ino_size = vap->va_size;
			modflags |= HAMMER_INODE_RDIRTY;
			break;
		default:
			error = EINVAL;
			goto done;
		}
		break;
	}
	/* times are stored as HAMMER transaction ids */
	if (vap->va_atime.tv_sec != VNOVAL) {
		ip->ino_rec.ino_atime =
			hammer_timespec_to_transid(&vap->va_atime);
		modflags |= HAMMER_INODE_ITIMES;
	}
	if (vap->va_mtime.tv_sec != VNOVAL) {
		ip->ino_rec.ino_mtime =
			hammer_timespec_to_transid(&vap->va_mtime);
		modflags |= HAMMER_INODE_ITIMES;
	}
	/* chmod */
	if (vap->va_mode != (mode_t)VNOVAL) {
		if (ip->ino_data.mode != vap->va_mode) {
			ip->ino_data.mode = vap->va_mode;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
done:
	if (error) {
		hammer_abort_transaction(&trans);
	} else {
		hammer_modify_inode(&trans, ip, modflags);
		hammer_commit_transaction(&trans);
	}
	return (error);
}
1472 
1473 /*
1474  * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1475  */
static
int
hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	hammer_record_t record;
	int error;
	int bytes;		/* length of the link target (no NUL) */

	ap->a_vap->va_type = VLNK;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */

	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hammer_abort_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);

	/*
	 * Add a record representing the symlink.  symlink stores the link
	 * as pure data, not a string, and is not \0 terminated.
	 */
	if (error == 0) {
		record = hammer_alloc_mem_record(nip);
		bytes = strlen(ap->a_target);

		record->rec.base.base.key = HAMMER_FIXKEY_SYMLINK;
		record->rec.base.base.rec_type = HAMMER_RECTYPE_FIX;
		record->rec.base.data_len = bytes;
		record->data = (void *)ap->a_target;
		/* will be reallocated by routine below */
		error = hammer_ip_add_record(&trans, record);

		/*
		 * Set the file size to the length of the link.
		 */
		if (error == 0) {
			nip->ino_rec.ino_size = bytes;
			hammer_modify_inode(&trans, nip, HAMMER_INODE_RDIRTY);
		}
	}

	/*
	 * Finish up.  On success acquire the vnode and resolve the
	 * namecache entry; the inode reference is dropped either way.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		hammer_abort_transaction(&trans);
		*ap->a_vpp = NULL;
	} else {
		hammer_commit_transaction(&trans);
		error = hammer_get_vnode(nip, LK_EXCLUSIVE, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	return (error);
}
1561 
1562 /*
1563  * hammer_vop_nwhiteout { nch, dvp, cred, flags }
1564  */
1565 static
1566 int
1567 hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
1568 {
1569 	return(hammer_dounlink(ap->a_nch, ap->a_dvp, ap->a_cred, ap->a_flags));
1570 }
1571 
1572 /*
1573  * hammer_vop_ioctl { vp, command, data, fflag, cred }
1574  */
1575 static
1576 int
1577 hammer_vop_ioctl(struct vop_ioctl_args *ap)
1578 {
1579 	struct hammer_inode *ip = ap->a_vp->v_data;
1580 
1581 	return(hammer_ioctl(ip, ap->a_command, ap->a_data,
1582 			    ap->a_fflag, ap->a_cred));
1583 }
1584 
1585 static
1586 int
1587 hammer_vop_mountctl(struct vop_mountctl_args *ap)
1588 {
1589 	struct mount *mp;
1590 	int error;
1591 
1592 	mp = ap->a_head.a_ops->head.vv_mount;
1593 
1594 	switch(ap->a_op) {
1595 	case MOUNTCTL_SET_EXPORT:
1596 		if (ap->a_ctllen != sizeof(struct export_args))
1597 			error = EINVAL;
1598 		error = hammer_vfs_export(mp, ap->a_op,
1599 				      (const struct export_args *)ap->a_ctl);
1600 		break;
1601 	default:
1602 		error = journal_mountctl(ap);
1603 		break;
1604 	}
1605 	return(error);
1606 }
1607 
1608 /*
1609  * hammer_vop_strategy { vp, bio }
1610  *
1611  * Strategy call, used for regular file read & write only.  Note that the
1612  * bp may represent a cluster.
1613  *
1614  * To simplify operation and allow better optimizations in the future,
1615  * this code does not make any assumptions with regards to buffer alignment
1616  * or size.
1617  */
1618 static
1619 int
1620 hammer_vop_strategy(struct vop_strategy_args *ap)
1621 {
1622 	struct buf *bp;
1623 	int error;
1624 
1625 	bp = ap->a_bio->bio_buf;
1626 
1627 	switch(bp->b_cmd) {
1628 	case BUF_CMD_READ:
1629 		error = hammer_vop_strategy_read(ap);
1630 		break;
1631 	case BUF_CMD_WRITE:
1632 		error = hammer_vop_strategy_write(ap);
1633 		break;
1634 	default:
1635 		bp->b_error = error = EINVAL;
1636 		bp->b_flags |= B_ERROR;
1637 		biodone(ap->a_bio);
1638 		break;
1639 	}
1640 	return (error);
1641 }
1642 
1643 /*
1644  * Read from a regular file.  Iterate the related records and fill in the
1645  * BIO/BUF.  Gaps are zero-filled.
1646  *
1647  * The support code in hammer_object.c should be used to deal with mixed
1648  * in-memory and on-disk records.
1649  *
1650  * XXX atime update
1651  */
static
int
hammer_vop_strategy_read(struct vop_strategy_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct hammer_cursor cursor;
	hammer_record_ondisk_t rec;
	hammer_base_elm_t base;
	struct bio *bio;
	struct buf *bp;
	int64_t rec_offset;	/* file offset of the record's first byte */
	int64_t ran_end;
	int64_t tmp64;
	int error;
	int boff;		/* fill offset within the buffer */
	int roff;		/* read offset within the record's data */
	int n;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = ap->a_vp->v_data;

	hammer_simple_transaction(&trans, ip->hmp);
	hammer_init_cursor(&trans, &cursor, &ip->cache[0]);

	/*
	 * Key range (begin and end inclusive) to scan.  Note that the key's
	 * stored in the actual records represent BASE+LEN, not BASE.  The
	 * first record containing bio_offset will have a key > bio_offset.
	 */
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = bio->bio_offset + 1;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF | HAMMER_CURSOR_DATAEXTOK;

	cursor.key_end = cursor.key_beg;
	if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
		cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
		cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
		cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
	} else {
		ran_end = bio->bio_offset + bp->b_bufsize;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
		cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
		tmp64 = ran_end + MAXPHYS + 1;	/* work-around GCC-4 bug */
		if (tmp64 < ran_end)
			cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
		else
			cursor.key_end.key = ran_end + MAXPHYS + 1;
	}
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;

	error = hammer_ip_first(&cursor, ip);
	boff = 0;

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		rec = cursor.record;
		base = &rec->base.base;

		/* key is BASE+LEN; subtract back to the record's base */
		rec_offset = base->key - rec->data.base.data_len;

		/*
		 * Calculate the gap, if any, and zero-fill it.
		 */
		n = (int)(rec_offset - (bio->bio_offset + boff));
		if (n > 0) {
			if (n > bp->b_bufsize - boff)
				n = bp->b_bufsize - boff;
			bzero((char *)bp->b_data + boff, n);
			boff += n;
			n = 0;
		}

		/*
		 * Calculate the data offset in the record and the number
		 * of bytes we can copy.
		 *
		 * Note there is a degenerate case here where boff may
		 * already be at bp->b_bufsize.
		 */
		/* n <= 0 here: the record begins at or before our position */
		roff = -n;
		n = rec->data.base.data_len - roff;
		KKASSERT(n > 0);
		if (n > bp->b_bufsize - boff)
			n = bp->b_bufsize - boff;

		bcopy((char *)cursor.data + roff,
		      (char *)bp->b_data + boff, n);
		boff += n;
		if (boff == bp->b_bufsize)
			break;
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);
	hammer_commit_transaction(&trans);

	/*
	 * There may have been a gap after the last record
	 */
	if (error == ENOENT)
		error = 0;
	if (error == 0 && boff != bp->b_bufsize) {
		KKASSERT(boff < bp->b_bufsize);
		bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
		/* boff = bp->b_bufsize; */
	}
	/* the bio is always completed here, with or without error */
	bp->b_resid = 0;
	bp->b_error = error;
	if (error)
		bp->b_flags |= B_ERROR;
	biodone(ap->a_bio);
	return(error);
}
1772 
1773 /*
1774  * Write to a regular file.   Because this is a strategy call the OS is
1775  * trying to actually sync data to the media.   HAMMER can only flush
1776  * the entire inode (so the TID remains properly synchronized).
1777  *
1778  * Basically all we do here is place the bio on the inode's flush queue
1779  * and activate the flusher.
1780  */
static
int
hammer_vop_strategy_write(struct vop_strategy_args *ap)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	struct bio *bio;
	struct buf *bp;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = ap->a_vp->v_data;

	if (ip->flags & HAMMER_INODE_RO) {
		bp->b_error = EROFS;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
		return(EROFS);
	}
	/*
	 * Queue the bio on the inode and mark the inode XDIRTY so the
	 * flusher knows buffered data is pending.
	 */
	BUF_KERNPROC(bp);
	TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
	hammer_start_transaction(&trans, ip->hmp);	/* XXX */
	hammer_modify_inode(&trans, ip, HAMMER_INODE_XDIRTY);
	hammer_commit_transaction(&trans);

	/*
	 * Place the inode on the flush list (only once) and wake the
	 * flusher.  The extra reference (lock.refs) keeps the inode
	 * from being freed while it is queued.
	 */
	if ((ip->flags & HAMMER_INODE_FLUSHQ) == 0) {
		++ip->lock.refs;
		ip->flags |= HAMMER_INODE_FLUSHQ;
		TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
		hammer_flusher_async(ip->hmp);
	}
	return(0);
}
1814 
1815 /*
1816  * Back-end code which actually performs the write to the media.  This
1817  * routine is typically called from the flusher.  The bio will be disposed
1818  * of (biodone'd) by this routine.
1819  *
1820  * Iterate the related records and mark for deletion.  If existing edge
1821  * records (left and right side) overlap our write they have to be marked
1822  * deleted and new records created, usually referencing a portion of the
1823  * original data.  Then add a record to represent the buffer.
1824  */
int
hammer_dowrite(hammer_transaction_t trans, hammer_inode_t ip, struct bio *bio)
{
	struct buf *bp = bio->bio_buf;
	int error;

	/*
	 * Delete any records overlapping our range.  This function will
	 * (eventually) properly truncate partial overlaps.  DB files
	 * delete only the single key at bio_offset; regular files delete
	 * the full byte range of the buffer.
	 */
	if (ip->ino_rec.base.base.obj_type == HAMMER_OBJTYPE_DBFILE) {
		error = hammer_ip_delete_range(trans, ip, bio->bio_offset,
					       bio->bio_offset);
	} else {
		error = hammer_ip_delete_range(trans, ip, bio->bio_offset,
					       bio->bio_offset +
						bp->b_bufsize - 1);
	}

	/*
	 * Add a single record to cover the write.  We can write a record
	 * with only the actual file data - for example, a small 200 byte
	 * file does not have to write out a 16K record.
	 *
	 * While the data size does not have to be aligned, we still do it
	 * to reduce fragmentation in a future allocation model.
	 */
	if (error == 0) {
		int limit_size;

		if (ip->ino_rec.ino_size - bio->bio_offset > bp->b_bufsize) {
			limit_size = bp->b_bufsize;
		} else {
			/* last buffer: clip to EOF, rounded up to 64 bytes */
			limit_size = (int)(ip->ino_rec.ino_size -
					   bio->bio_offset);
			KKASSERT(limit_size >= 0);
			limit_size = (limit_size + 63) & ~63;
		}
		error = hammer_ip_sync_data(trans, ip, bio->bio_offset,
					    bp->b_data, limit_size);
	}

	/*
	 * The bio is disposed of (biodone) here regardless of outcome.
	 */
	if (error) {
		bp->b_resid = bp->b_bufsize;
		bp->b_error = error;
		bp->b_flags |= B_ERROR;
	} else {
		bp->b_resid = 0;
	}
	biodone(bio);
	return(error);
}
1877 
1878 /*
1879  * dounlink - disconnect a directory entry
1880  *
1881  * XXX whiteout support not really in yet
1882  */
1883 static int
1884 hammer_dounlink(struct nchandle *nch, struct vnode *dvp, struct ucred *cred,
1885 		int flags)
1886 {
1887 	struct hammer_transaction trans;
1888 	struct namecache *ncp;
1889 	hammer_inode_t dip;
1890 	hammer_inode_t ip;
1891 	hammer_record_ondisk_t rec;
1892 	struct hammer_cursor cursor;
1893 	int64_t namekey;
1894 	int error;
1895 
1896 	/*
1897 	 * Calculate the namekey and setup the key range for the scan.  This
1898 	 * works kinda like a chained hash table where the lower 32 bits
1899 	 * of the namekey synthesize the chain.
1900 	 *
1901 	 * The key range is inclusive of both key_beg and key_end.
1902 	 */
1903 	dip = VTOI(dvp);
1904 	ncp = nch->ncp;
1905 
1906 	if (dip->flags & HAMMER_INODE_RO)
1907 		return (EROFS);
1908 
1909 	hammer_start_transaction(&trans, dip->hmp);
1910 
1911 	namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
1912 retry:
1913 	hammer_init_cursor(&trans, &cursor, &dip->cache[0]);
1914         cursor.key_beg.obj_id = dip->obj_id;
1915 	cursor.key_beg.key = namekey;
1916         cursor.key_beg.create_tid = 0;
1917         cursor.key_beg.delete_tid = 0;
1918         cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1919         cursor.key_beg.obj_type = 0;
1920 
1921 	cursor.key_end = cursor.key_beg;
1922 	cursor.key_end.key |= 0xFFFFFFFFULL;
1923 	cursor.asof = dip->obj_asof;
1924 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1925 
1926 	/*
1927 	 * Scan all matching records (the chain), locate the one matching
1928 	 * the requested path component.  info->last_error contains the
1929 	 * error code on search termination and could be 0, ENOENT, or
1930 	 * something else.
1931 	 *
1932 	 * The hammer_ip_*() functions merge in-memory records with on-disk
1933 	 * records for the purposes of the search.
1934 	 */
1935 	error = hammer_ip_first(&cursor, dip);
1936 	while (error == 0) {
1937 		error = hammer_ip_resolve_data(&cursor);
1938 		if (error)
1939 			break;
1940 		rec = cursor.record;
1941 		if (ncp->nc_nlen == rec->entry.base.data_len &&
1942 		    bcmp(ncp->nc_name, cursor.data, ncp->nc_nlen) == 0) {
1943 			break;
1944 		}
1945 		error = hammer_ip_next(&cursor);
1946 	}
1947 
1948 	/*
1949 	 * If all is ok we have to get the inode so we can adjust nlinks.
1950 	 *
1951 	 * If the target is a directory, it must be empty.
1952 	 */
1953 	if (error == 0) {
1954 		ip = hammer_get_inode(&trans, &dip->cache[1],
1955 				      rec->entry.obj_id,
1956 				      dip->hmp->asof, 0, &error);
1957 		if (error == ENOENT) {
1958 			kprintf("obj_id %016llx\n", rec->entry.obj_id);
1959 			Debugger("ENOENT unlinking object that should exist, cont to sync");
1960 			hammer_sync_hmp(dip->hmp, MNT_NOWAIT);
1961 			Debugger("ENOENT - sync done");
1962 		}
1963 		if (error == 0 && ip->ino_rec.base.base.obj_type ==
1964 				  HAMMER_OBJTYPE_DIRECTORY) {
1965 			error = hammer_ip_check_directory_empty(&trans, ip);
1966 		}
1967 		/*
1968 		 * WARNING: hammer_ip_del_directory() may have to terminate
1969 		 * the cursor to avoid a lock recursion.  It's ok to call
1970 		 * hammer_done_cursor() twice.
1971 		 */
1972 		if (error == 0)
1973 			error = hammer_ip_del_directory(&trans, &cursor, dip, ip);
1974 		if (error == 0) {
1975 			cache_setunresolved(nch);
1976 			cache_setvp(nch, NULL);
1977 			/* XXX locking */
1978 			if (ip->vp)
1979 				cache_inval_vp(ip->vp, CINV_DESTROY);
1980 		}
1981 		hammer_rel_inode(ip, 0);
1982 	}
1983         hammer_done_cursor(&cursor);
1984 	if (error == EDEADLK)
1985 		goto retry;
1986 
1987 	if (error == 0)
1988 		hammer_commit_transaction(&trans);
1989 	else
1990 		hammer_abort_transaction(&trans);
1991 	return (error);
1992 }
1993 
1994 /************************************************************************
1995  *			    FIFO AND SPECFS OPS				*
1996  ************************************************************************
1997  *
1998  */
1999 
2000 static int
2001 hammer_vop_fifoclose (struct vop_close_args *ap)
2002 {
2003 	/* XXX update itimes */
2004 	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
2005 }
2006 
2007 static int
2008 hammer_vop_fiforead (struct vop_read_args *ap)
2009 {
2010 	int error;
2011 
2012 	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2013 	/* XXX update access time */
2014 	return (error);
2015 }
2016 
2017 static int
2018 hammer_vop_fifowrite (struct vop_write_args *ap)
2019 {
2020 	int error;
2021 
2022 	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2023 	/* XXX update access time */
2024 	return (error);
2025 }
2026 
2027 static int
2028 hammer_vop_specclose (struct vop_close_args *ap)
2029 {
2030 	/* XXX update itimes */
2031 	return (VOCALL(&spec_vnode_vops, &ap->a_head));
2032 }
2033 
2034 static int
2035 hammer_vop_specread (struct vop_read_args *ap)
2036 {
2037 	/* XXX update access time */
2038 	return (VOCALL(&spec_vnode_vops, &ap->a_head));
2039 }
2040 
2041 static int
2042 hammer_vop_specwrite (struct vop_write_args *ap)
2043 {
2044 	/* XXX update last change time */
2045 	return (VOCALL(&spec_vnode_vops, &ap->a_head));
2046 }
2047 
2048