/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.22 2008/03/19 20:18:17 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to release a passively associated buf which we cannot
 * yet let go we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_io_deallocate(struct buf *bp);
static int hammer_io_checkwrite(struct buf *bp);

/*
 * Initialize an already-zero'd hammer_io structure
 */
void
hammer_io_init(hammer_io_t io, enum hammer_io_type type)
{
	io->type = type;
	TAILQ_INIT(&io->deplist);
}
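
/*
 * Example usage (a sketch, not a copy of a real call site): the containing
 * structure is typically allocated zero'd, after which the embedded io is
 * initialized and its device offset assigned.  The allocation flags and
 * the buf_offset variable below are illustrative assumptions.
 */
#if 0
	buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK | M_ZERO);
	hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
	buffer->io.offset = buf_offset;	/* device offset of this buffer */
#endif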

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.  Called with the io structure exclusively locked.
 *
 * The io may have 0 or 1 references depending on who called us.  The
 * caller is responsible for dealing with the refs.
 *
 * This call can only be made when no action is required on the buffer.
 * HAMMER must own the buffer (released == 0) since we mess around with it.
 */
static void
hammer_io_disassociate(hammer_io_structure_t iou, int elseit)
{
	struct buf *bp = iou->io.bp;

	KKASSERT(TAILQ_EMPTY(&iou->io.deplist) && iou->io.modified == 0);
	buf_dep_init(bp);
	iou->io.bp = NULL;
	bp->b_flags &= ~B_LOCKED;
	if (elseit) {
		KKASSERT(iou->io.released == 0);
		iou->io.released = 1;
		bqrelse(bp);
	} else {
		KKASSERT(iou->io.released);
	}

	switch(iou->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		iou->volume.ondisk = NULL;
		break;
	case HAMMER_STRUCTURE_BUFFER:
		iou->buffer.ondisk = NULL;
		break;
	}
}

/*
 * Wait for any physical IO to complete
 */
static void
hammer_io_wait(hammer_io_t io)
{
	if (io->running) {
		crit_enter();
		tsleep_interlock(io);
		io->waiting = 1;
		for (;;) {
			tsleep(io, 0, "hmrflw", 0);
			if (io->running == 0)
				break;
			tsleep_interlock(io);
			io->waiting = 1;
			if (io->running == 0)
				break;
		}
		crit_exit();
	}
}
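
/*
 * The loop above relies on the tsleep_interlock() pattern: we are placed
 * on the sleep queue before io->running is re-tested, so a wakeup() issued
 * by the completion side in between cannot be lost.  Sketch of the waker
 * half of the handshake (this is what hammer_io_complete() does below):
 */
#if 0
	io->running = 0;
	if (io->waiting) {
		io->waiting = 0;
		wakeup(io);
	}
#endif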

void
hammer_io_waitdep(hammer_io_t io)
{
	while (TAILQ_FIRST(&io->deplist)) {
		kprintf("waitdep %p\n", io);
		tsleep(io, 0, "hmrdep", hz);
	}
}

/*
 * Load bp for a HAMMER structure.  The io is exclusively locked by the
 * caller.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
		if (error == 0) {
			bp = io->bp;
			bp->b_ops = &hammer_bioops;
			LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
			BUF_KERNPROC(bp);
		}
		io->modified = 0;	/* no new modifications yet */
		io->released = 0;	/* we hold an active lock on bp */
		io->running = 0;
		io->waiting = 0;
	} else {
		error = 0;
	}
	return(error);
}
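
/*
 * Example call sequence (sketch): on success the caller usually points its
 * ondisk pointer at the buffer data.  The volume-header case shown here is
 * illustrative; the devvp and ondisk fields are assumptions about the
 * containing structure, not code copied from a call site.
 */
#if 0
	volume->io.offset = 0;		/* volume header at offset 0 */
	error = hammer_io_read(volume->devvp, &volume->io);
	if (error == 0)
		volume->ondisk = (void *)volume->io.bp->b_data;
#endif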

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * vfs_bio_clrbuf() is kinda nasty; enforce serialization against background
 * I/O so we can call it.
 *
 * The caller is responsible for calling hammer_modify_*() on the appropriate
 * HAMMER structure.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->modified = 0;
		io->released = 0;
		io->running = 0;
		io->waiting = 0;
		BUF_KERNPROC(bp);
	} else {
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
	}
	vfs_bio_clrbuf(bp);
	return(0);
}

/*
 * This routine is called on the last reference to a hammer structure.
 * The io is usually locked exclusively (but may not be during unmount).
 *
 * If flush is 1, or B_LOCKED was set indicating that the kernel
 * wanted to recycle the buffer, and there are no dependencies, this
 * function will issue an asynchronous write.
 *
 * If flush is 2 this function waits until all I/O has completed and
 * disassociates the bp from the IO before returning, unless there
 * are still other references.
 */
void
hammer_io_release(struct hammer_io *io, int flush)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL)
		return;

#if 0
	/*
	 * If flush is 2 wait for dependencies
	 */
	while (flush == 2 && TAILQ_FIRST(&io->deplist)) {
		hammer_io_wait(TAILQ_FIRST(&io->deplist));
	}
#endif

	/*
	 * Try to flush a dirty IO to disk if asked to by the caller
	 * or if the kernel tried to flush the buffer in the past.
	 *
	 * The flush will fail if any dependencies are present.
	 */
	if (io->modified && (flush || bp->b_flags & B_LOCKED))
		hammer_io_flush(io);

	/*
	 * If flush is 2 we wait for the IO to complete.
	 */
	if (flush == 2 && io->running) {
		hammer_io_wait(io);
	}

	/*
	 * Actively or passively release the buffer.  Modified IOs with
	 * dependencies cannot be released.
	 */
	if (flush && io->modified == 0 && io->running == 0) {
		KKASSERT(TAILQ_EMPTY(&io->deplist));
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
		hammer_io_disassociate((hammer_io_structure_t)io, 1);
	} else if (io->modified) {
		if (io->released == 0 && TAILQ_EMPTY(&io->deplist)) {
			io->released = 1;
			bdwrite(bp);
		}
	} else if (io->released == 0) {
		io->released = 1;
		bqrelse(bp);
	}
}
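
/*
 * Example (sketch): the three flush modes as a caller might use them.
 * These call sites are illustrative only.
 */
#if 0
	hammer_io_release(&buffer->io, 0);	/* passive: leave bp associated */
	hammer_io_release(&buffer->io, 1);	/* async write if dirty */
	hammer_io_release(&volume->io, 2);	/* write, wait, disassociate */
#endif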

/*
 * This routine is called with a locked IO when a flush is desired and
 * no other references to the structure exist other than ours.  This
 * routine is ONLY called when HAMMER believes it is safe to flush a
 * potentially modified buffer out.
 */
void
hammer_io_flush(struct hammer_io *io)
{
	struct buf *bp;

	/*
	 * Can't flush if the IO isn't modified or if it has dependencies.
	 */
	if (io->modified == 0)
		return;
	if (TAILQ_FIRST(&io->deplist))
		return;

	KKASSERT(io->bp);

	/*
	 * XXX - umount syncs buffers without referencing them, check for 0
	 * also.
	 */
	KKASSERT(io->lock.refs == 0 || io->lock.refs == 1);

	/*
	 * Reset modified to 0 here and re-check it after the IO completes.
	 * This is only legal when lock.refs == 1 (otherwise we might clear
	 * the modified bit while there are still users of the cluster
	 * modifying the data).
	 *
	 * NOTE: We have no dependencies so we don't have to worry about
	 * cluster-open's here.
	 *
	 * Do this before potentially blocking so any attempt to modify the
	 * ondisk while we are blocked blocks waiting for us.
	 */
	io->modified = 0;	/* force interlock */
	bp = io->bp;

	if (io->released) {
		regetblk(bp);
		/* BUF_KERNPROC(io->bp); */
		io->released = 0;
	}
	io->released = 1;
	io->running = 1;
	bawrite(bp);
}

/************************************************************************
 *				BUFFER DIRTYING				*
 ************************************************************************
 *
 * These routines deal with dependencies created when IO buffers get
 * modified.  The caller must call hammer_modify_*() on a referenced
 * HAMMER structure prior to modifying its on-disk data.
 *
 * Any intent to modify an IO buffer acquires the related bp and imposes
 * various write ordering dependencies.
 */
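
/*
 * Example of the required sequence (sketch): hammer_modify_*() must run
 * before the data is touched so the undo record captures the
 * pre-modification image.  The element and leaf names below are
 * hypothetical, not taken from a real call site.
 */
#if 0
	hammer_modify_buffer(trans, buffer, &elm->leaf, sizeof(elm->leaf));
	elm->leaf = *leaf;	/* now safe to modify the ondisk data */
#endif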

/*
 * Mark a HAMMER structure as undergoing modification.  Return 1 when applying
 * a non-NULL ordering dependency for the first time, 0 otherwise.
 *
 * list can be NULL, indicating that a structural modification is being made
 * without creating an ordering dependency.
 */
static __inline
int
hammer_io_modify(hammer_io_t io, struct hammer_io_list *list)
{
	int r;

	/*
	 * Shortcut if nothing to do.
	 */
	KKASSERT(io->lock.refs != 0 && io->bp != NULL);
	if (io->modified && io->released == 0 &&
	    (io->entry_list || list == NULL)) {
		return(0);
	}

	hammer_lock_ex(&io->lock);
	io->modified = 1;
	if (io->released) {
		regetblk(io->bp);
		BUF_KERNPROC(io->bp);
		io->released = 0;
		KKASSERT(io->modified != 0);
	}
	if (io->entry_list == NULL) {
		io->entry_list = list;
		if (list) {
			TAILQ_INSERT_TAIL(list, io, entry);
			r = 1;
		} else {
			r = 0;
		}
	} else {
		/* only one dependency is allowed */
		KKASSERT(list == NULL || io->entry_list == list);
		r = 0;
	}
	hammer_unlock(&io->lock);
	return(r);
}

void
hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
		     void *base, int len)
{
	hammer_io_modify(&volume->io, NULL);

	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
			 HAMMER_ENCODE_RAW_VOLUME(volume->vol_no, rel_offset),
			 base, len);
	}
}

/*
 * Caller intends to modify a buffer's ondisk structure.  The related
 * cluster must be marked open prior to being able to flush the modified
 * buffer so get that I/O going now.
 */
void
hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
		     void *base, int len)
{
	hammer_io_modify(&buffer->io, NULL);
	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
				     buffer->zone2_offset + rel_offset,
				     base, len);
	}
}

/*
 * Mark an entity as not being dirty any more -- this usually occurs when
 * the governing a-list has freed the entire entity.
 *
 * XXX
 */
void
hammer_io_clear_modify(struct hammer_io *io)
{
#if 0
	struct buf *bp;

	io->modified = 0;
	if ((bp = io->bp) != NULL) {
		if (io->released) {
			regetblk(bp);
			/* BUF_KERNPROC(io->bp); */
		} else {
			io->released = 1;
		}
		if (io->modified == 0) {
			kprintf("hammer_io_clear_modify: cleared %p\n", io);
			bundirty(bp);
			bqrelse(bp);
		} else {
			bdwrite(bp);
		}
	}
#endif
}

/************************************************************************
 *				HAMMER_BIOOPS				*
 ************************************************************************
 *
 */

/*
 * Pre-IO initiation kernel callback - cluster build only
 */
static void
hammer_io_start(struct buf *bp)
{
}

/*
 * Post-IO completion kernel callback
 *
 * NOTE: HAMMER may modify a buffer after initiating I/O.  The modified bit
 * may also be set if we were marking a cluster header open.  Only remove
 * our dependency if the modified bit is clear.
 */
static void
hammer_io_complete(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT(iou->io.released == 1);

	/*
	 * If this was a write and the modified bit is still clear we can
	 * remove ourselves from the dependency list.
	 *
	 * If no lock references remain and we can acquire the IO lock and
	 * someone at some point wanted us to flush (B_LOCKED test), then
	 * try to dispose of the IO.
	 */
	if (iou->io.modified == 0 && iou->io.entry_list) {
		TAILQ_REMOVE(iou->io.entry_list, &iou->io, entry);
		iou->io.entry_list = NULL;
	}
	iou->io.running = 0;
	if (iou->io.waiting) {
		iou->io.waiting = 0;
		wakeup(iou);
	}

	/*
	 * Someone wanted us to flush, try to clean out the buffer.
	 */
	if ((bp->b_flags & B_LOCKED) && iou->io.lock.refs == 0) {
		KKASSERT(iou->io.modified == 0);
		bp->b_flags &= ~B_LOCKED;
		hammer_io_deallocate(bp);
		/* structure may be dead now */
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This case can only occur with read-only
 * bp's.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 *
 * WARNING: Because this can be called directly by getnewbuf we cannot
 * recurse into the tree.  If a bp cannot be immediately disassociated
 * our only recourse is to set B_LOCKED.
 *
 * WARNING: If the HAMMER structure is passively cached we have to
 * scrap it here.
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0);
	if (iou->io.lock.refs > 0 || iou->io.modified) {
		bp->b_flags |= B_LOCKED;
	} else {
		/* XXX interlock against ref or another disassociate */
		/* XXX this can leave HAMMER structures lying around */
		hammer_io_disassociate(iou, 0);
#if 0
		switch(iou->io.type) {
		case HAMMER_STRUCTURE_VOLUME:
			hammer_rel_volume(&iou->volume, 1);
			break;
		case HAMMER_STRUCTURE_BUFFER:
			hammer_rel_buffer(&iou->buffer, 1);
			break;
		}
#endif
	}
}

static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does,
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.
 *
 * This code enforces the following write ordering: buffers, then cluster
 * headers, then volume headers.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}

static int
hammer_io_checkwrite(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT(TAILQ_EMPTY(&iou->io.deplist));

	/*
	 * We are called from the kernel on delayed-write buffers, and
	 * called from hammer_io_flush() on flush requests.  There should
	 * be no dependencies in either case.
	 *
	 * In the case of delayed-writes, the introduction of a dependency
	 * will block until the bp can be reacquired, and the bp is then
	 * simply not released until the dependency can be satisfied.
	 *
	 * We can only clear the modified bit when entered from the kernel
	 * if io.lock.refs == 0.
	 */
	if (iou->io.lock.refs == 0) {
		iou->io.modified = 0;
	}
	return(0);
}

/*
 * Return non-zero if the caller should flush the structure associated
 * with this io sub-structure.
 */
int
hammer_io_checkflush(struct hammer_io *io)
{
	if (io->bp == NULL || (io->bp->b_flags & B_LOCKED)) {
		return(1);
	}
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};
639