/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS abstraction, but the main
 * purpose is to allow larger buffers to back hammer2_chain structures
 * that use smaller allocations, without causing deadlocks.
 */
static void hammer2_io_callback(struct bio *bio);
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

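/*
 * RB-tree comparison function, ordering dio's by physical offset (pbase).
 */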
static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io2->pbase < io1->pbase)
		return(-1);
	if (io2->pbase > io1->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

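/*
 * Used by hammer2_io_cleanup_callback() to collect excess free dio's
 * for disposal outside the io_spin spinlock.
 */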
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

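/*
 * dio->refs bit definitions:
 *
 *	INPROG	- buffer resolution is in progress; the owner must
 *		  instantiate or dispose of dio->bp and then call
 *		  hammer2_io_complete().
 *	GOOD	- dio->bp is valid.  Once set, the flag can only be
 *		  cleared on the last ref transition.
 *	WAITING	- one or more threads are sleeping until INPROG clears.
 *	DIRTY	- the underlying buffer must be written back on release.
 *	MASK	- the low bits of refs hold the reference count.
 */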
#define HAMMER2_DIO_INPROG	0x80000000
#define HAMMER2_DIO_GOOD	0x40000000
#define HAMMER2_DIO_WAITING	0x20000000
#define HAMMER2_DIO_DIRTY	0x10000000

#define HAMMER2_DIO_MASK	0x0FFFFFFF

/*
 * Acquire the requested dio and set *ownerp based on its state.  If the
 * buffer is already good, *ownerp is set to 0.  Otherwise *ownerp is set
 * to HAMMER2_DIO_INPROG and the caller owns the in-progress lock and
 * must resolve the buffer.
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize, int *ownerp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	off_t pbase;
	off_t pmask;
	int psize = hammer2_devblksize(lsize);
	int refs;

	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

	/*
	 * Access/Allocate the DIO
	 */
	spin_lock_shared(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		if ((atomic_fetchadd_int(&dio->refs, 1) &
		     HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		spin_unlock_shared(&hmp->io_spin);
	} else {
		spin_unlock_shared(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->refs = 1;
		spin_lock(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			spin_unlock(&hmp->io_spin);
		} else {
			if ((atomic_fetchadd_int(&xio->refs, 1) &
			     HAMMER2_DIO_MASK) == 0) {
				atomic_add_int(&xio->hmp->iofree_count, -1);
			}
			spin_unlock(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	}

	/*
	 * Obtain/Validate the buffer.
	 */
	for (;;) {
		refs = dio->refs;
		cpu_ccfence();

		/*
		 * Stop if the buffer is good.  Once set GOOD the flag cannot
		 * be cleared until refs drops to 0.
		 */
		if (refs & HAMMER2_DIO_GOOD) {
			*ownerp = 0;
			goto done;
		}

		/*
		 * We need to acquire the in-progress lock on the buffer
		 */
		if (refs & HAMMER2_DIO_INPROG) {
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_WAITING)) {
				tsleep(dio, PINTERLOCKED, "h2dio", 0);
			}
			/* retry */
		} else {
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_INPROG)) {
				break;
			}
		}
		/* retry */
	}

	/*
	 * We need to do more work before the buffer is usable
	 */
	*ownerp = HAMMER2_DIO_INPROG;
done:
	if (dio->act < 5)
		++dio->act;
	return(dio);
}

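/*
 * Typical caller pattern (a sketch; the hammer2_io_new*() and
 * hammer2_io_bread() helpers below follow it):
 *
 *	int owner;
 *	hammer2_io_t *dio;
 *
 *	dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
 *	if (owner) {
 *		... instantiate or read dio->bp ...
 *		hammer2_io_complete(dio, owner);
 *	}
 *	data = hammer2_io_data(dio, lbase);
 *	...
 *	hammer2_io_putblk(&dio);
 */
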
/*
 * Complete an operation on a dio.  If the dio was part of an asynchronous
 * I/O, the underlying I/O has already been biodone()'d.
 *
 * If the caller owned INPROG, the dio is marked GOOD when the caller left
 * dio->bp intact and not-GOOD when the caller disposed of dio->bp.  Any
 * threads waiting on INPROG are woken up.
 */
static
void
hammer2_io_complete(hammer2_io_t *dio, int owner)
{
	int refs;
	int good;

	while (owner & HAMMER2_DIO_INPROG) {
		refs = dio->refs;
		cpu_ccfence();
		good = dio->bp ? HAMMER2_DIO_GOOD : 0;
		if (atomic_cmpset_int(&dio->refs, refs,
				      (refs & ~(HAMMER2_DIO_WAITING |
					        HAMMER2_DIO_INPROG)) |
				      good)) {
			if (refs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			if (good)
				BUF_KERNPROC(dio->bp);
			break;
		}
		/* retry */
	}
}

/*
 * Release our ref on *diop.  On the last ref we dispose of the underlying
 * buffer and may recycle excess cached dio's.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_mount_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t peof;
	off_t pbase;
	int psize;
	int refs;

	dio = *diop;
	*diop = NULL;

	for (;;) {
		refs = dio->refs;

		if ((refs & HAMMER2_DIO_MASK) == 1) {
			KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
			if (atomic_cmpset_int(&dio->refs, refs,
					      ((refs - 1) &
					       ~(HAMMER2_DIO_GOOD |
						 HAMMER2_DIO_DIRTY)) |
					      HAMMER2_DIO_INPROG)) {
				break;
			}
			/* retry */
		} else {
			if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
				return;
			/* retry */
		}
		/* retry */
	}

	/*
	 * We locked INPROG on the 1->0 transition and cleared DIO_GOOD
	 * (which is legal only on the last ref).  This allows us to dispose
	 * of the buffer.  refs is now 0.
	 *
	 * The instant we call io_complete the dio is a free agent again and
	 * can be ripped out from under us.  Acquisition of the dio after
	 * this point will require a shared or exclusive spinlock.
	 */
	hmp = dio->hmp;
	bp = dio->bp;
	dio->bp = NULL;
	pbase = dio->pbase;
	psize = dio->psize;
	atomic_add_int(&hmp->iofree_count, 1);
	hammer2_io_complete(dio, HAMMER2_DIO_INPROG);	/* clears INPROG */
	dio = NULL;	/* dio stale */

	if (refs & HAMMER2_DIO_GOOD) {
		KKASSERT(bp != NULL);
		if (refs & HAMMER2_DIO_DIRTY) {
			if (hammer2_cluster_enable) {
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				cluster_write(bp, peof, psize, 4);
			} else {
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	if (hmp->iofree_count > 1000) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		spin_lock(&hmp->io_spin);
		if (hmp->iofree_count > 1000) {
			info.count = hmp->iofree_count / 2;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		spin_unlock(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Clean up any unreferenced dio's that are not in-progress.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			--dio->act;
			return 0;
		}
		KKASSERT(dio->bp == NULL);
		RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
		xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
		KKASSERT(xio == NULL);
		if (--info->count <= 0)	/* limit scan */
			return(-1);
	}
	return 0;
}

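/*
 * Dispose of all dio's on the caller-supplied tree, freeing their memory
 * and adjusting the global and per-mount free counts.
 */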
void
hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

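/*
 * Return a pointer to the requested data offset within the dio's
 * underlying buffer.
 */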
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

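/*
 * Common code for the hammer2_io_new*() variants below.  Acquires the
 * dio and, if the caller won the INPROG race, instantiates the buffer.
 * (dozero) zeroes the logical range, (quick) releases the buffer back
 * to the buffer cache immediately instead of holding it on the dio.
 * Any buffer still attached is marked dirty.
 */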
static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
	        hammer2_io_t **diop, int dozero, int quick)
{
	hammer2_io_t *dio;
	int owner;
	int error;

	dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		if (lsize == dio->psize) {
			dio->bp = getblk(hmp->devvp,
					 dio->pbase, dio->psize,
					 (quick ? GETBLK_NOWAIT : 0),
					 0);
			if (dio->bp) {
				vfs_bio_clrbuf(dio->bp);
				if (quick) {
					dio->bp->b_flags |= B_CACHE;
					bqrelse(dio->bp);
					dio->bp = NULL;
				}
			}
			error = 0;
		} else if (quick) {
			/* do nothing */
			error = 0;
		} else {
			error = bread(hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
		hammer2_io_complete(dio, owner);
	} else {
		error = 0;
	}
	if (dio->bp) {
		if (dozero)
			bzero(hammer2_io_data(dio, lbase), lsize);
		atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
	}
	return error;
}

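/*
 * hammer2_io_new()   - acquire a new buffer and zero its data.
 * hammer2_io_newnz() - acquire a new buffer without zeroing its data.
 * hammer2_io_newq()  - opportunistic (quick) variant which leaves the
 *			buffer in the buffer cache rather than attached
 *			to the dio.
 */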
int
hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, 1, 0));
}

int
hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 0));
}

int
hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 1));
}

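/*
 * Acquire the dio covering the request and issue a synchronous device
 * read if the caller wins the INPROG race, clustered when enabled.
 */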
int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	hammer2_io_t *dio;
	off_t peof;
	int owner;
	int error;

	dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		if (hammer2_cluster_enable) {
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_read(hmp->devvp, peof, dio->pbase,
					     dio->psize,
					     dio->psize, HAMMER2_PBUFSIZE*4,
					     &dio->bp);
		} else {
			error = bread(hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
		hammer2_io_complete(dio, owner);
	} else {
		error = 0;
	}
	return error;
}

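/*
 * Asynchronous version of hammer2_io_bread().  If the caller wins the
 * INPROG race the read is issued via breadcb() and (*callback) runs from
 * the I/O completion; otherwise the buffer is already good and the
 * callback is invoked synchronously.
 */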
void
hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
		  void (*callback)(hammer2_io_t *dio,
				   hammer2_cluster_t *arg_l,
				   hammer2_chain_t *arg_c,
				   void *arg_p, off_t arg_o),
		  hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c,
		  void *arg_p, off_t arg_o)
{
	hammer2_io_t *dio;
	int owner;

	dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		dio->callback = callback;
		dio->arg_l = arg_l;
		dio->arg_c = arg_c;
		dio->arg_p = arg_p;
		dio->arg_o = arg_o;
		breadcb(hmp->devvp, dio->pbase, dio->psize,
			hammer2_io_callback, dio);
	} else {
		callback(dio, arg_l, arg_c, arg_p, arg_o);
		hammer2_io_bqrelse(&dio);
	}
}

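/*
 * Device I/O completion for breadcb().  Attaches the completed buffer
 * to the dio, clears INPROG (setting GOOD), and invokes the saved
 * callback.
 */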
static void
hammer2_io_callback(struct bio *bio)
{
	struct buf *dbp = bio->bio_buf;
	hammer2_io_t *dio = bio->bio_caller_info1.ptr;

	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(dbp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
	dio->bp = bio->bio_buf;
	KKASSERT((dio->bp->b_flags & B_ERROR) == 0); /* XXX */
	hammer2_io_complete(dio, HAMMER2_DIO_INPROG);

	/*
	 * We still have the ref and DIO_GOOD is now set so nothing else
	 * should mess with the callback fields until we release the dio.
	 */
	dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
	hammer2_io_bqrelse(&dio);
	/* TODO: async load meta-data and assign chain->dio */
}

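/*
 * The write variants below currently all just flag the dio dirty and
 * leave the actual write strategy to hammer2_io_putblk().
 */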
void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

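/*
 * Invalidate the dio's underlying buffer, but only if the invalidation
 * covers its entire size.
 */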
void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
	if ((u_int)dio->psize == bytes)
		dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

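/*
 * Both release variants simply drop the caller's ref; the disposition
 * of the underlying buffer is decided in hammer2_io_putblk().
 */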
void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

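/*
 * Return non-zero if the dio is flagged dirty.
 */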
int
hammer2_io_isdirty(hammer2_io_t *dio)
{
	return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}
557