xref: /dflybsd-src/sys/vfs/hammer2/hammer2_io.c (revision 5ca0a96d6c3bf50926197b4bb92af7969ed3528a)
/*
 * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

#define HAMMER2_DOP_READ	1	/* read existing buffer */
#define HAMMER2_DOP_NEW		2	/* instantiate new buffer, zero it */
#define HAMMER2_DOP_NEWNZ	3	/* instantiate new buffer, no zeroing */
#define HAMMER2_DOP_READQ	4	/* read only if the DIO already exists */

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS abstraction, but the main
 * purpose is to allow larger buffers to be used against hammer2_chains
 * using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
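
/*
 * Editor's sketch (not part of the original source): the typical DIO
 * life cycle as implied by the interfaces below, kept under #if 0 per
 * this file's convention for inactive code.  hammer2_io_bread() and
 * hammer2_io_bqrelse() are assumed to be the usual non-debug macro
 * wrappers for the _hammer2_io_*() functions, as with
 * hammer2_io_getblk()/hammer2_io_putblk() used elsewhere in this file;
 * error handling is elided.
 */
#if 0
static void
example_dio_cycle(hammer2_dev_t *hmp, hammer2_off_t data_off)
{
	hammer2_io_t *dio;
	char *data;

	/*
	 * data_off encodes the buffer size radix in its low bits, so
	 * lsize must equal 1 << radix (see the KKASSERTs below).
	 */
	int lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);

	/* acquire the DIO and, if necessary, instantiate its buffer */
	if (hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA,
			     data_off, lsize, &dio) == 0) {
		data = hammer2_io_data(dio, data_off);
		/* ... inspect or modify lsize bytes at data ... */
		hammer2_io_setdirty(dio);	/* only if modified */
	}
	hammer2_io_bqrelse(&dio);	/* drop ref; write-back is delayed */
}
#endif
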
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif
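
/*
 * Worked example (editor's note, not in the original source): the mask
 * above dedicates one bit per 1KB sub-chunk of the 64KB physical
 * buffer.  For a 16KB chunk at buffer-relative offset 0x8000:
 *
 *	i    = 0x8000 >> 10                    = 32
 *	mask = (((uint64_t)1 << 16) - 1) << 32 = 0x0000ffff00000000
 *
 * hammer2_dedup_mask(), used by the live dedup code later in this file,
 * presumably follows the same one-bit-per-1KB layout (its definition
 * lives outside this file).
 */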

#ifdef HAMMER2_IO_DEBUG

static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	hammer2_volume_t *vol;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set, the ref prevents the buffer from being
	 * cleared out from under us; we can set *isgoodp and the caller
	 * can operate on the buffer without any further interaction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		vol = hammer2_get_volume(hmp, pbase);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->devvp = vol->dev->devvp;
		dio->dbase = vol->offset;
		KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	hammer2_off_t dev_pbase;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	dev_pbase = dio->pbase - dio->dbase;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->devvp,
					 dev_pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				peof -= dio->dbase;
				error = cluster_readx(dio->devvp,
						      peof, dev_pbase,
						      dio->psize, bflags,
						      dio->psize,
						      HAMMER2_PBUFSIZE*hce,
						      &dio->bp);
			} else {
				error = breadnx(dio->devvp, dev_pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			peof -= dio->dbase;
			error = cluster_readx(dio->devvp,
					      peof, dev_pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->devvp, dev_pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD, and wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}
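
/*
 * Editor's note (not in the original source): dio->refs doubles as a
 * lock-free state word.  As used above, the low bits (HAMMER2_DIO_MASK)
 * hold the reference count while DIO_GOOD, DIO_INPROG, DIO_WAITING,
 * DIO_DIRTY, and DIO_FLUSH are flag bits (values defined in hammer2.h).
 * Exclusive ownership of buffer instantiation or disposal is taken by
 * CAS-ing DIO_INPROG in; contenders set DIO_WAITING with a
 * tsleep_interlock()/tsleep() pair and retry after the owner clears
 * INPROG and issues a wakeup().
 */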

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that a racing new accessor will decrement iofree_count, so
	 * we must increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * This allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'); by
			 * default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			off_t peof;
			int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					peof -= dio->dbase;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
			} else {
				bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Cleanup any dios with (INPROG | refs) == 0, moving reclaimable ones
 * to the caller's temporary tree.  Called via RB_SCAN from putblk when
 * too many free DIOs have accumulated, and to clean up cached DIOs on
 * umount after all activity has been flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}
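
/*
 * Worked example (editor's note, not in the original source): the aging
 * logic above decays dio->act by one per second of inactivity, plus one.
 * A dio with act == 5 last touched 3 seconds ago (a ticks delta of 3*hz)
 * decays to 5 - 3 - 1 == 1 and is retained; once act reaches 0 the dio
 * becomes eligible for removal, subject to the info->count budget.
 */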

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	lbase -= dio->dbase;
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}
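
/*
 * Worked example (editor's note, not in the original source): assuming
 * the usual 6-bit size-radix field in the low bits of the offset, with
 * dio->dbase == 0 and a 64KB buffer at b_loffset 0x10000, an lbase of
 * 0x1800a (block at 0x18000, radix 10, i.e. a 1KB block) gives
 * off = 0x18000 - 0x10000 = 0x8000, so the returned pointer is
 * bp->b_data + 0x8000.
 */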

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
	}
#endif
	return ((*diop)->error);
}

hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
		     int lsize HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;

	dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
				 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
	return dio;
}

void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
	return (0);	/* XXX */
}
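
/*
 * Editor's note (not in the original source): the three write flavors
 * above only set flag bits and drop their ref; the actual disposition
 * happens on the last ref drop in _hammer2_io_putblk().  DIO_DIRTY
 * alone (bdwrite) yields a delayed write, while DIO_DIRTY plus
 * DIO_FLUSH (bawrite/bwrite) forces the write to be issued, clustered
 * when hammer2_cluster_write is enabled.
 */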

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrently with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_io_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			  hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	if (bp->b_flags & B_DELWRI)
		return;
	hammer2_adjwritecounter(dio->btype, dio->psize);
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}
895