/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
static void hammer2_io_callback(struct bio *bio);
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io2->pbase < io1->pbase)
                return(-1);
        if (io2->pbase > io1->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
             off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

#define HAMMER2_DIO_INPROG      0x80000000
#define HAMMER2_DIO_GOOD        0x40000000
#define HAMMER2_DIO_WAITING     0x20000000
#define HAMMER2_DIO_DIRTY       0x10000000

#define HAMMER2_DIO_MASK        0x0FFFFFFF
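
/*
 * dio->refs encodes both state and the reference count: the flag bits
 * above occupy the high nibble while the low 28 bits (HAMMER2_DIO_MASK)
 * hold the actual ref count.  Packing them into one word lets the code
 * below transition the flags and the count together with a single
 * atomic op.
 */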

/*
 * Acquire the requested dio, set *ownerp based on state.  If state is good
 * *ownerp is set to 0, otherwise *ownerp is set to DIO_INPROG and the
 * caller must resolve the buffer.
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize, int *ownerp)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        int psize = hammer2_devblksize(lsize);
        int refs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO
         */
        spin_lock_shared(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_int(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                spin_unlock_shared(&hmp->io_spin);
        } else {
                spin_unlock_shared(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->refs = 1;
                spin_lock(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        spin_unlock(&hmp->io_spin);
                } else {
                        /* lost the insertion race, use the existing dio */
                        if ((atomic_fetchadd_int(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        spin_unlock(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Stop if the buffer is good.  Once set, GOOD cannot
                 * be cleared until refs drops to 0.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        *ownerp = 0;
                        goto done;
                }

                /*
                 * We need to acquire the in-progress lock on the buffer.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        tsleep_interlock(dio, 0);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_WAITING)) {
                                tsleep(dio, PINTERLOCKED, "h2dio", 0);
                        }
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_INPROG)) {
                                break;
                        }
                }
                /* retry */
        }

        /*
         * We need to do more work before the buffer is usable.
         */
        *ownerp = HAMMER2_DIO_INPROG;
done:
        if (dio->act < 5)
                ++dio->act;
        return(dio);
}

/*
 * If part of an asynchronous I/O the asynchronous I/O is biodone()'d.
 *
 * If the caller owned INPROG then the dio will be set GOOD or not
 * depending on whether the caller disposed of dio->bp or not.
 */
static
void
hammer2_io_complete(hammer2_io_t *dio, int owner)
{
        int refs;
        int good;

        while (owner & HAMMER2_DIO_INPROG) {
                refs = dio->refs;
                cpu_ccfence();
                good = dio->bp ? HAMMER2_DIO_GOOD : 0;
                if (atomic_cmpset_int(&dio->refs, refs,
                                      (refs & ~(HAMMER2_DIO_WAITING |
                                                HAMMER2_DIO_INPROG)) |
                                      good)) {
                        if (refs & HAMMER2_DIO_WAITING)
                                wakeup(dio);
                        if (good)
                                BUF_KERNPROC(dio->bp);
                        break;
                }
                /* retry */
        }
}
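
/*
 * Together getblk/complete implement a simple ownership protocol: a
 * caller that receives *ownerp == HAMMER2_DIO_INPROG must instantiate
 * dio->bp (or leave it NULL on failure) and then call
 * hammer2_io_complete(), which marks the dio GOOD if a bp is present
 * and wakes any waiters.  Sketch, mirroring hammer2_io_bread() below
 * (error handling elided):
 *
 *      dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
 *      if (owner) {
 *              error = bread(hmp->devvp, dio->pbase, dio->psize, &dio->bp);
 *              hammer2_io_complete(dio, owner);
 *      }
 */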

/*
 * Release our ref on *diop, dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_mount_t *hmp;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int refs;

        dio = *diop;
        *diop = NULL;

        for (;;) {
                refs = dio->refs;

                if ((refs & HAMMER2_DIO_MASK) == 1) {
                        KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              ((refs - 1) &
                                               ~(HAMMER2_DIO_GOOD |
                                                 HAMMER2_DIO_DIRTY)) |
                                              HAMMER2_DIO_INPROG)) {
                                break;
                        }
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
                                return;
                        /* retry */
                }
        }

        /*
         * Locked INPROG on the 1->0 transition and we cleared DIO_GOOD
         * (which is legal only on the last ref).  This allows us to
         * dispose of the buffer.  refs is now 0.
         *
         * The instant we call io_complete the dio is a free agent again and
         * can be ripped out from under us.  Acquisition of the dio after
         * this point will require a shared or exclusive spinlock.
         */
        hmp = dio->hmp;
        bp = dio->bp;
        dio->bp = NULL;
        pbase = dio->pbase;
        psize = dio->psize;
        atomic_add_int(&hmp->iofree_count, 1);
        hammer2_io_complete(dio, HAMMER2_DIO_INPROG);   /* clears INPROG */
        dio = NULL;     /* dio stale */

        if (refs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (refs & HAMMER2_DIO_DIRTY) {
                        if (hammer2_cluster_enable) {
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                cluster_write(bp, peof, psize, 4);
                        } else {
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        }

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 1000) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                spin_lock(&hmp->io_spin);
                if (hmp->iofree_count > 1000) {
                        info.count = hmp->iofree_count / 2;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                spin_unlock(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}
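
/*
 * Free dios (ref count 0) are deliberately left in the iotree so the
 * common re-acquire case in hammer2_io_getblk() only needs the shared
 * spinlock.  The dio->act field, bumped up to a cap of 5 on each getblk
 * and decayed by the cleanup scan below, serves as a crude LRU: the
 * scan only destroys dios whose activity has decayed to zero, is only
 * kicked once more than 1000 free dios accumulate, and reclaims at most
 * half the cache per pass.
 */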

/*
 * Clean up any dio's with no references which are not in-progress.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        --dio->act;
                        return 0;
                }
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return 0;
}

void
hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

/*
 * Returns a pointer to the requested data within the dio's buffer.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}

static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop, int dozero, int quick)
{
        hammer2_io_t *dio;
        int owner;
        int error;

        dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
        if (owner) {
                if (lsize == dio->psize) {
                        dio->bp = getblk(hmp->devvp,
                                         dio->pbase, dio->psize,
                                         (quick ? GETBLK_NOWAIT : 0),
                                         0);
                        if (dio->bp) {
                                vfs_bio_clrbuf(dio->bp);
                                if (quick) {
                                        dio->bp->b_flags |= B_CACHE;
                                        bqrelse(dio->bp);
                                        dio->bp = NULL;
                                }
                        }
                        error = 0;
                } else if (quick) {
                        /* do nothing */
                        error = 0;
                } else {
                        error = bread(hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
                hammer2_io_complete(dio, owner);
        } else {
                error = 0;
        }
        if (dio->bp) {
                if (dozero)
                        bzero(hammer2_io_data(dio, lbase), lsize);
                atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
        }
        return error;
}

int
hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 1, 0));
}

int
hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 0));
}

int
hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 1));
}

int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        hammer2_io_t *dio;
        off_t peof;
        int owner;
        int error;

        dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
        if (owner) {
                if (hammer2_cluster_enable) {
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(hmp->devvp, peof, dio->pbase,
                                             dio->psize,
                                             dio->psize, HAMMER2_PBUFSIZE*4,
                                             &dio->bp);
                } else {
                        error = bread(hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
                hammer2_io_complete(dio, owner);
        } else {
                error = 0;
        }
        return error;
}

/*
 * Read the requested block asynchronously.  If this thread wins INPROG
 * the read is issued via breadcb() and the callback runs from the I/O
 * completion path; otherwise the buffer is already good and the
 * callback is invoked synchronously.  In both cases the dio is released
 * after the callback returns.
 */
void
hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
                   void (*callback)(hammer2_io_t *dio,
                                    hammer2_cluster_t *arg_l,
                                    hammer2_chain_t *arg_c,
                                    void *arg_p, off_t arg_o),
                   hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c,
                   void *arg_p, off_t arg_o)
{
        hammer2_io_t *dio;
        int owner;

        dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
        if (owner) {
                dio->callback = callback;
                dio->arg_l = arg_l;
                dio->arg_c = arg_c;
                dio->arg_p = arg_p;
                dio->arg_o = arg_o;
                breadcb(hmp->devvp, dio->pbase, dio->psize,
                        hammer2_io_callback, dio);
        } else {
                callback(dio, arg_l, arg_c, arg_p, arg_o);
                hammer2_io_bqrelse(&dio);
        }
}

static void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_io_t *dio = bio->bio_caller_info1.ptr;

        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = dbp;
        KKASSERT((dio->bp->b_flags & B_ERROR) == 0);    /* XXX */
        hammer2_io_complete(dio, HAMMER2_DIO_INPROG);

        /*
         * We still have the ref and DIO_GOOD is now set so nothing else
         * should mess with the callback fields until we release the dio.
         */
        dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
        hammer2_io_bqrelse(&dio);
        /* TODO: async load meta-data and assign chain->dio */
}
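
/*
 * Asynchronous consumer pattern (sketch; my_callback and the use of
 * arg_o to carry the logical offset are illustrative assumptions, not
 * fixed API):
 *
 *      static void
 *      my_callback(hammer2_io_t *dio, hammer2_cluster_t *cluster,
 *                  hammer2_chain_t *chain, void *arg_p, off_t arg_o)
 *      {
 *              char *data = hammer2_io_data(dio, arg_o);
 *
 *              ... consume data; the dio is released for us after the
 *                  callback returns ...
 *      }
 *
 *      hammer2_io_breadcb(hmp, lbase, lsize, my_callback,
 *                         cluster, chain, NULL, lbase);
 */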

/*
 * All three write flavors below currently set DIRTY and devolve into
 * the delayed-write path taken by hammer2_io_putblk() on the last ref.
 */
void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}
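
/*
 * Modify-and-flush pattern using the above (sketch; error handling
 * elided):
 *
 *      hammer2_io_t *dio;
 *      char *data;
 *
 *      error = hammer2_io_new(hmp, lbase, lsize, &dio);
 *      data = hammer2_io_data(dio, lbase);
 *      ... fill in data ...
 *      hammer2_io_bdwrite(&dio);
 */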