1 /* $NetBSD: udf_strat_rmw.c,v 1.30 2022/01/15 10:55:53 msaitoh Exp $ */ 2 3 /* 4 * Copyright (c) 2006, 2008 Reinoud Zandijk 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 */ 28 29 #include <sys/cdefs.h> 30 #ifndef lint 31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.30 2022/01/15 10:55:53 msaitoh Exp $"); 32 #endif /* not lint */ 33 34 35 #if defined(_KERNEL_OPT) 36 #include "opt_compat_netbsd.h" 37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/sysctl.h> 42 #include <sys/namei.h> 43 #include <sys/proc.h> 44 #include <sys/kernel.h> 45 #include <sys/vnode.h> 46 #include <miscfs/genfs/genfs_node.h> 47 #include <sys/mount.h> 48 #include <sys/buf.h> 49 #include <sys/file.h> 50 #include <sys/device.h> 51 #include <sys/disklabel.h> 52 #include <sys/ioctl.h> 53 #include <sys/malloc.h> 54 #include <sys/dirent.h> 55 #include <sys/stat.h> 56 #include <sys/conf.h> 57 #include <sys/kauth.h> 58 #include <sys/kthread.h> 59 #include <dev/clock_subr.h> 60 61 #include <fs/udf/ecma167-udf.h> 62 #include <fs/udf/udf_mount.h> 63 64 #include "udf.h" 65 #include "udf_subr.h" 66 #include "udf_bswap.h" 67 68 69 #define VTOI(vnode) ((struct udf_node *) (vnode)->v_data) 70 #define PRIV(ump) ((struct strat_private *) (ump)->strategy_private) 71 #define BTOE(buf) ((struct udf_eccline *) ((buf)->b_private)) 72 73 /* --------------------------------------------------------------------- */ 74 75 #define UDF_MAX_PACKET_SIZE 64 /* DONT change this */ 76 77 /* sheduler states */ 78 #define UDF_SHED_WAITING 1 /* waiting on timeout */ 79 #define UDF_SHED_READING 2 80 #define UDF_SHED_WRITING 3 81 #define UDF_SHED_SEQWRITING 4 82 #define UDF_SHED_IDLE 5 /* refcnt'd */ 83 #define UDF_SHED_FREE 6 /* recycleable */ 84 #define UDF_SHED_MAX 6+1 85 86 /* flags */ 87 #define ECC_LOCKED 0x01 /* prevent access */ 88 #define ECC_WANTED 0x02 /* trying access */ 89 #define ECC_SEQWRITING 0x04 /* sequential queue */ 90 #define ECC_FLOATING 0x08 /* not queued yet */ 91 92 #define ECC_WAITTIME 10 93 94 95 TAILQ_HEAD(ecclineq, udf_eccline); 96 struct udf_eccline { 97 struct udf_mount *ump; 98 uint64_t present; /* preserve these */ 99 uint64_t readin; /* bitmap */ 100 uint64_t dirty; /* bitmap */ 101 uint64_t error; /* bitmap */ 102 uint32_t refcnt; 103 104 struct timespec wait_time; 105 uint32_t flags; 106 uint32_t start_sector; /* physical */ 107 108 const char *fname; 109 int sline; 110 111 struct buf *buf; 112 void *blob; 113 114 struct buf *bufs[UDF_MAX_PACKET_SIZE]; 115 uint32_t bufs_bpos[UDF_MAX_PACKET_SIZE]; 116 int bufs_len[UDF_MAX_PACKET_SIZE]; 117 118 int queued_on; /* on which BUFQ list */ 119 LIST_ENTRY(udf_eccline) hashchain; /* on sector lookup */ 120 }; 121 122 123 struct strat_private { 124 lwp_t *queue_lwp; 125 kcondvar_t discstrat_cv; /* to wait on */ 126 kmutex_t discstrat_mutex; /* disc strategy */ 127 kmutex_t seqwrite_mutex; /* protect mappings */ 128 129 int thread_running; /* thread control */ 130 int run_thread; /* thread control */ 131 int thread_finished; /* thread control */ 132 int cur_queue; 133 134 int num_floating; 135 int num_queued[UDF_SHED_MAX]; 136 struct bufq_state *queues[UDF_SHED_MAX]; 137 struct timespec last_queued[UDF_SHED_MAX]; 138 struct disk_strategy old_strategy_setting; 139 140 struct pool eccline_pool; 141 struct pool ecclineblob_pool; 142 LIST_HEAD(, udf_eccline) eccline_hash[UDF_ECCBUF_HASHSIZE]; 143 }; 144 145 /* --------------------------------------------------------------------- */ 146 147 #define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline, __FILE__, __LINE__) 148 #define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline, __FILE__, __LINE__) 149 150 /* can be called with or without discstrat lock */ 151 static void 152 udf_lock_eccline(struct udf_eccline *eccline, const char *fname, int sline) 153 { 154 struct strat_private *priv = PRIV(eccline->ump); 155 int waslocked, ret; 156 157 KASSERT(mutex_owned(&priv->discstrat_mutex)); 158 159 waslocked = mutex_owned(&priv->discstrat_mutex); 160 if (!waslocked) 161 mutex_enter(&priv->discstrat_mutex); 162 163 /* wait until its unlocked first */ 164 eccline->refcnt++; 165 while (eccline->flags & ECC_LOCKED) { 166 DPRINTF(ECCLINE, ("waiting for lock at %s:%d\n", 167 fname, sline)); 168 DPRINTF(ECCLINE, ("was locked at %s:%d\n", 169 eccline->fname, eccline->sline)); 170 eccline->flags |= ECC_WANTED; 171 ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, 172 hz/8); 173 if (ret == EWOULDBLOCK) 174 DPRINTF(LOCKING, ("eccline lock held, waiting for " 175 "release")); 176 } 177 eccline->flags |= ECC_LOCKED; 178 eccline->flags &= ~ECC_WANTED; 179 eccline->refcnt--; 180 181 eccline->fname = fname; 182 eccline->sline = sline; 183 184 if (!waslocked) 185 mutex_exit(&priv->discstrat_mutex); 186 } 187 188 189 /* can be called with or without discstrat lock */ 190 static void 191 udf_unlock_eccline(struct udf_eccline *eccline, const char *fname, int sline) 192 { 193 struct strat_private *priv = PRIV(eccline->ump); 194 int waslocked; 195 196 KASSERT(mutex_owned(&priv->discstrat_mutex)); 197 198 waslocked = mutex_owned(&priv->discstrat_mutex); 199 if (!waslocked) 200 mutex_enter(&priv->discstrat_mutex); 201 202 eccline->flags &= ~ECC_LOCKED; 203 cv_broadcast(&priv->discstrat_cv); 204 205 if (!waslocked) 206 mutex_exit(&priv->discstrat_mutex); 207 } 208 209 210 /* NOTE discstrat_mutex should be held! */ 211 static void 212 udf_dispose_eccline(struct udf_eccline *eccline) 213 { 214 struct strat_private *priv = PRIV(eccline->ump); 215 216 KASSERT(mutex_owned(&priv->discstrat_mutex)); 217 218 DPRINTF(ECCLINE, ("dispose eccline with start sector %d, " 219 "present %0"PRIx64"\n", eccline->start_sector, 220 eccline->present)); 221 222 KASSERT(eccline->refcnt == 0); 223 KASSERT(eccline->dirty == 0); 224 KASSERT(eccline->queued_on == 0); 225 KASSERT(eccline->flags & ECC_FLOATING); 226 KASSERT(eccline->flags & ECC_LOCKED); 227 228 LIST_REMOVE(eccline, hashchain); 229 priv->num_floating--; 230 231 putiobuf(eccline->buf); 232 pool_put(&priv->ecclineblob_pool, eccline->blob); 233 pool_put(&priv->eccline_pool, eccline); 234 } 235 236 237 /* NOTE discstrat_mutex should be held! */ 238 static void 239 udf_push_eccline(struct udf_eccline *eccline, int newqueue) 240 { 241 struct strat_private *priv = PRIV(eccline->ump); 242 243 KASSERT(mutex_owned(&priv->discstrat_mutex)); 244 245 DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue)); 246 247 KASSERT(eccline->queued_on == 0); 248 KASSERT(eccline->flags & ECC_FLOATING); 249 250 /* set buffer block numbers to make sure its queued correctly */ 251 eccline->buf->b_lblkno = eccline->start_sector; 252 eccline->buf->b_blkno = eccline->start_sector; 253 eccline->buf->b_rawblkno = eccline->start_sector; 254 255 vfs_timestamp(&priv->last_queued[newqueue]); 256 eccline->flags &= ~ECC_FLOATING; 257 priv->num_floating--; 258 eccline->queued_on = newqueue; 259 priv->num_queued[newqueue]++; 260 bufq_put(priv->queues[newqueue], eccline->buf); 261 262 UDF_UNLOCK_ECCLINE(eccline); 263 264 /* XXX tickle disc strategy statemachine */ 265 if (newqueue != UDF_SHED_IDLE) 266 cv_signal(&priv->discstrat_cv); 267 } 268 269 270 static struct udf_eccline * 271 udf_peek_eccline(struct strat_private *priv, int queued_on) 272 { 273 struct udf_eccline *eccline; 274 struct buf *buf; 275 276 KASSERT(mutex_owned(&priv->discstrat_mutex)); 277 278 for(;;) { 279 buf = bufq_peek(priv->queues[queued_on]); 280 /* could have been a race, but we'll revisit later */ 281 if (buf == NULL) 282 return NULL; 283 284 eccline = BTOE(buf); 285 UDF_LOCK_ECCLINE(eccline); 286 287 /* might have changed before we obtained the lock */ 288 if (eccline->queued_on == queued_on) 289 break; 290 291 UDF_UNLOCK_ECCLINE(eccline); 292 } 293 294 KASSERT(eccline->queued_on == queued_on); 295 KASSERT((eccline->flags & ECC_FLOATING) == 0); 296 297 DPRINTF(PARANOIA, ("DEBUG: buf %p peeked at queue %d\n", 298 eccline->buf, queued_on)); 299 300 return eccline; 301 } 302 303 304 static struct udf_eccline * 305 udf_pop_eccline(struct strat_private *priv, int queued_on) 306 { 307 struct udf_eccline *eccline; 308 struct buf *buf; 309 310 KASSERT(mutex_owned(&priv->discstrat_mutex)); 311 312 for(;;) { 313 buf = bufq_get(priv->queues[queued_on]); 314 if (buf == NULL) { 315 // KASSERT(priv->num_queued[queued_on] == 0); 316 return NULL; 317 } 318 319 eccline = BTOE(buf); 320 UDF_LOCK_ECCLINE(eccline); 321 322 /* might have changed before we obtained the lock */ 323 if (eccline->queued_on == queued_on) 324 break; 325 326 UDF_UNLOCK_ECCLINE(eccline); 327 } 328 329 KASSERT(eccline->queued_on == queued_on); 330 KASSERT((eccline->flags & ECC_FLOATING) == 0); 331 332 priv->num_queued[queued_on]--; 333 eccline->queued_on = 0; 334 335 eccline->flags |= ECC_FLOATING; 336 priv->num_floating++; 337 338 DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n", 339 eccline->buf, queued_on)); 340 341 return eccline; 342 } 343 344 345 static void 346 udf_unqueue_eccline(struct strat_private *priv, struct udf_eccline *eccline) 347 { 348 struct buf *ret __diagused; 349 350 UDF_LOCK_ECCLINE(eccline); 351 if (eccline->queued_on == 0) { 352 KASSERT(eccline->flags & ECC_FLOATING); 353 return; 354 } 355 356 ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf); 357 KASSERT(ret == eccline->buf); 358 359 priv->num_queued[eccline->queued_on]--; 360 eccline->queued_on = 0; 361 362 eccline->flags |= ECC_FLOATING; 363 priv->num_floating++; 364 } 365 366 367 static struct udf_eccline * 368 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags) 369 { 370 struct strat_private *priv = PRIV(ump); 371 struct udf_eccline *eccline; 372 uint32_t start_sector, lb_size, blobsize; 373 uint8_t *eccline_blob; 374 int line, line_offset; 375 int num_busy; 376 377 mutex_enter(&priv->discstrat_mutex); 378 379 /* lookup in our line cache hashtable */ 380 line_offset = sector % ump->packet_size; 381 start_sector = sector - line_offset; 382 line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK; 383 384 KASSERT(priv->thread_running); 385 386 retry: 387 DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line)); 388 LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) { 389 if (eccline->start_sector == start_sector) { 390 DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n", 391 eccline->start_sector)); 392 udf_unqueue_eccline(priv, eccline); 393 394 mutex_exit(&priv->discstrat_mutex); 395 return eccline; 396 } 397 } 398 399 /* not found in eccline cache */ 400 DPRINTF(ECCLINE, ("\tnot found in eccline cache\n")); 401 402 lb_size = udf_rw32(ump->logical_vol->lb_size); 403 blobsize = ump->packet_size * lb_size; 404 405 /* dont allow too many pending requests */ 406 DPRINTF(ECCLINE, ("\tallocating new eccline\n")); 407 num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating); 408 if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) { 409 cv_timedwait(&priv->discstrat_cv, 410 &priv->discstrat_mutex, hz/8); 411 goto retry; 412 } 413 414 eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT); 415 eccline = pool_get(&priv->eccline_pool, PR_NOWAIT); 416 if ((eccline_blob == NULL) || (eccline == NULL)) { 417 if (eccline_blob) 418 pool_put(&priv->ecclineblob_pool, eccline_blob); 419 if (eccline) 420 pool_put(&priv->eccline_pool, eccline); 421 422 /* out of memory for now; canibalise freelist */ 423 eccline = udf_pop_eccline(priv, UDF_SHED_FREE); 424 if (eccline == NULL) { 425 /* serious trouble; wait and retry */ 426 cv_timedwait(&priv->discstrat_cv, 427 &priv->discstrat_mutex, hz/8); 428 goto retry; 429 } 430 431 /* push back line if we're waiting for it or its locked */ 432 if (eccline->flags & ECC_WANTED) { 433 /* we won a race, but someone else needed it */ 434 udf_push_eccline(eccline, UDF_SHED_FREE); 435 goto retry; 436 } 437 438 /* unlink this entry */ 439 LIST_REMOVE(eccline, hashchain); 440 KASSERT(eccline->flags & ECC_FLOATING); 441 KASSERT(eccline->queued_on == 0); 442 443 eccline_blob = eccline->blob; 444 eccline->flags = ECC_FLOATING | ECC_LOCKED; 445 } else { 446 eccline->flags = ECC_FLOATING | ECC_LOCKED; 447 priv->num_floating++; 448 } 449 450 eccline->queued_on = 0; 451 eccline->blob = eccline_blob; 452 eccline->buf = getiobuf(NULL, true); 453 eccline->buf->b_private = eccline; /* IMPORTANT */ 454 455 /* initialise eccline blob */ 456 /* XXX memset expensive and strictly not needed XXX */ 457 memset(eccline->blob, 0, blobsize); 458 459 eccline->ump = ump; 460 eccline->present = eccline->readin = eccline->dirty = 0; 461 eccline->error = 0; 462 eccline->refcnt = 0; 463 memset(eccline->bufs, 0, UDF_MAX_PACKET_SIZE * sizeof(struct buf *)); 464 465 eccline->start_sector = start_sector; 466 eccline->buf->b_lblkno = start_sector; 467 eccline->buf->b_blkno = start_sector; 468 eccline->buf->b_rawblkno = start_sector; 469 470 LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain); 471 472 /* 473 * TODO possible optimalisation for checking overlap with partitions 474 * to get a clue on future eccline usage 475 */ 476 477 KASSERT(eccline->refcnt == 0); 478 KASSERT(eccline->flags & ECC_FLOATING); 479 KASSERT(eccline->flags & ECC_LOCKED); 480 mutex_exit(&priv->discstrat_mutex); 481 482 return eccline; 483 } 484 485 486 static void 487 udf_puteccline(struct udf_eccline *eccline) 488 { 489 struct strat_private *priv = PRIV(eccline->ump); 490 struct udf_mount *ump = eccline->ump; 491 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1; 492 int new_queue; 493 494 mutex_enter(&priv->discstrat_mutex); 495 496 DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n", 497 eccline->start_sector, eccline->refcnt)); 498 499 KASSERT(eccline->flags & ECC_LOCKED); 500 KASSERT(eccline->flags & ECC_FLOATING); 501 502 /* clear all read bits that are already read in */ 503 if (eccline->readin & eccline->present) 504 eccline->readin &= (~eccline->present) & allbits; 505 506 /* if we have active nodes we dont set it on seqwriting */ 507 if (eccline->refcnt > 1) 508 eccline->flags &= ~ECC_SEQWRITING; 509 510 /* select state */ 511 new_queue = UDF_SHED_FREE; 512 if (eccline->refcnt > 0) 513 new_queue = UDF_SHED_IDLE; 514 if (eccline->flags & ECC_WANTED) 515 new_queue = UDF_SHED_IDLE; 516 if (eccline->readin) 517 new_queue = UDF_SHED_READING; 518 if (eccline->dirty) { 519 new_queue = UDF_SHED_WAITING; 520 vfs_timestamp(&eccline->wait_time); 521 eccline->wait_time.tv_sec += ECC_WAITTIME; 522 523 if (eccline->present == allbits) { 524 new_queue = UDF_SHED_WRITING; 525 if (eccline->flags & ECC_SEQWRITING) 526 new_queue = UDF_SHED_SEQWRITING; 527 } 528 } 529 udf_push_eccline(eccline, new_queue); 530 531 mutex_exit(&priv->discstrat_mutex); 532 } 533 534 /* --------------------------------------------------------------------- */ 535 536 static int 537 udf_create_nodedscr_rmw(struct udf_strat_args *args) 538 { 539 union dscrptr **dscrptr = &args->dscr; 540 struct udf_mount *ump = args->ump; 541 struct long_ad *icb = args->icb; 542 struct udf_eccline *eccline; 543 uint64_t bit; 544 uint32_t sectornr, lb_size, dummy; 545 uint8_t *mem; 546 int error, eccsect; 547 548 error = udf_translate_vtop(ump, icb, §ornr, &dummy); 549 if (error) 550 return error; 551 552 lb_size = udf_rw32(ump->logical_vol->lb_size); 553 554 /* get our eccline */ 555 eccline = udf_geteccline(ump, sectornr, 0); 556 eccsect = sectornr - eccline->start_sector; 557 558 bit = (uint64_t) 1 << eccsect; 559 eccline->readin &= ~bit; /* just in case */ 560 eccline->present |= bit; 561 eccline->dirty &= ~bit; /* Err... euhm... clean? */ 562 563 eccline->refcnt++; 564 565 /* clear space */ 566 mem = ((uint8_t *) eccline->blob) + eccsect * lb_size; 567 memset(mem, 0, lb_size); 568 569 udf_puteccline(eccline); 570 571 *dscrptr = (union dscrptr *) mem; 572 return 0; 573 } 574 575 576 static void 577 udf_free_nodedscr_rmw(struct udf_strat_args *args) 578 { 579 struct udf_mount *ump = args->ump; 580 struct long_ad *icb = args->icb; 581 struct udf_eccline *eccline; 582 uint64_t bit; 583 uint32_t sectornr, dummy; 584 int error, eccsect; 585 586 error = udf_translate_vtop(ump, icb, §ornr, &dummy); 587 if (error) 588 return; 589 590 /* get our eccline */ 591 eccline = udf_geteccline(ump, sectornr, 0); 592 eccsect = sectornr - eccline->start_sector; 593 594 bit = (uint64_t) 1 << eccsect; 595 KASSERT(eccline->present & bit); 596 597 eccline->readin &= ~bit; /* just in case */ 598 /* XXX eccline->dirty? */ 599 600 KASSERT(eccline->refcnt >= 1); 601 eccline->refcnt--; 602 603 udf_puteccline(eccline); 604 } 605 606 607 static int 608 udf_read_nodedscr_rmw(struct udf_strat_args *args) 609 { 610 union dscrptr **dscrptr = &args->dscr; 611 struct udf_mount *ump = args->ump; 612 struct long_ad *icb = args->icb; 613 struct strat_private *priv; 614 struct udf_eccline *eccline; 615 uint64_t bit; 616 uint32_t sectornr, dummy; 617 uint8_t *pos; 618 int sector_size = ump->discinfo.sector_size; 619 int lb_size __diagused = udf_rw32(ump->logical_vol->lb_size); 620 int i, error, dscrlen, eccsect; 621 622 KASSERT(sector_size == lb_size); 623 error = udf_translate_vtop(ump, icb, §ornr, &dummy); 624 if (error) 625 return error; 626 627 /* get our eccline */ 628 eccline = udf_geteccline(ump, sectornr, 0); 629 eccsect = sectornr - eccline->start_sector; 630 631 bit = (uint64_t) 1 << eccsect; 632 if ((eccline->present & bit) == 0) { 633 /* mark bit for readin */ 634 eccline->readin |= bit; 635 eccline->refcnt++; /* prevent recycling */ 636 KASSERT(eccline->bufs[eccsect] == NULL); 637 udf_puteccline(eccline); 638 639 /* wait for completion */ 640 priv = PRIV(eccline->ump); 641 mutex_enter(&priv->discstrat_mutex); 642 while (((eccline->present | eccline->error) & bit) == 0) { 643 error = cv_timedwait(&priv->discstrat_cv, 644 &priv->discstrat_mutex, 645 hz/8); 646 if (error == EWOULDBLOCK) 647 DPRINTF(LOCKING, ("eccline waiting for read\n")); 648 } 649 mutex_exit(&priv->discstrat_mutex); 650 651 /* reget our line */ 652 eccline = udf_geteccline(ump, sectornr, 0); 653 KASSERT(eccline->refcnt >= 1); 654 eccline->refcnt--; /* undo refcnt */ 655 656 if (eccline->error & bit) { 657 *dscrptr = NULL; 658 udf_puteccline(eccline); 659 return EIO; /* XXX error code */ 660 } 661 } 662 663 *dscrptr = (union dscrptr *) 664 (((uint8_t *) eccline->blob) + eccsect * sector_size); 665 666 /* code from read_phys_descr */ 667 /* check if its a valid tag */ 668 error = udf_check_tag(*dscrptr); 669 if (error) { 670 /* check if its an empty block */ 671 pos = (uint8_t *) *dscrptr; 672 for (i = 0; i < sector_size; i++, pos++) { 673 if (*pos) break; 674 } 675 if (i == sector_size) { 676 /* return no error but with no dscrptr */ 677 error = 0; 678 } 679 *dscrptr = NULL; 680 udf_puteccline(eccline); 681 return error; 682 } 683 684 /* calculate descriptor size */ 685 dscrlen = udf_tagsize(*dscrptr, sector_size); 686 error = udf_check_tag_payload(*dscrptr, dscrlen); 687 if (error) { 688 *dscrptr = NULL; 689 udf_puteccline(eccline); 690 return error; 691 } 692 693 /* we have a hold since it has a node descriptor */ 694 eccline->refcnt++; 695 udf_puteccline(eccline); 696 697 return 0; 698 } 699 700 701 static int 702 udf_write_nodedscr_rmw(struct udf_strat_args *args) 703 { 704 union dscrptr *dscrptr = args->dscr; 705 struct udf_mount *ump = args->ump; 706 struct long_ad *icb = args->icb; 707 struct udf_node *udf_node = args->udf_node; 708 struct udf_eccline *eccline; 709 uint64_t bit; 710 uint32_t sectornr, logsectornr, dummy; 711 // int waitfor = args->waitfor; 712 int sector_size = ump->discinfo.sector_size; 713 int lb_size __diagused = udf_rw32(ump->logical_vol->lb_size); 714 int error, eccsect; 715 716 KASSERT(sector_size == lb_size); 717 sectornr = 0; 718 error = udf_translate_vtop(ump, icb, §ornr, &dummy); 719 if (error) 720 return error; 721 722 /* get our eccline */ 723 eccline = udf_geteccline(ump, sectornr, 0); 724 eccsect = sectornr - eccline->start_sector; 725 726 bit = (uint64_t) 1 << eccsect; 727 728 /* old callback still pending? */ 729 if (eccline->bufs[eccsect]) { 730 DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor" 731 " over buffer?\n")); 732 nestiobuf_done(eccline->bufs[eccsect], 733 eccline->bufs_len[eccsect], 734 0); 735 eccline->bufs[eccsect] = NULL; 736 } 737 738 /* set sector number in the descriptor and validate */ 739 dscrptr = (union dscrptr *) 740 (((uint8_t *) eccline->blob) + eccsect * sector_size); 741 KASSERT(dscrptr == args->dscr); 742 743 logsectornr = udf_rw32(icb->loc.lb_num); 744 dscrptr->tag.tag_loc = udf_rw32(logsectornr); 745 udf_validate_tag_and_crc_sums(dscrptr); 746 747 udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE); 748 749 /* set our flags */ 750 KASSERT(eccline->present & bit); 751 eccline->dirty |= bit; 752 753 KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size); 754 755 udf_node->outstanding_nodedscr--; 756 if (udf_node->outstanding_nodedscr == 0) { 757 /* XXX still using wakeup! */ 758 UDF_UNLOCK_NODE(udf_node, 0); 759 wakeup(&udf_node->outstanding_nodedscr); 760 } 761 udf_puteccline(eccline); 762 763 /* XXX waitfor not used */ 764 return 0; 765 } 766 767 768 static void 769 udf_queuebuf_rmw(struct udf_strat_args *args) 770 { 771 struct udf_mount *ump = args->ump; 772 struct buf *buf = args->nestbuf; 773 struct desc_tag *tag; 774 struct strat_private *priv = PRIV(ump); 775 struct udf_eccline *eccline; 776 struct long_ad *node_ad_cpy; 777 uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr; 778 uint32_t buf_len, len, sectors, sectornr, our_sectornr; 779 uint32_t bpos; 780 uint16_t vpart_num; 781 uint8_t *fidblk, *src, *dst; 782 int sector_size = ump->discinfo.sector_size; 783 int blks = sector_size / DEV_BSIZE; 784 int eccsect, what, queue, error; 785 786 KASSERT(ump); 787 KASSERT(buf); 788 KASSERT(buf->b_iodone == nestiobuf_iodone); 789 790 blknr = buf->b_blkno; 791 our_sectornr = blknr / blks; 792 793 what = buf->b_udf_c_type; 794 queue = UDF_SHED_READING; 795 if ((buf->b_flags & B_READ) == 0) { 796 /* writing */ 797 queue = UDF_SHED_SEQWRITING; 798 if (what == UDF_C_ABSOLUTE) 799 queue = UDF_SHED_WRITING; 800 if (what == UDF_C_DSCR) 801 queue = UDF_SHED_WRITING; 802 if (what == UDF_C_NODE) 803 queue = UDF_SHED_WRITING; 804 } 805 806 if (queue == UDF_SHED_READING) { 807 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d," 808 "b_resid %d, b_bcount %d, b_bufsize %d\n", 809 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type, 810 buf->b_resid, buf->b_bcount, buf->b_bufsize)); 811 812 /* mark bits for reading */ 813 buf_len = buf->b_bcount; 814 sectornr = our_sectornr; 815 eccline = udf_geteccline(ump, sectornr, 0); 816 eccsect = sectornr - eccline->start_sector; 817 bpos = 0; 818 while (buf_len) { 819 len = MIN(buf_len, sector_size); 820 if ((eccsect < 0) || (eccsect >= ump->packet_size)) { 821 udf_puteccline(eccline); 822 eccline = udf_geteccline(ump, sectornr, 0); 823 eccsect = sectornr - eccline->start_sector; 824 } 825 bit = (uint64_t) 1 << eccsect; 826 error = eccline->error & bit ? EIO : 0; 827 if (eccline->present & bit) { 828 src = (uint8_t *) eccline->blob + 829 eccsect * sector_size; 830 dst = (uint8_t *) buf->b_data + bpos; 831 if (!error) 832 memcpy(dst, src, len); 833 nestiobuf_done(buf, len, error); 834 } else { 835 eccline->readin |= bit; 836 KASSERT(eccline->bufs[eccsect] == NULL); 837 eccline->bufs[eccsect] = buf; 838 eccline->bufs_bpos[eccsect] = bpos; 839 eccline->bufs_len[eccsect] = len; 840 } 841 bpos += sector_size; 842 eccsect++; 843 sectornr++; 844 buf_len -= len; 845 } 846 udf_puteccline(eccline); 847 return; 848 } 849 850 if (queue == UDF_SHED_WRITING) { 851 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d " 852 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n", 853 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type, 854 buf->b_resid, buf->b_bcount, buf->b_bufsize)); 855 856 /* if we have FIDs fixup using buffer's sector number(s) */ 857 if (buf->b_udf_c_type == UDF_C_FIDS) 858 panic("UDF_C_FIDS in SHED_WRITING!\n"); 859 860 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type); 861 862 /* copy parts into the bufs and set for writing */ 863 buf_len = buf->b_bcount; 864 sectornr = our_sectornr; 865 eccline = udf_geteccline(ump, sectornr, 0); 866 eccsect = sectornr - eccline->start_sector; 867 bpos = 0; 868 while (buf_len) { 869 len = MIN(buf_len, sector_size); 870 if ((eccsect < 0) || (eccsect >= ump->packet_size)) { 871 udf_puteccline(eccline); 872 eccline = udf_geteccline(ump, sectornr, 0); 873 eccsect = sectornr - eccline->start_sector; 874 } 875 bit = (uint64_t) 1 << eccsect; 876 KASSERT((eccline->readin & bit) == 0); 877 eccline->present |= bit; 878 eccline->dirty |= bit; 879 if (eccline->bufs[eccsect]) { 880 /* old callback still pending */ 881 nestiobuf_done(eccline->bufs[eccsect], 882 eccline->bufs_len[eccsect], 883 0); 884 eccline->bufs[eccsect] = NULL; 885 } 886 887 src = (uint8_t *) buf->b_data + bpos; 888 dst = (uint8_t *) eccline->blob + eccsect * sector_size; 889 if (len != sector_size) 890 memset(dst, 0, sector_size); 891 memcpy(dst, src, len); 892 893 /* note that its finished for this extent */ 894 eccline->bufs[eccsect] = NULL; 895 nestiobuf_done(buf, len, 0); 896 897 bpos += sector_size; 898 eccsect++; 899 sectornr++; 900 buf_len -= len; 901 } 902 udf_puteccline(eccline); 903 return; 904 905 } 906 907 /* sequential writing */ 908 KASSERT(queue == UDF_SHED_SEQWRITING); 909 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX " 910 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n", 911 buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount, 912 buf->b_bufsize)); 913 /* 914 * Buffers should not have been allocated to disc addresses yet on 915 * this queue. Note that a buffer can get multiple extents allocated. 916 * Note that it *looks* like the normal writing but its different in 917 * the details. 918 * 919 * lmapping contains lb_num relative to base partition. 920 * 921 * XXX should we try to claim/organize the allocated memory to 922 * block-aligned pieces? 923 */ 924 mutex_enter(&priv->seqwrite_mutex); 925 926 lmapping = ump->la_lmapping; 927 node_ad_cpy = ump->la_node_ad_cpy; 928 929 /* logically allocate buf and map it in the file */ 930 udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num); 931 932 /* if we have FIDs, fixup using the new allocation table */ 933 if (buf->b_udf_c_type == UDF_C_FIDS) { 934 buf_len = buf->b_bcount; 935 bpos = 0; 936 lmappos = lmapping; 937 while (buf_len) { 938 sectornr = *lmappos++; 939 len = MIN(buf_len, sector_size); 940 fidblk = (uint8_t *) buf->b_data + bpos; 941 udf_fixup_fid_block(fidblk, sector_size, 942 0, len, sectornr); 943 bpos += len; 944 buf_len -= len; 945 } 946 } 947 if (buf->b_udf_c_type == UDF_C_METADATA_SBM) { 948 if (buf->b_lblkno == 0) { 949 /* update the tag location inside */ 950 tag = (struct desc_tag *) buf->b_data; 951 tag->tag_loc = udf_rw32(*lmapping); 952 udf_validate_tag_and_crc_sums(buf->b_data); 953 } 954 } 955 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type); 956 957 /* 958 * Translate new mappings in lmapping to pmappings. 959 * pmapping to contain lb_nums as used for disc addressing. 960 */ 961 pmapping = ump->la_pmapping; 962 sectors = (buf->b_bcount + sector_size -1) / sector_size; 963 udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping); 964 965 /* copy parts into the bufs and set for writing */ 966 pmappos = pmapping; 967 buf_len = buf->b_bcount; 968 sectornr = *pmappos++; 969 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING); 970 eccsect = sectornr - eccline->start_sector; 971 bpos = 0; 972 while (buf_len) { 973 len = MIN(buf_len, sector_size); 974 eccsect = sectornr - eccline->start_sector; 975 if ((eccsect < 0) || (eccsect >= ump->packet_size)) { 976 eccline->flags |= ECC_SEQWRITING; 977 udf_puteccline(eccline); 978 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING); 979 eccsect = sectornr - eccline->start_sector; 980 } 981 bit = (uint64_t) 1 << eccsect; 982 KASSERT((eccline->readin & bit) == 0); 983 eccline->present |= bit; 984 eccline->dirty |= bit; 985 eccline->bufs[eccsect] = NULL; 986 987 src = (uint8_t *) buf->b_data + bpos; 988 dst = (uint8_t *) 989 eccline->blob + eccsect * sector_size; 990 if (len != sector_size) 991 memset(dst, 0, sector_size); 992 memcpy(dst, src, len); 993 994 /* note that its finished for this extent */ 995 nestiobuf_done(buf, len, 0); 996 997 bpos += sector_size; 998 sectornr = *pmappos++; 999 buf_len -= len; 1000 } 1001 eccline->flags |= ECC_SEQWRITING; 1002 udf_puteccline(eccline); 1003 mutex_exit(&priv->seqwrite_mutex); 1004 } 1005 1006 /* --------------------------------------------------------------------- */ 1007 1008 static void 1009 udf_sync_caches_rmw(struct udf_strat_args *args) 1010 { 1011 struct udf_mount *ump = args->ump; 1012 1013 udf_mmc_synchronise_caches(ump); 1014 } 1015 1016 /* --------------------------------------------------------------------- */ 1017 1018 static void 1019 udf_shedule_read_callback(struct buf *buf) 1020 { 1021 struct udf_eccline *eccline = BTOE(buf); 1022 struct udf_mount *ump = eccline->ump; 1023 uint64_t bit; 1024 uint8_t *src, *dst; 1025 int sector_size = ump->discinfo.sector_size; 1026 int error, i, len; 1027 1028 DPRINTF(ECCLINE, ("read callback called on buf %p\n", buf)); 1029 1030 /* post process read action */ 1031 KASSERT(eccline->flags & ECC_LOCKED); 1032 error = buf->b_error; 1033 for (i = 0; i < ump->packet_size; i++) { 1034 bit = (uint64_t) 1 << i; 1035 src = (uint8_t *) buf->b_data + i * sector_size; 1036 dst = (uint8_t *) eccline->blob + i * sector_size; 1037 if (eccline->present & bit) 1038 continue; 1039 eccline->present |= bit; 1040 if (error) 1041 eccline->error |= bit; 1042 if (eccline->bufs[i]) { 1043 dst = (uint8_t *) eccline->bufs[i]->b_data + 1044 eccline->bufs_bpos[i]; 1045 len = eccline->bufs_len[i]; 1046 if (!error) 1047 memcpy(dst, src, len); 1048 nestiobuf_done(eccline->bufs[i], len, error); 1049 eccline->bufs[i] = NULL; 1050 } 1051 1052 } 1053 KASSERT(buf->b_data == eccline->blob); 1054 KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1); 1055 1056 /* 1057 * XXX TODO what to do on read errors? read in all sectors 1058 * synchronously and allocate a sparable entry? 1059 */ 1060 1061 udf_puteccline(eccline); 1062 DPRINTF(ECCLINE, ("read callback finished\n")); 1063 } 1064 1065 1066 static void 1067 udf_shedule_write_callback(struct buf *buf) 1068 { 1069 struct udf_eccline *eccline = BTOE(buf); 1070 struct udf_mount *ump = eccline->ump; 1071 uint64_t bit; 1072 int error, i; 1073 1074 DPRINTF(ECCLINE, ("write callback called on buf %p\n", buf)); 1075 1076 /* post process write action */ 1077 KASSERT(eccline->flags & ECC_LOCKED); 1078 error = buf->b_error; 1079 for (i = 0; i < ump->packet_size; i++) { 1080 bit = (uint64_t) 1 << i; 1081 if ((eccline->dirty & bit) == 0) 1082 continue; 1083 if (error) { 1084 eccline->error |= bit; 1085 } else { 1086 eccline->dirty &= ~bit; 1087 } 1088 1089 KASSERT(eccline->bufs[i] == 0); 1090 } 1091 KASSERT(eccline->dirty == 0); 1092 KASSERT(error == 0); 1093 1094 /* 1095 * XXX TODO on write errors allocate a sparable entry and reissue 1096 */ 1097 1098 udf_puteccline(eccline); 1099 DPRINTF(ECCLINE, ("write callback finished\n")); 1100 } 1101 1102 1103 static void 1104 udf_issue_eccline(struct udf_eccline *eccline, int queued_on) 1105 { 1106 struct udf_mount *ump = eccline->ump; 1107 struct strat_private *priv = PRIV(ump); 1108 struct buf *buf, *nestbuf; 1109 uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1; 1110 uint32_t start; 1111 int sector_size = ump->discinfo.sector_size; 1112 int blks = sector_size / DEV_BSIZE; 1113 int i; 1114 1115 KASSERT(eccline->flags & ECC_LOCKED); 1116 1117 if (queued_on == UDF_SHED_READING) { 1118 DPRINTF(SHEDULE, ("udf_issue_eccline reading : ")); 1119 /* read all bits that are not yet present */ 1120 eccline->readin = (~eccline->present) & allbits; 1121 KASSERT(eccline->readin); 1122 start = eccline->start_sector; 1123 buf = eccline->buf; 1124 buf->b_flags = B_READ | B_ASYNC; 1125 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */ 1126 buf->b_oflags = 0; 1127 buf->b_iodone = udf_shedule_read_callback; 1128 buf->b_data = eccline->blob; 1129 buf->b_bcount = ump->packet_size * sector_size; 1130 buf->b_resid = buf->b_bcount; 1131 buf->b_bufsize = buf->b_bcount; 1132 buf->b_private = eccline; 1133 BIO_SETPRIO(buf, BPRIO_DEFAULT); 1134 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks; 1135 buf->b_proc = NULL; 1136 1137 if (eccline->present != 0) { 1138 for (i = 0; i < ump->packet_size; i++) { 1139 bit = (uint64_t) 1 << i; 1140 if (eccline->present & bit) { 1141 nestiobuf_done(buf, sector_size, 0); 1142 continue; 1143 } 1144 nestbuf = getiobuf(NULL, true); 1145 nestiobuf_setup(buf, nestbuf, i * sector_size, 1146 sector_size); 1147 /* adjust blocknumber to read */ 1148 nestbuf->b_blkno = buf->b_blkno + i*blks; 1149 nestbuf->b_rawblkno = buf->b_rawblkno + i*blks; 1150 1151 DPRINTF(SHEDULE, ("sector %d ", start + i)); 1152 1153 /* mutex dance since it could lock */ 1154 mutex_exit(&priv->discstrat_mutex); 1155 /* call asynchronous */ 1156 VOP_STRATEGY(ump->devvp, nestbuf); 1157 mutex_enter(&priv->discstrat_mutex); 1158 } 1159 DPRINTF(SHEDULE, ("\n")); 1160 return; 1161 } 1162 } else { 1163 /* write or seqwrite */ 1164 DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : ")); 1165 DPRINTF(SHEDULE, ("\n\tpresent %"PRIx64", readin %"PRIx64", " 1166 "dirty %"PRIx64"\n\t", eccline->present, eccline->readin, 1167 eccline->dirty)); 1168 KASSERT(eccline->present == allbits); 1169 1170 start = eccline->start_sector; 1171 buf = eccline->buf; 1172 buf->b_flags = B_WRITE | B_ASYNC; 1173 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */ 1174 buf->b_oflags = 0; 1175 buf->b_iodone = udf_shedule_write_callback; 1176 buf->b_data = eccline->blob; 1177 buf->b_bcount = ump->packet_size * sector_size; 1178 buf->b_resid = buf->b_bcount; 1179 buf->b_bufsize = buf->b_bcount; 1180 buf->b_private = eccline; 1181 BIO_SETPRIO(buf, BPRIO_DEFAULT); 1182 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks; 1183 buf->b_proc = NULL; 1184 } 1185 1186 /* mutex dance since it could lock */ 1187 mutex_exit(&priv->discstrat_mutex); 1188 /* call asynchronous */ 1189 DPRINTF(SHEDULE, ("sector %d for %d\n", 1190 start, ump->packet_size)); 1191 VOP_STRATEGY(ump->devvp, buf); 1192 mutex_enter(&priv->discstrat_mutex); 1193 } 1194 1195 1196 static void 1197 udf_discstrat_thread(void *arg) 1198 { 1199 struct udf_mount *ump = (struct udf_mount *) arg; 1200 struct strat_private *priv = PRIV(ump); 1201 struct udf_eccline *eccline; 1202 struct timespec now, *last; 1203 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1; 1204 int new_queue, wait, work; 1205 1206 work = 1; 1207 priv->thread_running = 1; 1208 mutex_enter(&priv->discstrat_mutex); 1209 priv->num_floating = 0; 1210 while (priv->run_thread || work || priv->num_floating) { 1211 /* get our time */ 1212 vfs_timestamp(&now); 1213 1214 /* maintenance: handle eccline state machine */ 1215 for(;;) { 1216 /* only peek at it */ 1217 eccline = udf_peek_eccline(priv, UDF_SHED_WAITING); 1218 if (eccline == NULL) 1219 break; 1220 1221 /* if not reading, wait until the time has come */ 1222 if ((priv->cur_queue != UDF_SHED_READING) && 1223 (eccline->wait_time.tv_sec - now.tv_sec > 0)) { 1224 UDF_UNLOCK_ECCLINE(eccline); 1225 /* all others are later, so break off */ 1226 break; 1227 } 1228 1229 /* release */ 1230 UDF_UNLOCK_ECCLINE(eccline); 1231 1232 /* do get it */ 1233 eccline = udf_pop_eccline(priv, UDF_SHED_WAITING); 1234 1235 /* requeue according to state */ 1236 new_queue = UDF_SHED_FREE; /* unlikely */ 1237 if (eccline->refcnt > 0) 1238 new_queue = UDF_SHED_IDLE; 1239 if (eccline->flags & ECC_WANTED) 1240 new_queue = UDF_SHED_IDLE; 1241 if (eccline->readin) 1242 new_queue = UDF_SHED_READING; 1243 if (eccline->dirty) { 1244 new_queue = UDF_SHED_READING; 1245 if (eccline->present == allbits) { 1246 new_queue = UDF_SHED_WRITING; 1247 if (eccline->flags & ECC_SEQWRITING) 1248 new_queue = UDF_SHED_SEQWRITING; 1249 } 1250 } 1251 udf_push_eccline(eccline, new_queue); 1252 } 1253 1254 /* maintenance: free excess ecclines */ 1255 while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) { 1256 eccline = udf_pop_eccline(priv, UDF_SHED_FREE); 1257 KASSERT(eccline); 1258 KASSERT(eccline->refcnt == 0); 1259 if (eccline->flags & ECC_WANTED) { 1260 /* we won the race, but we dont want to win */ 1261 DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n")); 1262 udf_push_eccline(eccline, UDF_SHED_IDLE); 1263 } else { 1264 DPRINTF(ECCLINE, ("Removing entry from free list\n")); 1265 udf_dispose_eccline(eccline); 1266 } 1267 } 1268 1269 /* process the current selected queue */ 1270 /* get our time */ 1271 vfs_timestamp(&now); 1272 last = &priv->last_queued[priv->cur_queue]; 1273 1274 /* get our line */ 1275 eccline = udf_pop_eccline(priv, priv->cur_queue); 1276 if (eccline) { 1277 wait = 0; 1278 new_queue = priv->cur_queue; 1279 DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n")); 1280 1281 udf_issue_eccline(eccline, priv->cur_queue); 1282 } else { 1283 /* don't switch too quickly */ 1284 if (now.tv_sec - last->tv_sec < 2) { 1285 /* wait some time */ 1286 cv_timedwait(&priv->discstrat_cv, 1287 &priv->discstrat_mutex, hz); 1288 /* we assume there is work to be done */ 1289 work = 1; 1290 continue; 1291 } 1292 1293 /* XXX select on queue lengths ? */ 1294 wait = 1; 1295 /* check if we can/should switch */ 1296 new_queue = priv->cur_queue; 1297 if (bufq_peek(priv->queues[UDF_SHED_READING])) 1298 new_queue = UDF_SHED_READING; 1299 if (bufq_peek(priv->queues[UDF_SHED_WRITING])) 1300 new_queue = UDF_SHED_WRITING; 1301 if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING])) 1302 new_queue = UDF_SHED_SEQWRITING; 1303 } 1304 1305 /* give room */ 1306 mutex_exit(&priv->discstrat_mutex); 1307 1308 if (new_queue != priv->cur_queue) { 1309 wait = 0; 1310 DPRINTF(SHEDULE, ("switching from %d to %d\n", 1311 priv->cur_queue, new_queue)); 1312 priv->cur_queue = new_queue; 1313 } 1314 mutex_enter(&priv->discstrat_mutex); 1315 1316 /* wait for more if needed */ 1317 if (wait) 1318 cv_timedwait(&priv->discstrat_cv, 1319 &priv->discstrat_mutex, hz/4); /* /8 */ 1320 1321 work = (bufq_peek(priv->queues[UDF_SHED_WAITING]) != NULL); 1322 work |= (bufq_peek(priv->queues[UDF_SHED_READING]) != NULL); 1323 work |= (bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL); 1324 work |= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL); 1325 1326 DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n", 1327 (bufq_peek(priv->queues[UDF_SHED_READING]) != NULL), 1328 (bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL), 1329 (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL), 1330 work, priv->num_floating)); 1331 } 1332 1333 mutex_exit(&priv->discstrat_mutex); 1334 1335 /* tear down remaining ecclines */ 1336 mutex_enter(&priv->discstrat_mutex); 1337 KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL); 1338 KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL); 1339 KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL); 1340 KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL); 1341 KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL); 1342 1343 KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0); 1344 KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0); 1345 KASSERT(priv->num_queued[UDF_SHED_READING] == 0); 1346 KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0); 1347 KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0); 1348 1349 eccline = udf_pop_eccline(priv, UDF_SHED_FREE); 1350 while (eccline) { 1351 udf_dispose_eccline(eccline); 1352 eccline = udf_pop_eccline(priv, UDF_SHED_FREE); 1353 } 1354 KASSERT(priv->num_queued[UDF_SHED_FREE] == 0); 1355 mutex_exit(&priv->discstrat_mutex); 1356 1357 priv->thread_running = 0; 1358 priv->thread_finished = 1; 1359 wakeup(&priv->run_thread); 1360 kthread_exit(0); 1361 /* not reached */ 1362 } 1363 1364 /* --------------------------------------------------------------------- */ 1365 1366 /* 1367 * Buffer memory pool allocator. 1368 */ 1369 1370 static void * 1371 ecclinepool_page_alloc(struct pool *pp, int flags) 1372 { 1373 return (void *)uvm_km_alloc(kernel_map, 1374 MAXBSIZE, MAXBSIZE, 1375 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK) 1376 | UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */); 1377 } 1378 1379 static void 1380 ecclinepool_page_free(struct pool *pp, void *v) 1381 { 1382 uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED); 1383 } 1384 1385 static struct pool_allocator ecclinepool_allocator = { 1386 .pa_alloc = ecclinepool_page_alloc, 1387 .pa_free = ecclinepool_page_free, 1388 .pa_pagesz = MAXBSIZE, 1389 }; 1390 1391 1392 static void 1393 udf_discstrat_init_rmw(struct udf_strat_args *args) 1394 { 1395 struct udf_mount *ump = args->ump; 1396 struct strat_private *priv = PRIV(ump); 1397 uint32_t lb_size, blobsize, hashline; 1398 int i; 1399 1400 KASSERT(ump); 1401 KASSERT(ump->logical_vol); 1402 KASSERT(priv == NULL); 1403 1404 lb_size = udf_rw32(ump->logical_vol->lb_size); 1405 blobsize = ump->packet_size * lb_size; 1406 KASSERT(lb_size > 0); 1407 KASSERT(ump->packet_size <= 64); 1408 1409 /* initialise our memory space */ 1410 ump->strategy_private = malloc(sizeof(struct strat_private), 1411 M_UDFTEMP, M_WAITOK); 1412 priv = ump->strategy_private; 1413 memset(priv, 0 , sizeof(struct strat_private)); 1414 1415 /* initialise locks */ 1416 cv_init(&priv->discstrat_cv, "udfstrat"); 1417 mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE); 1418 mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE); 1419 1420 /* initialise struct eccline pool */ 1421 pool_init(&priv->eccline_pool, sizeof(struct udf_eccline), 1422 0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE); 1423 1424 /* initialise eccline blob pool */ 1425 ecclinepool_allocator.pa_pagesz = blobsize; 1426 pool_init(&priv->ecclineblob_pool, blobsize, 1427 0, 0, 0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE); 1428 1429 /* initialise main queues */ 1430 for (i = 0; i < UDF_SHED_MAX; i++) { 1431 priv->num_queued[i] = 0; 1432 vfs_timestamp(&priv->last_queued[i]); 1433 } 1434 bufq_alloc(&priv->queues[UDF_SHED_WAITING], "fcfs", 1435 BUFQ_SORT_RAWBLOCK); 1436 bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort", 1437 BUFQ_SORT_RAWBLOCK); 1438 bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort", 1439 BUFQ_SORT_RAWBLOCK); 1440 bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0); 1441 1442 /* initialise administrative queues */ 1443 bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0); 1444 bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0); 1445 1446 for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) { 1447 LIST_INIT(&priv->eccline_hash[hashline]); 1448 } 1449 1450 /* create our disk strategy thread */ 1451 priv->cur_queue = UDF_SHED_READING; 1452 priv->thread_finished = 0; 1453 priv->thread_running = 0; 1454 priv->run_thread = 1; 1455 if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/, 1456 udf_discstrat_thread, ump, &priv->queue_lwp, 1457 "%s", "udf_rw")) { 1458 panic("fork udf_rw"); 1459 } 1460 1461 /* wait for thread to spin up */ 1462 while (!priv->thread_running) { 1463 tsleep(&priv->thread_running, PRIBIO+1, "udfshedstart", hz); 1464 } 1465 } 1466 1467 1468 static void 1469 udf_discstrat_finish_rmw(struct udf_strat_args *args) 1470 { 1471 struct udf_mount *ump = args->ump; 1472 struct strat_private *priv = PRIV(ump); 1473 1474 if (ump == NULL) 1475 return; 1476 1477 /* stop our sheduling thread */ 1478 KASSERT(priv->run_thread == 1); 1479 priv->run_thread = 0; 1480 wakeup(priv->queue_lwp); 1481 while (!priv->thread_finished) { 1482 tsleep(&priv->run_thread, PRIBIO + 1, "udfshedfin", hz); 1483 } 1484 /* kthread should be finished now */ 1485 1486 /* cleanup our pools */ 1487 pool_destroy(&priv->eccline_pool); 1488 pool_destroy(&priv->ecclineblob_pool); 1489 1490 cv_destroy(&priv->discstrat_cv); 1491 mutex_destroy(&priv->discstrat_mutex); 1492 mutex_destroy(&priv->seqwrite_mutex); 1493 1494 /* free our private space */ 1495 free(ump->strategy_private, M_UDFTEMP); 1496 ump->strategy_private = NULL; 1497 } 1498 1499 /* --------------------------------------------------------------------- */ 1500 1501 struct udf_strategy udf_strat_rmw = 1502 { 1503 udf_create_nodedscr_rmw, 1504 udf_free_nodedscr_rmw, 1505 udf_read_nodedscr_rmw, 1506 udf_write_nodedscr_rmw, 1507 udf_queuebuf_rmw, 1508 udf_sync_caches_rmw, 1509 udf_discstrat_init_rmw, 1510 udf_discstrat_finish_rmw 1511 }; 1512 1513