/*	$NetBSD: rf_paritymap.c,v 1.8 2011/04/27 07:55:15 mrg Exp $	*/

/*-
 * Copyright (c) 2009 Jed Davis.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29 #include <sys/cdefs.h> 30 __KERNEL_RCSID(0, "$NetBSD: rf_paritymap.c,v 1.8 2011/04/27 07:55:15 mrg Exp $"); 31 32 #include <sys/param.h> 33 #include <sys/callout.h> 34 #include <sys/kmem.h> 35 #include <sys/mutex.h> 36 #include <sys/rwlock.h> 37 #include <sys/systm.h> 38 #include <sys/types.h> 39 40 #include <dev/raidframe/rf_paritymap.h> 41 #include <dev/raidframe/rf_stripelocks.h> 42 #include <dev/raidframe/rf_layout.h> 43 #include <dev/raidframe/rf_raid.h> 44 #include <dev/raidframe/rf_parityscan.h> 45 #include <dev/raidframe/rf_kintf.h> 46 47 /* Important parameters: */ 48 #define REGION_MINSIZE (25ULL << 20) 49 #define DFL_TICKMS 40000 50 #define DFL_COOLDOWN 8 /* 7-8 intervals of 40s = 5min +/- 20s */ 51 52 /* Internal-use flag bits. */ 53 #define TICKING 1 54 #define TICKED 2 55 56 /* Prototypes! */ 57 static void rf_paritymap_write_locked(struct rf_paritymap *); 58 static void rf_paritymap_tick(void *); 59 static u_int rf_paritymap_nreg(RF_Raid_t *); 60 61 /* Extract the current status of the parity map. */ 62 void 63 rf_paritymap_status(struct rf_paritymap *pm, struct rf_pmstat *ps) 64 { 65 memset(ps, 0, sizeof(*ps)); 66 if (pm == NULL) 67 ps->enabled = 0; 68 else { 69 ps->enabled = 1; 70 ps->region_size = pm->region_size; 71 mutex_enter(&pm->lock); 72 memcpy(&ps->params, &pm->params, sizeof(ps->params)); 73 memcpy(ps->dirty, pm->disk_now, sizeof(ps->dirty)); 74 memcpy(&ps->ctrs, &pm->ctrs, sizeof(ps->ctrs)); 75 mutex_exit(&pm->lock); 76 } 77 } 78 79 /* 80 * Test whether parity in a given sector is suspected of being inconsistent 81 * on disk (assuming that any pending I/O to it is allowed to complete). 82 * This may be of interest to future work on parity scrubbing. 83 */ 84 int 85 rf_paritymap_test(struct rf_paritymap *pm, daddr_t sector) 86 { 87 unsigned region = sector / pm->region_size; 88 int retval; 89 90 mutex_enter(&pm->lock); 91 retval = isset(pm->disk_boot->bits, region) ? 
1 : 0; 92 mutex_exit(&pm->lock); 93 return retval; 94 } 95 96 /* To be called before a write to the RAID is submitted. */ 97 void 98 rf_paritymap_begin(struct rf_paritymap *pm, daddr_t offset, daddr_t size) 99 { 100 unsigned i, b, e; 101 102 b = offset / pm->region_size; 103 e = (offset + size - 1) / pm->region_size; 104 105 for (i = b; i <= e; i++) 106 rf_paritymap_begin_region(pm, i); 107 } 108 109 /* To be called after a write to the RAID completes. */ 110 void 111 rf_paritymap_end(struct rf_paritymap *pm, daddr_t offset, daddr_t size) 112 { 113 unsigned i, b, e; 114 115 b = offset / pm->region_size; 116 e = (offset + size - 1) / pm->region_size; 117 118 for (i = b; i <= e; i++) 119 rf_paritymap_end_region(pm, i); 120 } 121 122 void 123 rf_paritymap_begin_region(struct rf_paritymap *pm, unsigned region) 124 { 125 int needs_write; 126 127 KASSERT(region < RF_PARITYMAP_NREG); 128 pm->ctrs.nwrite++; 129 130 /* If it was being kept warm, deal with that. */ 131 mutex_enter(&pm->lock); 132 if (pm->current->state[region] < 0) 133 pm->current->state[region] = 0; 134 135 /* This shouldn't happen unless RAIDOUTSTANDING is set too high. 
*/ 136 KASSERT(pm->current->state[region] < 127); 137 pm->current->state[region]++; 138 139 needs_write = isclr(pm->disk_now->bits, region); 140 141 if (needs_write) { 142 KASSERT(pm->current->state[region] == 1); 143 rf_paritymap_write_locked(pm); 144 } 145 146 mutex_exit(&pm->lock); 147 } 148 149 void 150 rf_paritymap_end_region(struct rf_paritymap *pm, unsigned region) 151 { 152 KASSERT(region < RF_PARITYMAP_NREG); 153 154 mutex_enter(&pm->lock); 155 KASSERT(pm->current->state[region] > 0); 156 --pm->current->state[region]; 157 158 if (pm->current->state[region] <= 0) { 159 pm->current->state[region] = -pm->params.cooldown; 160 KASSERT(pm->current->state[region] <= 0); 161 mutex_enter(&pm->lk_flags); 162 if (!(pm->flags & TICKING)) { 163 pm->flags |= TICKING; 164 mutex_exit(&pm->lk_flags); 165 callout_schedule(&pm->ticker, 166 mstohz(pm->params.tickms)); 167 } else 168 mutex_exit(&pm->lk_flags); 169 } 170 mutex_exit(&pm->lock); 171 } 172 173 /* 174 * Updates the parity map to account for any changes in current activity 175 * and/or an ongoing parity scan, then writes it to disk with appropriate 176 * synchronization. 177 */ 178 void 179 rf_paritymap_write(struct rf_paritymap *pm) 180 { 181 mutex_enter(&pm->lock); 182 rf_paritymap_write_locked(pm); 183 mutex_exit(&pm->lock); 184 } 185 186 /* As above, but to be used when pm->lock is already held. 
*/ 187 static void 188 rf_paritymap_write_locked(struct rf_paritymap *pm) 189 { 190 char w, w0; 191 int i, j, setting, clearing; 192 193 setting = clearing = 0; 194 for (i = 0; i < RF_PARITYMAP_NBYTE; i++) { 195 w0 = pm->disk_now->bits[i]; 196 w = pm->disk_boot->bits[i]; 197 198 for (j = 0; j < NBBY; j++) 199 if (pm->current->state[i * NBBY + j] != 0) 200 w |= 1 << j; 201 202 if (w & ~w0) 203 setting = 1; 204 if (w0 & ~w) 205 clearing = 1; 206 207 pm->disk_now->bits[i] = w; 208 } 209 pm->ctrs.ncachesync += setting + clearing; 210 pm->ctrs.nclearing += clearing; 211 212 /* 213 * If bits are being set in the parity map, then a sync is 214 * required afterwards, so that the regions are marked dirty 215 * on disk before any writes to them take place. If bits are 216 * being cleared, then a sync is required before the write, so 217 * that any writes to those regions are processed before the 218 * region is marked clean. (Synchronization is somewhat 219 * overkill; a write ordering barrier would suffice, but we 220 * currently have no way to express that directly.) 221 */ 222 if (clearing) 223 rf_sync_component_caches(pm->raid); 224 rf_paritymap_kern_write(pm->raid, pm->disk_now); 225 if (setting) 226 rf_sync_component_caches(pm->raid); 227 } 228 229 /* Mark all parity as being in need of rewrite. */ 230 void 231 rf_paritymap_invalidate(struct rf_paritymap *pm) 232 { 233 mutex_enter(&pm->lock); 234 memset(pm->disk_boot, ~(unsigned char)0, 235 sizeof(struct rf_paritymap_ondisk)); 236 mutex_exit(&pm->lock); 237 } 238 239 /* Mark all parity as being correct. 
*/ 240 void 241 rf_paritymap_forceclean(struct rf_paritymap *pm) 242 { 243 mutex_enter(&pm->lock); 244 memset(pm->disk_boot, (unsigned char)0, 245 sizeof(struct rf_paritymap_ondisk)); 246 mutex_exit(&pm->lock); 247 } 248 249 /* 250 * The cooldown callout routine just defers its work to a thread; it can't do 251 * the parity map write itself as it would block, and although mutex-induced 252 * blocking is permitted it seems wise to avoid tying up the softint. 253 */ 254 static void 255 rf_paritymap_tick(void *arg) 256 { 257 struct rf_paritymap *pm = arg; 258 259 mutex_enter(&pm->lk_flags); 260 pm->flags |= TICKED; 261 mutex_exit(&pm->lk_flags); 262 263 rf_lock_mutex2(pm->raid->iodone_lock); 264 rf_signal_cond2(pm->raid->iodone_cv); /* XXX */ 265 rf_unlock_mutex2(pm->raid->iodone_lock); 266 } 267 268 /* 269 * This is where the parity cooling work (and rearming the callout if needed) 270 * is done; the raidio thread calls it when woken up, as by the above. 271 */ 272 void 273 rf_paritymap_checkwork(struct rf_paritymap *pm) 274 { 275 int i, zerop, progressp; 276 277 mutex_enter(&pm->lk_flags); 278 if (pm->flags & TICKED) { 279 zerop = progressp = 0; 280 281 pm->flags &= ~TICKED; 282 mutex_exit(&pm->lk_flags); 283 284 mutex_enter(&pm->lock); 285 for (i = 0; i < RF_PARITYMAP_NREG; i++) { 286 if (pm->current->state[i] < 0) { 287 progressp = 1; 288 pm->current->state[i]++; 289 if (pm->current->state[i] == 0) 290 zerop = 1; 291 } 292 } 293 294 if (progressp) 295 callout_schedule(&pm->ticker, 296 mstohz(pm->params.tickms)); 297 else { 298 mutex_enter(&pm->lk_flags); 299 pm->flags &= ~TICKING; 300 mutex_exit(&pm->lk_flags); 301 } 302 303 if (zerop) 304 rf_paritymap_write_locked(pm); 305 mutex_exit(&pm->lock); 306 } else 307 mutex_exit(&pm->lk_flags); 308 } 309 310 /* 311 * Set parity map parameters; used both to alter parameters on the fly and to 312 * establish their initial values. 
Note that setting a parameter to 0 means 313 * to leave the previous setting unchanged, and that if this is done for the 314 * initial setting of "regions", then a default value will be computed based 315 * on the RAID component size. 316 */ 317 int 318 rf_paritymap_set_params(struct rf_paritymap *pm, 319 const struct rf_pmparams *params, int todisk) 320 { 321 int cooldown, tickms; 322 u_int regions; 323 RF_RowCol_t col; 324 RF_ComponentLabel_t *clabel; 325 RF_Raid_t *raidPtr; 326 327 cooldown = params->cooldown != 0 328 ? params->cooldown : pm->params.cooldown; 329 tickms = params->tickms != 0 330 ? params->tickms : pm->params.tickms; 331 regions = params->regions != 0 332 ? params->regions : pm->params.regions; 333 334 if (cooldown < 1 || cooldown > 128) { 335 printf("raid%d: cooldown %d out of range\n", pm->raid->raidid, 336 cooldown); 337 return (-1); 338 } 339 if (tickms < 10) { 340 printf("raid%d: tick time %dms out of range\n", 341 pm->raid->raidid, tickms); 342 return (-1); 343 } 344 if (regions == 0) { 345 regions = rf_paritymap_nreg(pm->raid); 346 } else if (regions > RF_PARITYMAP_NREG) { 347 printf("raid%d: region count %u too large (more than %u)\n", 348 pm->raid->raidid, regions, RF_PARITYMAP_NREG); 349 return (-1); 350 } 351 352 /* XXX any currently warm parity will be used with the new tickms! */ 353 pm->params.cooldown = cooldown; 354 pm->params.tickms = tickms; 355 /* Apply the initial region count, but do not change it after that. 
*/ 356 if (pm->params.regions == 0) 357 pm->params.regions = regions; 358 359 /* So that the newly set parameters can be tested: */ 360 pm->ctrs.nwrite = pm->ctrs.ncachesync = pm->ctrs.nclearing = 0; 361 362 if (todisk) { 363 raidPtr = pm->raid; 364 for (col = 0; col < raidPtr->numCol; col++) { 365 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 366 continue; 367 368 clabel = raidget_component_label(raidPtr, col); 369 clabel->parity_map_ntick = cooldown; 370 clabel->parity_map_tickms = tickms; 371 clabel->parity_map_regions = regions; 372 373 /* Don't touch the disk if it's been spared */ 374 if (clabel->status == rf_ds_spared) 375 continue; 376 377 raidflush_component_label(raidPtr, col); 378 } 379 380 /* handle the spares too... */ 381 for (col = 0; col < raidPtr->numSpare; col++) { 382 if (raidPtr->Disks[raidPtr->numCol+col].status == rf_ds_used_spare) { 383 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col); 384 clabel->parity_map_ntick = cooldown; 385 clabel->parity_map_tickms = tickms; 386 clabel->parity_map_regions = regions; 387 raidflush_component_label(raidPtr, raidPtr->numCol+col); 388 } 389 } 390 } 391 return 0; 392 } 393 394 /* 395 * The number of regions may not be as many as can fit into the map, because 396 * when regions are too small, the overhead of setting parity map bits 397 * becomes significant in comparison to the actual I/O, while the 398 * corresponding gains in parity verification time become negligible. Thus, 399 * a minimum region size (defined above) is imposed. 400 * 401 * Note that, if the number of regions is less than the maximum, then some of 402 * the regions will be "fictional", corresponding to no actual disk; some 403 * parts of the code may process them as normal, but they can not ever be 404 * written to. 
405 */ 406 static u_int 407 rf_paritymap_nreg(RF_Raid_t *raid) 408 { 409 daddr_t bytes_per_disk, nreg; 410 411 bytes_per_disk = raid->sectorsPerDisk << raid->logBytesPerSector; 412 nreg = bytes_per_disk / REGION_MINSIZE; 413 if (nreg > RF_PARITYMAP_NREG) 414 nreg = RF_PARITYMAP_NREG; 415 if (nreg < 1) 416 nreg = 1; 417 418 return (u_int)nreg; 419 } 420 421 /* 422 * Initialize a parity map given specific parameters. This neither reads nor 423 * writes the parity map config in the component labels; for that, see below. 424 */ 425 int 426 rf_paritymap_init(struct rf_paritymap *pm, RF_Raid_t *raid, 427 const struct rf_pmparams *params) 428 { 429 daddr_t rstripes; 430 struct rf_pmparams safe; 431 432 pm->raid = raid; 433 pm->params.regions = 0; 434 if (0 != rf_paritymap_set_params(pm, params, 0)) { 435 /* 436 * If the parameters are out-of-range, then bring the 437 * parity map up with something reasonable, so that 438 * the admin can at least go and fix it (or ignore it 439 * entirely). 440 */ 441 safe.cooldown = DFL_COOLDOWN; 442 safe.tickms = DFL_TICKMS; 443 safe.regions = 0; 444 445 if (0 != rf_paritymap_set_params(pm, &safe, 0)) 446 return (-1); 447 } 448 449 rstripes = howmany(raid->Layout.numStripe, pm->params.regions); 450 pm->region_size = rstripes * raid->Layout.dataSectorsPerStripe; 451 452 callout_init(&pm->ticker, CALLOUT_MPSAFE); 453 callout_setfunc(&pm->ticker, rf_paritymap_tick, pm); 454 pm->flags = 0; 455 456 pm->disk_boot = kmem_alloc(sizeof(struct rf_paritymap_ondisk), 457 KM_SLEEP); 458 pm->disk_now = kmem_alloc(sizeof(struct rf_paritymap_ondisk), 459 KM_SLEEP); 460 pm->current = kmem_zalloc(sizeof(struct rf_paritymap_current), 461 KM_SLEEP); 462 463 rf_paritymap_kern_read(pm->raid, pm->disk_boot); 464 memcpy(pm->disk_now, pm->disk_boot, sizeof(*pm->disk_now)); 465 466 mutex_init(&pm->lock, MUTEX_DEFAULT, IPL_NONE); 467 mutex_init(&pm->lk_flags, MUTEX_DEFAULT, IPL_SOFTCLOCK); 468 469 return 0; 470 } 471 472 /* 473 * Destroys a parity map; unless 
"force" is set, also cleans parity for any 474 * regions which were still in cooldown (but are not dirty on disk). 475 */ 476 void 477 rf_paritymap_destroy(struct rf_paritymap *pm, int force) 478 { 479 int i; 480 481 callout_halt(&pm->ticker, NULL); /* XXX stop? halt? */ 482 callout_destroy(&pm->ticker); 483 484 if (!force) { 485 for (i = 0; i < RF_PARITYMAP_NREG; i++) { 486 /* XXX check for > 0 ? */ 487 if (pm->current->state[i] < 0) 488 pm->current->state[i] = 0; 489 } 490 491 rf_paritymap_write_locked(pm); 492 } 493 494 mutex_destroy(&pm->lock); 495 mutex_destroy(&pm->lk_flags); 496 497 kmem_free(pm->disk_boot, sizeof(struct rf_paritymap_ondisk)); 498 kmem_free(pm->disk_now, sizeof(struct rf_paritymap_ondisk)); 499 kmem_free(pm->current, sizeof(struct rf_paritymap_current)); 500 } 501 502 /* 503 * Rewrite parity, taking parity map into account; this is the equivalent of 504 * the old rf_RewriteParity, and is likewise to be called from a suitable 505 * thread and shouldn't have multiple copies running in parallel and so on. 506 * 507 * Note that the fictional regions are "cleaned" in one shot, so that very 508 * small RAIDs (useful for testing) will not experience potentially severe 509 * regressions in rewrite time. 510 */ 511 int 512 rf_paritymap_rewrite(struct rf_paritymap *pm) 513 { 514 int i, ret_val = 0; 515 daddr_t reg_b, reg_e; 516 517 /* Process only the actual regions. 
*/ 518 for (i = 0; i < pm->params.regions; i++) { 519 mutex_enter(&pm->lock); 520 if (isset(pm->disk_boot->bits, i)) { 521 mutex_exit(&pm->lock); 522 523 reg_b = i * pm->region_size; 524 reg_e = reg_b + pm->region_size; 525 if (reg_e > pm->raid->totalSectors) 526 reg_e = pm->raid->totalSectors; 527 528 if (rf_RewriteParityRange(pm->raid, reg_b, 529 reg_e - reg_b)) { 530 ret_val = 1; 531 if (pm->raid->waitShutdown) 532 return ret_val; 533 } else { 534 mutex_enter(&pm->lock); 535 clrbit(pm->disk_boot->bits, i); 536 rf_paritymap_write_locked(pm); 537 mutex_exit(&pm->lock); 538 } 539 } else { 540 mutex_exit(&pm->lock); 541 } 542 } 543 544 /* Now, clear the fictional regions, if any. */ 545 rf_paritymap_forceclean(pm); 546 rf_paritymap_write(pm); 547 548 return ret_val; 549 } 550 551 /* 552 * How to merge the on-disk parity maps when reading them in from the 553 * various components; returns whether they differ. In the case that 554 * they do differ, sets *dst to the union of *dst and *src. 555 * 556 * In theory, it should be safe to take the intersection (or just pick 557 * a single component arbitrarily), but the paranoid approach costs 558 * little. 559 * 560 * Appropriate locking, if any, is the responsibility of the caller. 561 */ 562 int 563 rf_paritymap_merge(struct rf_paritymap_ondisk *dst, 564 struct rf_paritymap_ondisk *src) 565 { 566 int i, discrep = 0; 567 568 for (i = 0; i < RF_PARITYMAP_NBYTE; i++) { 569 if (dst->bits[i] != src->bits[i]) 570 discrep = 1; 571 dst->bits[i] |= src->bits[i]; 572 } 573 574 return discrep; 575 } 576 577 /* 578 * Detach a parity map from its RAID. This is not meant to be applied except 579 * when unconfiguring the RAID after all I/O has been resolved, as otherwise 580 * an out-of-date parity map could be treated as current. 
581 */ 582 void 583 rf_paritymap_detach(RF_Raid_t *raidPtr) 584 { 585 if (raidPtr->parity_map == NULL) 586 return; 587 588 rf_lock_mutex2(raidPtr->iodone_lock); 589 struct rf_paritymap *pm = raidPtr->parity_map; 590 raidPtr->parity_map = NULL; 591 rf_unlock_mutex2(raidPtr->iodone_lock); 592 /* XXXjld is that enough locking? Or too much? */ 593 rf_paritymap_destroy(pm, 0); 594 kmem_free(pm, sizeof(*pm)); 595 } 596 597 /* 598 * Is this RAID set ineligible for parity-map use due to not actually 599 * having any parity? (If so, rf_paritymap_attach is a no-op, but 600 * rf_paritymap_{get,set}_disable will still pointlessly act on the 601 * component labels.) 602 */ 603 int 604 rf_paritymap_ineligible(RF_Raid_t *raidPtr) 605 { 606 return raidPtr->Layout.map->faultsTolerated == 0; 607 } 608 609 /* 610 * Attach a parity map to a RAID set if appropriate. Includes 611 * configure-time processing of parity-map fields of component label. 612 */ 613 void 614 rf_paritymap_attach(RF_Raid_t *raidPtr, int force) 615 { 616 RF_RowCol_t col; 617 int pm_use, pm_zap; 618 int g_tickms, g_ntick, g_regions; 619 int good; 620 RF_ComponentLabel_t *clabel; 621 u_int flags, regions; 622 struct rf_pmparams params; 623 624 if (rf_paritymap_ineligible(raidPtr)) { 625 /* There isn't any parity. */ 626 return; 627 } 628 629 pm_use = 1; 630 pm_zap = 0; 631 g_tickms = DFL_TICKMS; 632 g_ntick = DFL_COOLDOWN; 633 g_regions = 0; 634 635 /* 636 * Collect opinions on the set config. If this is the initial 637 * config (raidctl -C), treat all labels as invalid, since 638 * there may be random data present. 639 */ 640 if (!force) { 641 for (col = 0; col < raidPtr->numCol; col++) { 642 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 643 continue; 644 clabel = raidget_component_label(raidPtr, col); 645 flags = clabel->parity_map_flags; 646 /* Check for use by non-parity-map kernel. 
*/ 647 if (clabel->parity_map_modcount 648 != clabel->mod_counter) { 649 flags &= ~RF_PMLABEL_WASUSED; 650 } 651 652 if (flags & RF_PMLABEL_VALID) { 653 g_tickms = clabel->parity_map_tickms; 654 g_ntick = clabel->parity_map_ntick; 655 regions = clabel->parity_map_regions; 656 if (g_regions == 0) 657 g_regions = regions; 658 else if (g_regions != regions) { 659 pm_zap = 1; /* important! */ 660 } 661 662 if (flags & RF_PMLABEL_DISABLE) { 663 pm_use = 0; 664 } 665 if (!(flags & RF_PMLABEL_WASUSED)) { 666 pm_zap = 1; 667 } 668 } else { 669 pm_zap = 1; 670 } 671 } 672 } else { 673 pm_zap = 1; 674 } 675 676 /* Finally, create and attach the parity map. */ 677 if (pm_use) { 678 params.cooldown = g_ntick; 679 params.tickms = g_tickms; 680 params.regions = g_regions; 681 682 raidPtr->parity_map = kmem_alloc(sizeof(struct rf_paritymap), 683 KM_SLEEP); 684 if (0 != rf_paritymap_init(raidPtr->parity_map, raidPtr, 685 ¶ms)) { 686 /* It failed; do without. */ 687 kmem_free(raidPtr->parity_map, 688 sizeof(struct rf_paritymap)); 689 raidPtr->parity_map = NULL; 690 return; 691 } 692 693 if (g_regions == 0) 694 /* Pick up the autoconfigured region count. */ 695 g_regions = raidPtr->parity_map->params.regions; 696 697 if (pm_zap) { 698 good = raidPtr->parity_good && !force; 699 700 if (good) 701 rf_paritymap_forceclean(raidPtr->parity_map); 702 else 703 rf_paritymap_invalidate(raidPtr->parity_map); 704 /* This needs to be on disk before WASUSED is set. */ 705 rf_paritymap_write(raidPtr->parity_map); 706 } 707 } 708 709 /* Alter labels in-core to reflect the current view of things. 
*/ 710 for (col = 0; col < raidPtr->numCol; col++) { 711 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 712 continue; 713 clabel = raidget_component_label(raidPtr, col); 714 715 if (pm_use) 716 flags = RF_PMLABEL_VALID | RF_PMLABEL_WASUSED; 717 else 718 flags = RF_PMLABEL_VALID | RF_PMLABEL_DISABLE; 719 720 clabel->parity_map_flags = flags; 721 clabel->parity_map_tickms = g_tickms; 722 clabel->parity_map_ntick = g_ntick; 723 clabel->parity_map_regions = g_regions; 724 raidflush_component_label(raidPtr, col); 725 } 726 /* Note that we're just in 'attach' here, and there won't 727 be any spare disks at this point. */ 728 } 729 730 /* 731 * For initializing the parity-map fields of a component label, both on 732 * initial creation and on reconstruct/copyback/etc. */ 733 void 734 rf_paritymap_init_label(struct rf_paritymap *pm, RF_ComponentLabel_t *clabel) 735 { 736 if (pm != NULL) { 737 clabel->parity_map_flags = 738 RF_PMLABEL_VALID | RF_PMLABEL_WASUSED; 739 clabel->parity_map_tickms = pm->params.tickms; 740 clabel->parity_map_ntick = pm->params.cooldown; 741 /* 742 * XXXjld: If the number of regions is changed on disk, and 743 * then a new component is labeled before the next configure, 744 * then it will get the old value and they will conflict on 745 * the next boot (and the default will be used instead). 746 */ 747 clabel->parity_map_regions = pm->params.regions; 748 } else { 749 /* 750 * XXXjld: if the map is disabled, and all the components are 751 * replaced without an intervening unconfigure/reconfigure, 752 * then it will become enabled on the next unconfig/reconfig. 753 */ 754 } 755 } 756 757 758 /* Will the parity map be disabled next time? 
*/ 759 int 760 rf_paritymap_get_disable(RF_Raid_t *raidPtr) 761 { 762 RF_ComponentLabel_t *clabel; 763 RF_RowCol_t col; 764 int dis; 765 766 dis = 0; 767 for (col = 0; col < raidPtr->numCol; col++) { 768 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 769 continue; 770 clabel = raidget_component_label(raidPtr, col); 771 if (clabel->parity_map_flags & RF_PMLABEL_DISABLE) 772 dis = 1; 773 } 774 for (col = 0; col < raidPtr->numSpare; col++) { 775 if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare) 776 continue; 777 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col); 778 if (clabel->parity_map_flags & RF_PMLABEL_DISABLE) 779 dis = 1; 780 } 781 782 return dis; 783 } 784 785 /* Set whether the parity map will be disabled next time. */ 786 void 787 rf_paritymap_set_disable(RF_Raid_t *raidPtr, int dis) 788 { 789 RF_ComponentLabel_t *clabel; 790 RF_RowCol_t col; 791 792 for (col = 0; col < raidPtr->numCol; col++) { 793 if (RF_DEAD_DISK(raidPtr->Disks[col].status)) 794 continue; 795 clabel = raidget_component_label(raidPtr, col); 796 if (dis) 797 clabel->parity_map_flags |= RF_PMLABEL_DISABLE; 798 else 799 clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE; 800 raidflush_component_label(raidPtr, col); 801 } 802 803 /* update any used spares as well */ 804 for (col = 0; col < raidPtr->numSpare; col++) { 805 if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare) 806 continue; 807 808 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col); 809 if (dis) 810 clabel->parity_map_flags |= RF_PMLABEL_DISABLE; 811 else 812 clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE; 813 raidflush_component_label(raidPtr, raidPtr->numCol+col); 814 } 815 } 816