1 /* $NetBSD: rf_netbsdkintf.c,v 1.312 2014/07/25 08:10:38 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.312 2014/07/25 08:10:38 dholland Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_compat_netbsd.h" 108 #include "opt_raid_autoconfig.h" 109 #endif 110 111 #include <sys/param.h> 112 #include <sys/errno.h> 113 #include <sys/pool.h> 114 #include <sys/proc.h> 115 #include <sys/queue.h> 116 #include <sys/disk.h> 117 #include <sys/device.h> 118 #include <sys/stat.h> 119 #include <sys/ioctl.h> 120 #include <sys/fcntl.h> 121 #include <sys/systm.h> 122 #include <sys/vnode.h> 123 #include <sys/disklabel.h> 124 #include <sys/conf.h> 125 #include <sys/buf.h> 126 #include <sys/bufq.h> 127 #include <sys/reboot.h> 128 #include <sys/kauth.h> 129 130 #include <prop/proplib.h> 131 132 #include 
<dev/raidframe/raidframevar.h> 133 #include <dev/raidframe/raidframeio.h> 134 #include <dev/raidframe/rf_paritymap.h> 135 136 #include "rf_raid.h" 137 #include "rf_copyback.h" 138 #include "rf_dag.h" 139 #include "rf_dagflags.h" 140 #include "rf_desc.h" 141 #include "rf_diskqueue.h" 142 #include "rf_etimer.h" 143 #include "rf_general.h" 144 #include "rf_kintf.h" 145 #include "rf_options.h" 146 #include "rf_driver.h" 147 #include "rf_parityscan.h" 148 #include "rf_threadstuff.h" 149 150 #ifdef COMPAT_50 151 #include "rf_compat50.h" 152 #endif 153 154 #ifdef DEBUG 155 int rf_kdebug_level = 0; 156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 157 #else /* DEBUG */ 158 #define db1_printf(a) { } 159 #endif /* DEBUG */ 160 161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 162 static rf_declare_mutex2(rf_sparet_wait_mutex); 163 static rf_declare_cond2(rf_sparet_wait_cv); 164 static rf_declare_cond2(rf_sparet_resp_cv); 165 166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 167 * spare table */ 168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 169 * installation process */ 170 #endif 171 172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 173 174 /* prototypes */ 175 static void KernelWakeupFunc(struct buf *); 176 static void InitBP(struct buf *, struct vnode *, unsigned, 177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 178 void *, int, struct proc *); 179 struct raid_softc; 180 static void raidinit(struct raid_softc *); 181 182 void raidattach(int); 183 static int raid_match(device_t, cfdata_t, void *); 184 static void raid_attach(device_t, device_t, void *); 185 static int raid_detach(device_t, int); 186 187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t, 188 daddr_t, daddr_t); 189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t, 190 daddr_t, daddr_t, int); 191 192 static int raidwrite_component_label(unsigned, 193 dev_t, 
    struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);


/* Device-switch entry points; wired into the bdevsw/cdevsw tables below. */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/* Block-device switch for /dev/raidN block nodes. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* Raw (character) device switch for /dev/rraidN nodes. */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};

/* disk(9) glue: strategy routine and minphys hook for this driver. */
static struct dkdriver rf_dkdriver = { raidstrategy, minphys };

/*
 * Per-unit software state for one raidN device.  One of these lives on
 * the global "raids" list per configured (or being-configured) unit;
 * sc_r is the RAIDframe array state proper.
 */
struct raid_softc {
	device_t sc_dev;		/* autoconf(9) device handle */
	int     sc_unit;		/* raidN unit number */
	int     sc_flags;		/* flags (RAIDF_*, below) */
	int     sc_cflags;		/* configuration flags */
	uint64_t sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;			/* RAIDframe per-array state */
	LIST_ENTRY(raid_softc) sc_link;	/* linkage on the "raids" list */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid,
sizeof(struct raid_softc), 259 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 260 DVF_DETACH_SHUTDOWN); 261 262 /* 263 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 264 * Be aware that large numbers can allow the driver to consume a lot of 265 * kernel memory, especially on writes, and in degraded mode reads. 266 * 267 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 268 * a single 64K write will typically require 64K for the old data, 269 * 64K for the old parity, and 64K for the new parity, for a total 270 * of 192K (if the parity buffer is not re-used immediately). 271 * Even it if is used immediately, that's still 128K, which when multiplied 272 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 273 * 274 * Now in degraded mode, for example, a 64K read on the above setup may 275 * require data reconstruction, which will require *all* of the 4 remaining 276 * disks to participate -- 4 * 32K/disk == 128K again. 277 */ 278 279 #ifndef RAIDOUTSTANDING 280 #define RAIDOUTSTANDING 6 281 #endif 282 283 #define RAIDLABELDEV(dev) \ 284 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 285 286 /* declared here, and made public, for the benefit of KVM stuff.. 
*/ 287 288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 289 struct disklabel *); 290 static void raidgetdisklabel(dev_t); 291 static void raidmakedisklabel(struct raid_softc *); 292 293 static int raidlock(struct raid_softc *); 294 static void raidunlock(struct raid_softc *); 295 296 static int raid_detach_unlocked(struct raid_softc *); 297 298 static void rf_markalldirty(RF_Raid_t *); 299 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *); 300 301 void rf_ReconThread(struct rf_recon_req *); 302 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 303 void rf_CopybackThread(RF_Raid_t *raidPtr); 304 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 305 int rf_autoconfig(device_t); 306 void rf_buildroothack(RF_ConfigSet_t *); 307 308 RF_AutoConfig_t *rf_find_raid_components(void); 309 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 310 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 311 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t); 312 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 313 int rf_set_autoconfig(RF_Raid_t *, int); 314 int rf_set_rootpartition(RF_Raid_t *, int); 315 void rf_release_all_vps(RF_ConfigSet_t *); 316 void rf_cleanup_config_set(RF_ConfigSet_t *); 317 int rf_have_enough_components(RF_ConfigSet_t *); 318 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *); 319 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t); 320 321 /* 322 * Debugging, mostly. Set to 0 to not allow autoconfig to take place. 323 * Note that this is overridden by having RAID_AUTOCONFIG as an option 324 * in the kernel config file. 
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
/* Set once rf_autoconfig() has run; prevents a second autoconfig pass. */
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

/* All known raid_softc instances, protected by raid_lock. */
static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

/*
 * Allocate and minimally initialize the softc for raidN unit `unit`.
 * NOTE(review): kmem_zalloc with KM_SLEEP waits for memory rather than
 * failing, so the NULL check below should be unreachable — confirm
 * against kmem(9) for this kernel version.
 */
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	/* FCFS queue sorted by raw block number for the unit's I/O. */
	bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
	return sc;
}

/* Release everything raidcreate() allocated. */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}

/*
 * Look up the softc for `unit`, creating and enrolling a new one on the
 * "raids" list if none exists.  Returns NULL only for a negative unit.
 * NOTE(review): raid_lock is dropped between the failed lookup and the
 * insert, so two concurrent callers could insert duplicate entries for
 * the same unit — confirm callers are serialized.
 */
static struct raid_softc *
raidget(int unit) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if ((sc = raidcreate(unit)) == NULL)
		return NULL;
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

/* Unlink a softc from the global list and destroy it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

/*
 * Pseudo-device attach entry point.  The `num` argument (requested
 * number of units) is ignored: units are created on demand by raidget().
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done.
 */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table request/response machinery for declustered parity. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * autoconf(9) finalizer: scan all disks for RAID components, group them
 * into sets, and configure the valid ones.  Runs at most once (guarded
 * by raidautoconfigdone).  Returns 0 when nothing was done, 1 after a
 * scan — presumably so config_finalize() knows progress was made
 * (NOTE(review): confirm against the finalizer protocol).
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
456 */ 457 rf_buildroothack(config_sets); 458 459 return 1; 460 } 461 462 static int 463 rf_containsboot(RF_Raid_t *r, device_t bdv) { 464 const char *bootname = device_xname(bdv); 465 size_t len = strlen(bootname); 466 467 for (int col = 0; col < r->numCol; col++) { 468 const char *devname = r->Disks[col].devname; 469 devname += sizeof("/dev/") - 1; 470 if (strncmp(devname, "dk", 2) == 0) { 471 const char *parent = 472 dkwedge_get_parent_name(r->Disks[col].dev); 473 if (parent != NULL) 474 devname = parent; 475 } 476 if (strncmp(devname, bootname, len) == 0) { 477 struct raid_softc *sc = r->softc; 478 aprint_debug("raid%d includes boot device %s\n", 479 sc->sc_unit, devname); 480 return 1; 481 } 482 } 483 return 0; 484 } 485 486 void 487 rf_buildroothack(RF_ConfigSet_t *config_sets) 488 { 489 RF_ConfigSet_t *cset; 490 RF_ConfigSet_t *next_cset; 491 int num_root; 492 struct raid_softc *sc, *rsc; 493 494 sc = rsc = NULL; 495 num_root = 0; 496 cset = config_sets; 497 while (cset != NULL) { 498 next_cset = cset->next; 499 if (rf_have_enough_components(cset) && 500 cset->ac->clabel->autoconfigure == 1) { 501 sc = rf_auto_config_set(cset); 502 if (sc != NULL) { 503 aprint_debug("raid%d: configured ok\n", 504 sc->sc_unit); 505 if (cset->rootable) { 506 rsc = sc; 507 num_root++; 508 } 509 } else { 510 /* The autoconfig didn't work :( */ 511 aprint_debug("Autoconfig failed\n"); 512 rf_release_all_vps(cset); 513 } 514 } else { 515 /* we're not autoconfiguring this set... 516 release the associated resources */ 517 rf_release_all_vps(cset); 518 } 519 /* cleanup */ 520 rf_cleanup_config_set(cset); 521 cset = next_cset; 522 } 523 524 /* if the user has specified what the root device should be 525 then we don't touch booted_device or boothowto... */ 526 527 if (rootspec != NULL) 528 return; 529 530 /* we found something bootable... */ 531 532 /* 533 * XXX: The following code assumes that the root raid 534 * is the first ('a') partition. 
This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		/* Exactly one rootable set: use it (or its 'a' wedge). */
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		/*
		 * Override booted_device when nothing else claimed it,
		 * when the set is forced-root (root_partition == 1), or
		 * when the set contains the device we booted from.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/*
		 * Several candidates: narrow to the set that actually
		 * contains the device we booted from.
		 */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer...
	 */
			boothowto |= RB_ASKNAME;
		}
	}
}


/*
 * Return the size, in DEV_BSIZE units, of the swap partition on `dev`,
 * or -1 if the unit is not configured or the partition is not FS_SWAP.
 * Temporarily opens (and re-closes) the device if it was not open.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	/* Close again only if we did the open above. */
	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

/*
 * Crash-dump entry point.  Only RAID 1 sets (1 data + 1 parity column)
 * are supported; the dump is forwarded to a single live component,
 * preferring the master, then a spare of the master, then the slave,
 * then a spare of the slave.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		/*
		 * NOTE(review): blkno is a (signed) daddr_t printed with
		 * PRIu64 — harmless for sane values, but PRId64 would be
		 * the matching specifier.
		 */
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive..
	 */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	/* First choice: any component that is still optimal (live). */
	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			/* Find which column this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.) On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	*/

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}

/*
 * Open entry point for both block and character devices.  Validates the
 * partition, records it in the appropriate openmask, and on the first
 * open of a configured unit marks all components dirty.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	/* Refuse new opens while the unit is being torn down. */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* First open of a configured unit: (re)read the disklabel. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!!
		 */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	/* Combined open mask: block and character opens together. */
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}

/*
 * Close entry point.  Clears this partition from the relevant openmask;
 * on the last close of a configured unit, writes final "clean"
 * component labels.  Always succeeds.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
893 */ 894 } 895 896 raidunlock(rs); 897 return (0); 898 899 } 900 901 void 902 raidstrategy(struct buf *bp) 903 { 904 unsigned int unit = raidunit(bp->b_dev); 905 RF_Raid_t *raidPtr; 906 int wlabel; 907 struct raid_softc *rs; 908 909 if ((rs = raidget(unit)) == NULL) { 910 bp->b_error = ENXIO; 911 goto done; 912 } 913 if ((rs->sc_flags & RAIDF_INITED) == 0) { 914 bp->b_error = ENXIO; 915 goto done; 916 } 917 raidPtr = &rs->sc_r; 918 if (!raidPtr->valid) { 919 bp->b_error = ENODEV; 920 goto done; 921 } 922 if (bp->b_bcount == 0) { 923 db1_printf(("b_bcount is zero..\n")); 924 goto done; 925 } 926 927 /* 928 * Do bounds checking and adjust transfer. If there's an 929 * error, the bounds check will flag that for us. 930 */ 931 932 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 933 if (DISKPART(bp->b_dev) == RAW_PART) { 934 uint64_t size; /* device size in DEV_BSIZE unit */ 935 936 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 937 size = raidPtr->totalSectors << 938 (raidPtr->logBytesPerSector - DEV_BSHIFT); 939 } else { 940 size = raidPtr->totalSectors >> 941 (DEV_BSHIFT - raidPtr->logBytesPerSector); 942 } 943 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 944 goto done; 945 } 946 } else { 947 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 948 db1_printf(("Bounds check failed!!:%d %d\n", 949 (int) bp->b_blkno, (int) wlabel)); 950 goto done; 951 } 952 } 953 954 rf_lock_mutex2(raidPtr->iodone_lock); 955 956 bp->b_resid = 0; 957 958 /* stuff it onto our queue */ 959 bufq_put(rs->buf_queue, bp); 960 961 /* scheduled the IO to happen at the next convenient time */ 962 rf_signal_cond2(raidPtr->iodone_cv); 963 rf_unlock_mutex2(raidPtr->iodone_lock); 964 965 return; 966 967 done: 968 bp->b_resid = bp->b_bcount; 969 biodone(bp); 970 } 971 /* ARGSUSED */ 972 int 973 raidread(dev_t dev, struct uio *uio, int flags) 974 { 975 int unit = raidunit(dev); 976 struct raid_softc *rs; 977 978 if ((rs = raidget(unit)) == NULL) 979 return ENXIO; 

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}

/*
 * Raw-device write: forward to physio(9) using raidstrategy.
 */
/* ARGSUSED */
int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

/*
 * Tear down a unit: refuse while any partition is open (EBUSY), shut
 * RAIDframe down if the unit was configured, then detach and destroy
 * the disk(9) structures.  Caller is expected to hold the unit lock
 * (hence "_unlocked" — NOTE(review): confirm the locking contract at
 * the call sites, which are outside this view).
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk.
*/ 1027 dkwedge_delall(&rs->sc_dkdev); 1028 disk_detach(&rs->sc_dkdev); 1029 disk_destroy(&rs->sc_dkdev); 1030 1031 aprint_normal_dev(rs->sc_dev, "detached\n"); 1032 1033 return 0; 1034 } 1035 1036 int 1037 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1038 { 1039 int unit = raidunit(dev); 1040 int error = 0; 1041 int part, pmask, s; 1042 cfdata_t cf; 1043 struct raid_softc *rs; 1044 RF_Config_t *k_cfg, *u_cfg; 1045 RF_Raid_t *raidPtr; 1046 RF_RaidDisk_t *diskPtr; 1047 RF_AccTotals_t *totals; 1048 RF_DeviceConfig_t *d_cfg, **ucfgp; 1049 u_char *specific_buf; 1050 int retcode = 0; 1051 int column; 1052 /* int raidid; */ 1053 struct rf_recon_req *rrcopy, *rr; 1054 RF_ComponentLabel_t *clabel; 1055 RF_ComponentLabel_t *ci_label; 1056 RF_ComponentLabel_t **clabel_ptr; 1057 RF_SingleComponent_t *sparePtr,*componentPtr; 1058 RF_SingleComponent_t component; 1059 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 1060 int i, j, d; 1061 #ifdef __HAVE_OLD_DISKLABEL 1062 struct disklabel newlabel; 1063 #endif 1064 struct dkwedge_info *dkw; 1065 1066 if ((rs = raidget(unit)) == NULL) 1067 return ENXIO; 1068 raidPtr = &rs->sc_r; 1069 1070 db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev, 1071 (int) DISKPART(dev), (int) unit, cmd)); 1072 1073 /* Must be open for writes for these commands... */ 1074 switch (cmd) { 1075 #ifdef DIOCGSECTORSIZE 1076 case DIOCGSECTORSIZE: 1077 *(u_int *)data = raidPtr->bytesPerSector; 1078 return 0; 1079 case DIOCGMEDIASIZE: 1080 *(off_t *)data = 1081 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector; 1082 return 0; 1083 #endif 1084 case DIOCSDINFO: 1085 case DIOCWDINFO: 1086 #ifdef __HAVE_OLD_DISKLABEL 1087 case ODIOCWDINFO: 1088 case ODIOCSDINFO: 1089 #endif 1090 case DIOCWLABEL: 1091 case DIOCAWEDGE: 1092 case DIOCDWEDGE: 1093 case DIOCSSTRATEGY: 1094 if ((flag & FWRITE) == 0) 1095 return (EBADF); 1096 } 1097 1098 /* Must be initialized for these... 
*/ 1099 switch (cmd) { 1100 case DIOCGDINFO: 1101 case DIOCSDINFO: 1102 case DIOCWDINFO: 1103 #ifdef __HAVE_OLD_DISKLABEL 1104 case ODIOCGDINFO: 1105 case ODIOCWDINFO: 1106 case ODIOCSDINFO: 1107 case ODIOCGDEFLABEL: 1108 #endif 1109 case DIOCGPART: 1110 case DIOCWLABEL: 1111 case DIOCGDEFLABEL: 1112 case DIOCAWEDGE: 1113 case DIOCDWEDGE: 1114 case DIOCLWEDGES: 1115 case DIOCCACHESYNC: 1116 case RAIDFRAME_SHUTDOWN: 1117 case RAIDFRAME_REWRITEPARITY: 1118 case RAIDFRAME_GET_INFO: 1119 case RAIDFRAME_RESET_ACCTOTALS: 1120 case RAIDFRAME_GET_ACCTOTALS: 1121 case RAIDFRAME_KEEP_ACCTOTALS: 1122 case RAIDFRAME_GET_SIZE: 1123 case RAIDFRAME_FAIL_DISK: 1124 case RAIDFRAME_COPYBACK: 1125 case RAIDFRAME_CHECK_RECON_STATUS: 1126 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1127 case RAIDFRAME_GET_COMPONENT_LABEL: 1128 case RAIDFRAME_SET_COMPONENT_LABEL: 1129 case RAIDFRAME_ADD_HOT_SPARE: 1130 case RAIDFRAME_REMOVE_HOT_SPARE: 1131 case RAIDFRAME_INIT_LABELS: 1132 case RAIDFRAME_REBUILD_IN_PLACE: 1133 case RAIDFRAME_CHECK_PARITY: 1134 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1135 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1136 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1137 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1138 case RAIDFRAME_SET_AUTOCONFIG: 1139 case RAIDFRAME_SET_ROOT: 1140 case RAIDFRAME_DELETE_COMPONENT: 1141 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1142 case RAIDFRAME_PARITYMAP_STATUS: 1143 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1144 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1145 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1146 case DIOCGSTRATEGY: 1147 case DIOCSSTRATEGY: 1148 if ((rs->sc_flags & RAIDF_INITED) == 0) 1149 return (ENXIO); 1150 } 1151 1152 switch (cmd) { 1153 #ifdef COMPAT_50 1154 case RAIDFRAME_GET_INFO50: 1155 return rf_get_info50(raidPtr, data); 1156 1157 case RAIDFRAME_CONFIGURE50: 1158 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0) 1159 return retcode; 1160 goto config; 1161 #endif 1162 /* configure the system */ 1163 case RAIDFRAME_CONFIGURE: 1164 
1165 if (raidPtr->valid) { 1166 /* There is a valid RAID set running on this unit! */ 1167 printf("raid%d: Device already configured!\n",unit); 1168 return(EINVAL); 1169 } 1170 1171 /* copy-in the configuration information */ 1172 /* data points to a pointer to the configuration structure */ 1173 1174 u_cfg = *((RF_Config_t **) data); 1175 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1176 if (k_cfg == NULL) { 1177 return (ENOMEM); 1178 } 1179 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1180 if (retcode) { 1181 RF_Free(k_cfg, sizeof(RF_Config_t)); 1182 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1183 retcode)); 1184 return (retcode); 1185 } 1186 goto config; 1187 config: 1188 /* allocate a buffer for the layout-specific data, and copy it 1189 * in */ 1190 if (k_cfg->layoutSpecificSize) { 1191 if (k_cfg->layoutSpecificSize > 10000) { 1192 /* sanity check */ 1193 RF_Free(k_cfg, sizeof(RF_Config_t)); 1194 return (EINVAL); 1195 } 1196 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1197 (u_char *)); 1198 if (specific_buf == NULL) { 1199 RF_Free(k_cfg, sizeof(RF_Config_t)); 1200 return (ENOMEM); 1201 } 1202 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1203 k_cfg->layoutSpecificSize); 1204 if (retcode) { 1205 RF_Free(k_cfg, sizeof(RF_Config_t)); 1206 RF_Free(specific_buf, 1207 k_cfg->layoutSpecificSize); 1208 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1209 retcode)); 1210 return (retcode); 1211 } 1212 } else 1213 specific_buf = NULL; 1214 k_cfg->layoutSpecific = specific_buf; 1215 1216 /* should do some kind of sanity check on the configuration. 1217 * Store the sum of all the bytes in the last byte? 
*/ 1218 1219 /* configure the system */ 1220 1221 /* 1222 * Clear the entire RAID descriptor, just to make sure 1223 * there is no stale data left in the case of a 1224 * reconfiguration 1225 */ 1226 memset(raidPtr, 0, sizeof(*raidPtr)); 1227 raidPtr->softc = rs; 1228 raidPtr->raidid = unit; 1229 1230 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1231 1232 if (retcode == 0) { 1233 1234 /* allow this many simultaneous IO's to 1235 this RAID device */ 1236 raidPtr->openings = RAIDOUTSTANDING; 1237 1238 raidinit(rs); 1239 rf_markalldirty(raidPtr); 1240 } 1241 /* free the buffers. No return code here. */ 1242 if (k_cfg->layoutSpecificSize) { 1243 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1244 } 1245 RF_Free(k_cfg, sizeof(RF_Config_t)); 1246 1247 return (retcode); 1248 1249 /* shutdown the system */ 1250 case RAIDFRAME_SHUTDOWN: 1251 1252 part = DISKPART(dev); 1253 pmask = (1 << part); 1254 1255 if ((error = raidlock(rs)) != 0) 1256 return (error); 1257 1258 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1259 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1260 (rs->sc_dkdev.dk_copenmask & pmask))) 1261 retcode = EBUSY; 1262 else { 1263 rs->sc_flags |= RAIDF_SHUTDOWN; 1264 rs->sc_dkdev.dk_copenmask &= ~pmask; 1265 rs->sc_dkdev.dk_bopenmask &= ~pmask; 1266 rs->sc_dkdev.dk_openmask &= ~pmask; 1267 retcode = 0; 1268 } 1269 1270 raidunlock(rs); 1271 1272 if (retcode != 0) 1273 return retcode; 1274 1275 /* free the pseudo device attach bits */ 1276 1277 cf = device_cfdata(rs->sc_dev); 1278 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0) 1279 free(cf, M_RAIDFRAME); 1280 1281 return (retcode); 1282 case RAIDFRAME_GET_COMPONENT_LABEL: 1283 clabel_ptr = (RF_ComponentLabel_t **) data; 1284 /* need to read the component label for the disk indicated 1285 by row,column in clabel */ 1286 1287 /* 1288 * Perhaps there should be an option to skip the in-core 1289 * copy and hit the disk, as with disklabel(8). 
1290 */ 1291 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *)); 1292 1293 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel)); 1294 1295 if (retcode) { 1296 RF_Free(clabel, sizeof(*clabel)); 1297 return retcode; 1298 } 1299 1300 clabel->row = 0; /* Don't allow looking at anything else.*/ 1301 1302 column = clabel->column; 1303 1304 if ((column < 0) || (column >= raidPtr->numCol + 1305 raidPtr->numSpare)) { 1306 RF_Free(clabel, sizeof(*clabel)); 1307 return EINVAL; 1308 } 1309 1310 RF_Free(clabel, sizeof(*clabel)); 1311 1312 clabel = raidget_component_label(raidPtr, column); 1313 1314 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr)); 1315 1316 #if 0 1317 case RAIDFRAME_SET_COMPONENT_LABEL: 1318 clabel = (RF_ComponentLabel_t *) data; 1319 1320 /* XXX check the label for valid stuff... */ 1321 /* Note that some things *should not* get modified -- 1322 the user should be re-initing the labels instead of 1323 trying to patch things. 1324 */ 1325 1326 raidid = raidPtr->raidid; 1327 #ifdef DEBUG 1328 printf("raid%d: Got component label:\n", raidid); 1329 printf("raid%d: Version: %d\n", raidid, clabel->version); 1330 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1331 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1332 printf("raid%d: Column: %d\n", raidid, clabel->column); 1333 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1334 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1335 printf("raid%d: Status: %d\n", raidid, clabel->status); 1336 #endif 1337 clabel->row = 0; 1338 column = clabel->column; 1339 1340 if ((column < 0) || (column >= raidPtr->numCol)) { 1341 return(EINVAL); 1342 } 1343 1344 /* XXX this isn't allowed to do anything for now :-) */ 1345 1346 /* XXX and before it is, we need to fill in the rest 1347 of the fields!?!?!?! 
*/ 1348 memcpy(raidget_component_label(raidPtr, column), 1349 clabel, sizeof(*clabel)); 1350 raidflush_component_label(raidPtr, column); 1351 return (0); 1352 #endif 1353 1354 case RAIDFRAME_INIT_LABELS: 1355 clabel = (RF_ComponentLabel_t *) data; 1356 /* 1357 we only want the serial number from 1358 the above. We get all the rest of the information 1359 from the config that was used to create this RAID 1360 set. 1361 */ 1362 1363 raidPtr->serial_number = clabel->serial_number; 1364 1365 for(column=0;column<raidPtr->numCol;column++) { 1366 diskPtr = &raidPtr->Disks[column]; 1367 if (!RF_DEAD_DISK(diskPtr->status)) { 1368 ci_label = raidget_component_label(raidPtr, 1369 column); 1370 /* Zeroing this is important. */ 1371 memset(ci_label, 0, sizeof(*ci_label)); 1372 raid_init_component_label(raidPtr, ci_label); 1373 ci_label->serial_number = 1374 raidPtr->serial_number; 1375 ci_label->row = 0; /* we dont' pretend to support more */ 1376 rf_component_label_set_partitionsize(ci_label, 1377 diskPtr->partitionSize); 1378 ci_label->column = column; 1379 raidflush_component_label(raidPtr, column); 1380 } 1381 /* XXXjld what about the spares? */ 1382 } 1383 1384 return (retcode); 1385 case RAIDFRAME_SET_AUTOCONFIG: 1386 d = rf_set_autoconfig(raidPtr, *(int *) data); 1387 printf("raid%d: New autoconfig value is: %d\n", 1388 raidPtr->raidid, d); 1389 *(int *) data = d; 1390 return (retcode); 1391 1392 case RAIDFRAME_SET_ROOT: 1393 d = rf_set_rootpartition(raidPtr, *(int *) data); 1394 printf("raid%d: New rootpartition value is: %d\n", 1395 raidPtr->raidid, d); 1396 *(int *) data = d; 1397 return (retcode); 1398 1399 /* initialize all parity */ 1400 case RAIDFRAME_REWRITEPARITY: 1401 1402 if (raidPtr->Layout.map->faultsTolerated == 0) { 1403 /* Parity for RAID 0 is trivially correct */ 1404 raidPtr->parity_good = RF_RAID_CLEAN; 1405 return(0); 1406 } 1407 1408 if (raidPtr->parity_rewrite_in_progress == 1) { 1409 /* Re-write is already in progress! 
*/ 1410 return(EINVAL); 1411 } 1412 1413 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1414 rf_RewriteParityThread, 1415 raidPtr,"raid_parity"); 1416 return (retcode); 1417 1418 1419 case RAIDFRAME_ADD_HOT_SPARE: 1420 sparePtr = (RF_SingleComponent_t *) data; 1421 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1422 retcode = rf_add_hot_spare(raidPtr, &component); 1423 return(retcode); 1424 1425 case RAIDFRAME_REMOVE_HOT_SPARE: 1426 return(retcode); 1427 1428 case RAIDFRAME_DELETE_COMPONENT: 1429 componentPtr = (RF_SingleComponent_t *)data; 1430 memcpy( &component, componentPtr, 1431 sizeof(RF_SingleComponent_t)); 1432 retcode = rf_delete_component(raidPtr, &component); 1433 return(retcode); 1434 1435 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1436 componentPtr = (RF_SingleComponent_t *)data; 1437 memcpy( &component, componentPtr, 1438 sizeof(RF_SingleComponent_t)); 1439 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1440 return(retcode); 1441 1442 case RAIDFRAME_REBUILD_IN_PLACE: 1443 1444 if (raidPtr->Layout.map->faultsTolerated == 0) { 1445 /* Can't do this on a RAID 0!! */ 1446 return(EINVAL); 1447 } 1448 1449 if (raidPtr->recon_in_progress == 1) { 1450 /* a reconstruct is already in progress! */ 1451 return(EINVAL); 1452 } 1453 1454 componentPtr = (RF_SingleComponent_t *) data; 1455 memcpy( &component, componentPtr, 1456 sizeof(RF_SingleComponent_t)); 1457 component.row = 0; /* we don't support any more */ 1458 column = component.column; 1459 1460 if ((column < 0) || (column >= raidPtr->numCol)) { 1461 return(EINVAL); 1462 } 1463 1464 rf_lock_mutex2(raidPtr->mutex); 1465 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1466 (raidPtr->numFailures > 0)) { 1467 /* XXX 0 above shouldn't be constant!!! */ 1468 /* some component other than this has failed. 1469 Let's not make things worse than they already 1470 are... 
*/ 1471 printf("raid%d: Unable to reconstruct to disk at:\n", 1472 raidPtr->raidid); 1473 printf("raid%d: Col: %d Too many failures.\n", 1474 raidPtr->raidid, column); 1475 rf_unlock_mutex2(raidPtr->mutex); 1476 return (EINVAL); 1477 } 1478 if (raidPtr->Disks[column].status == 1479 rf_ds_reconstructing) { 1480 printf("raid%d: Unable to reconstruct to disk at:\n", 1481 raidPtr->raidid); 1482 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column); 1483 1484 rf_unlock_mutex2(raidPtr->mutex); 1485 return (EINVAL); 1486 } 1487 if (raidPtr->Disks[column].status == rf_ds_spared) { 1488 rf_unlock_mutex2(raidPtr->mutex); 1489 return (EINVAL); 1490 } 1491 rf_unlock_mutex2(raidPtr->mutex); 1492 1493 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1494 if (rrcopy == NULL) 1495 return(ENOMEM); 1496 1497 rrcopy->raidPtr = (void *) raidPtr; 1498 rrcopy->col = column; 1499 1500 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1501 rf_ReconstructInPlaceThread, 1502 rrcopy,"raid_reconip"); 1503 return(retcode); 1504 1505 case RAIDFRAME_GET_INFO: 1506 if (!raidPtr->valid) 1507 return (ENODEV); 1508 ucfgp = (RF_DeviceConfig_t **) data; 1509 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1510 (RF_DeviceConfig_t *)); 1511 if (d_cfg == NULL) 1512 return (ENOMEM); 1513 d_cfg->rows = 1; /* there is only 1 row now */ 1514 d_cfg->cols = raidPtr->numCol; 1515 d_cfg->ndevs = raidPtr->numCol; 1516 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1517 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1518 return (ENOMEM); 1519 } 1520 d_cfg->nspares = raidPtr->numSpare; 1521 if (d_cfg->nspares >= RF_MAX_DISKS) { 1522 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1523 return (ENOMEM); 1524 } 1525 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1526 d = 0; 1527 for (j = 0; j < d_cfg->cols; j++) { 1528 d_cfg->devs[d] = raidPtr->Disks[j]; 1529 d++; 1530 } 1531 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1532 d_cfg->spares[i] = raidPtr->Disks[j]; 1533 } 1534 retcode = 
copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1535 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1536 1537 return (retcode); 1538 1539 case RAIDFRAME_CHECK_PARITY: 1540 *(int *) data = raidPtr->parity_good; 1541 return (0); 1542 1543 case RAIDFRAME_PARITYMAP_STATUS: 1544 if (rf_paritymap_ineligible(raidPtr)) 1545 return EINVAL; 1546 rf_paritymap_status(raidPtr->parity_map, 1547 (struct rf_pmstat *)data); 1548 return 0; 1549 1550 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1551 if (rf_paritymap_ineligible(raidPtr)) 1552 return EINVAL; 1553 if (raidPtr->parity_map == NULL) 1554 return ENOENT; /* ??? */ 1555 if (0 != rf_paritymap_set_params(raidPtr->parity_map, 1556 (struct rf_pmparams *)data, 1)) 1557 return EINVAL; 1558 return 0; 1559 1560 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1561 if (rf_paritymap_ineligible(raidPtr)) 1562 return EINVAL; 1563 *(int *) data = rf_paritymap_get_disable(raidPtr); 1564 return 0; 1565 1566 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1567 if (rf_paritymap_ineligible(raidPtr)) 1568 return EINVAL; 1569 rf_paritymap_set_disable(raidPtr, *(int *)data); 1570 /* XXX should errors be passed up? */ 1571 return 0; 1572 1573 case RAIDFRAME_RESET_ACCTOTALS: 1574 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1575 return (0); 1576 1577 case RAIDFRAME_GET_ACCTOTALS: 1578 totals = (RF_AccTotals_t *) data; 1579 *totals = raidPtr->acc_totals; 1580 return (0); 1581 1582 case RAIDFRAME_KEEP_ACCTOTALS: 1583 raidPtr->keep_acc_totals = *(int *)data; 1584 return (0); 1585 1586 case RAIDFRAME_GET_SIZE: 1587 *(int *) data = raidPtr->totalSectors; 1588 return (0); 1589 1590 /* fail a disk & optionally start reconstruction */ 1591 case RAIDFRAME_FAIL_DISK: 1592 1593 if (raidPtr->Layout.map->faultsTolerated == 0) { 1594 /* Can't do this on a RAID 0!! 
*/ 1595 return(EINVAL); 1596 } 1597 1598 rr = (struct rf_recon_req *) data; 1599 rr->row = 0; 1600 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1601 return (EINVAL); 1602 1603 1604 rf_lock_mutex2(raidPtr->mutex); 1605 if (raidPtr->status == rf_rs_reconstructing) { 1606 /* you can't fail a disk while we're reconstructing! */ 1607 /* XXX wrong for RAID6 */ 1608 rf_unlock_mutex2(raidPtr->mutex); 1609 return (EINVAL); 1610 } 1611 if ((raidPtr->Disks[rr->col].status == 1612 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1613 /* some other component has failed. Let's not make 1614 things worse. XXX wrong for RAID6 */ 1615 rf_unlock_mutex2(raidPtr->mutex); 1616 return (EINVAL); 1617 } 1618 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1619 /* Can't fail a spared disk! */ 1620 rf_unlock_mutex2(raidPtr->mutex); 1621 return (EINVAL); 1622 } 1623 rf_unlock_mutex2(raidPtr->mutex); 1624 1625 /* make a copy of the recon request so that we don't rely on 1626 * the user's buffer */ 1627 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1628 if (rrcopy == NULL) 1629 return(ENOMEM); 1630 memcpy(rrcopy, rr, sizeof(*rr)); 1631 rrcopy->raidPtr = (void *) raidPtr; 1632 1633 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1634 rf_ReconThread, 1635 rrcopy,"raid_recon"); 1636 return (0); 1637 1638 /* invoke a copyback operation after recon on whatever disk 1639 * needs it, if any */ 1640 case RAIDFRAME_COPYBACK: 1641 1642 if (raidPtr->Layout.map->faultsTolerated == 0) { 1643 /* This makes no sense on a RAID 0!! */ 1644 return(EINVAL); 1645 } 1646 1647 if (raidPtr->copyback_in_progress == 1) { 1648 /* Copyback is already in progress! 
*/ 1649 return(EINVAL); 1650 } 1651 1652 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1653 rf_CopybackThread, 1654 raidPtr,"raid_copyback"); 1655 return (retcode); 1656 1657 /* return the percentage completion of reconstruction */ 1658 case RAIDFRAME_CHECK_RECON_STATUS: 1659 if (raidPtr->Layout.map->faultsTolerated == 0) { 1660 /* This makes no sense on a RAID 0, so tell the 1661 user it's done. */ 1662 *(int *) data = 100; 1663 return(0); 1664 } 1665 if (raidPtr->status != rf_rs_reconstructing) 1666 *(int *) data = 100; 1667 else { 1668 if (raidPtr->reconControl->numRUsTotal > 0) { 1669 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1670 } else { 1671 *(int *) data = 0; 1672 } 1673 } 1674 return (0); 1675 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1676 progressInfoPtr = (RF_ProgressInfo_t **) data; 1677 if (raidPtr->status != rf_rs_reconstructing) { 1678 progressInfo.remaining = 0; 1679 progressInfo.completed = 100; 1680 progressInfo.total = 100; 1681 } else { 1682 progressInfo.total = 1683 raidPtr->reconControl->numRUsTotal; 1684 progressInfo.completed = 1685 raidPtr->reconControl->numRUsComplete; 1686 progressInfo.remaining = progressInfo.total - 1687 progressInfo.completed; 1688 } 1689 retcode = copyout(&progressInfo, *progressInfoPtr, 1690 sizeof(RF_ProgressInfo_t)); 1691 return (retcode); 1692 1693 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1694 if (raidPtr->Layout.map->faultsTolerated == 0) { 1695 /* This makes no sense on a RAID 0, so tell the 1696 user it's done. 
*/ 1697 *(int *) data = 100; 1698 return(0); 1699 } 1700 if (raidPtr->parity_rewrite_in_progress == 1) { 1701 *(int *) data = 100 * 1702 raidPtr->parity_rewrite_stripes_done / 1703 raidPtr->Layout.numStripe; 1704 } else { 1705 *(int *) data = 100; 1706 } 1707 return (0); 1708 1709 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1710 progressInfoPtr = (RF_ProgressInfo_t **) data; 1711 if (raidPtr->parity_rewrite_in_progress == 1) { 1712 progressInfo.total = raidPtr->Layout.numStripe; 1713 progressInfo.completed = 1714 raidPtr->parity_rewrite_stripes_done; 1715 progressInfo.remaining = progressInfo.total - 1716 progressInfo.completed; 1717 } else { 1718 progressInfo.remaining = 0; 1719 progressInfo.completed = 100; 1720 progressInfo.total = 100; 1721 } 1722 retcode = copyout(&progressInfo, *progressInfoPtr, 1723 sizeof(RF_ProgressInfo_t)); 1724 return (retcode); 1725 1726 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1727 if (raidPtr->Layout.map->faultsTolerated == 0) { 1728 /* This makes no sense on a RAID 0 */ 1729 *(int *) data = 100; 1730 return(0); 1731 } 1732 if (raidPtr->copyback_in_progress == 1) { 1733 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1734 raidPtr->Layout.numStripe; 1735 } else { 1736 *(int *) data = 100; 1737 } 1738 return (0); 1739 1740 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1741 progressInfoPtr = (RF_ProgressInfo_t **) data; 1742 if (raidPtr->copyback_in_progress == 1) { 1743 progressInfo.total = raidPtr->Layout.numStripe; 1744 progressInfo.completed = 1745 raidPtr->copyback_stripes_done; 1746 progressInfo.remaining = progressInfo.total - 1747 progressInfo.completed; 1748 } else { 1749 progressInfo.remaining = 0; 1750 progressInfo.completed = 100; 1751 progressInfo.total = 100; 1752 } 1753 retcode = copyout(&progressInfo, *progressInfoPtr, 1754 sizeof(RF_ProgressInfo_t)); 1755 return (retcode); 1756 1757 /* the sparetable daemon calls this to wait for the kernel to 1758 * need a spare table. 
this ioctl does not return until a 1759 * spare table is needed. XXX -- calling mpsleep here in the 1760 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1761 * -- I should either compute the spare table in the kernel, 1762 * or have a different -- XXX XXX -- interface (a different 1763 * character device) for delivering the table -- XXX */ 1764 #if 0 1765 case RAIDFRAME_SPARET_WAIT: 1766 rf_lock_mutex2(rf_sparet_wait_mutex); 1767 while (!rf_sparet_wait_queue) 1768 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex); 1769 waitreq = rf_sparet_wait_queue; 1770 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1771 rf_unlock_mutex2(rf_sparet_wait_mutex); 1772 1773 /* structure assignment */ 1774 *((RF_SparetWait_t *) data) = *waitreq; 1775 1776 RF_Free(waitreq, sizeof(*waitreq)); 1777 return (0); 1778 1779 /* wakes up a process waiting on SPARET_WAIT and puts an error 1780 * code in it that will cause the dameon to exit */ 1781 case RAIDFRAME_ABORT_SPARET_WAIT: 1782 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1783 waitreq->fcol = -1; 1784 rf_lock_mutex2(rf_sparet_wait_mutex); 1785 waitreq->next = rf_sparet_wait_queue; 1786 rf_sparet_wait_queue = waitreq; 1787 rf_broadcast_conf2(rf_sparet_wait_cv); 1788 rf_unlock_mutex2(rf_sparet_wait_mutex); 1789 return (0); 1790 1791 /* used by the spare table daemon to deliver a spare table 1792 * into the kernel */ 1793 case RAIDFRAME_SEND_SPARET: 1794 1795 /* install the spare table */ 1796 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1797 1798 /* respond to the requestor. 
the return status of the spare 1799 * table installation is passed in the "fcol" field */ 1800 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1801 waitreq->fcol = retcode; 1802 rf_lock_mutex2(rf_sparet_wait_mutex); 1803 waitreq->next = rf_sparet_resp_queue; 1804 rf_sparet_resp_queue = waitreq; 1805 rf_broadcast_cond2(rf_sparet_resp_cv); 1806 rf_unlock_mutex2(rf_sparet_wait_mutex); 1807 1808 return (retcode); 1809 #endif 1810 1811 default: 1812 break; /* fall through to the os-specific code below */ 1813 1814 } 1815 1816 if (!raidPtr->valid) 1817 return (EINVAL); 1818 1819 /* 1820 * Add support for "regular" device ioctls here. 1821 */ 1822 1823 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l); 1824 if (error != EPASSTHROUGH) 1825 return (error); 1826 1827 switch (cmd) { 1828 case DIOCGDINFO: 1829 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1830 break; 1831 #ifdef __HAVE_OLD_DISKLABEL 1832 case ODIOCGDINFO: 1833 newlabel = *(rs->sc_dkdev.dk_label); 1834 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1835 return ENOTTY; 1836 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1837 break; 1838 #endif 1839 1840 case DIOCGPART: 1841 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1842 ((struct partinfo *) data)->part = 1843 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1844 break; 1845 1846 case DIOCWDINFO: 1847 case DIOCSDINFO: 1848 #ifdef __HAVE_OLD_DISKLABEL 1849 case ODIOCWDINFO: 1850 case ODIOCSDINFO: 1851 #endif 1852 { 1853 struct disklabel *lp; 1854 #ifdef __HAVE_OLD_DISKLABEL 1855 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1856 memset(&newlabel, 0, sizeof newlabel); 1857 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1858 lp = &newlabel; 1859 } else 1860 #endif 1861 lp = (struct disklabel *)data; 1862 1863 if ((error = raidlock(rs)) != 0) 1864 return (error); 1865 1866 rs->sc_flags |= RAIDF_LABELLING; 1867 1868 error = setdisklabel(rs->sc_dkdev.dk_label, 1869 lp, 0, rs->sc_dkdev.dk_cpulabel); 1870 
if (error == 0) { 1871 if (cmd == DIOCWDINFO 1872 #ifdef __HAVE_OLD_DISKLABEL 1873 || cmd == ODIOCWDINFO 1874 #endif 1875 ) 1876 error = writedisklabel(RAIDLABELDEV(dev), 1877 raidstrategy, rs->sc_dkdev.dk_label, 1878 rs->sc_dkdev.dk_cpulabel); 1879 } 1880 rs->sc_flags &= ~RAIDF_LABELLING; 1881 1882 raidunlock(rs); 1883 1884 if (error) 1885 return (error); 1886 break; 1887 } 1888 1889 case DIOCWLABEL: 1890 if (*(int *) data != 0) 1891 rs->sc_flags |= RAIDF_WLABEL; 1892 else 1893 rs->sc_flags &= ~RAIDF_WLABEL; 1894 break; 1895 1896 case DIOCGDEFLABEL: 1897 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1898 break; 1899 1900 #ifdef __HAVE_OLD_DISKLABEL 1901 case ODIOCGDEFLABEL: 1902 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1903 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1904 return ENOTTY; 1905 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1906 break; 1907 #endif 1908 1909 case DIOCAWEDGE: 1910 case DIOCDWEDGE: 1911 dkw = (void *)data; 1912 1913 /* If the ioctl happens here, the parent is us. */ 1914 (void)strcpy(dkw->dkw_parent, rs->sc_xname); 1915 return cmd == DIOCAWEDGE ? 
dkwedge_add(dkw) : dkwedge_del(dkw); 1916 1917 case DIOCLWEDGES: 1918 return dkwedge_list(&rs->sc_dkdev, 1919 (struct dkwedge_list *)data, l); 1920 case DIOCCACHESYNC: 1921 return rf_sync_component_caches(raidPtr); 1922 1923 case DIOCGSTRATEGY: 1924 { 1925 struct disk_strategy *dks = (void *)data; 1926 1927 s = splbio(); 1928 strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue), 1929 sizeof(dks->dks_name)); 1930 splx(s); 1931 dks->dks_paramlen = 0; 1932 1933 return 0; 1934 } 1935 1936 case DIOCSSTRATEGY: 1937 { 1938 struct disk_strategy *dks = (void *)data; 1939 struct bufq_state *new; 1940 struct bufq_state *old; 1941 1942 if (dks->dks_param != NULL) { 1943 return EINVAL; 1944 } 1945 dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */ 1946 error = bufq_alloc(&new, dks->dks_name, 1947 BUFQ_EXACT|BUFQ_SORT_RAWBLOCK); 1948 if (error) { 1949 return error; 1950 } 1951 s = splbio(); 1952 old = rs->buf_queue; 1953 bufq_move(new, old); 1954 rs->buf_queue = new; 1955 splx(s); 1956 bufq_free(old); 1957 1958 return 0; 1959 } 1960 1961 default: 1962 retcode = ENOTTY; 1963 } 1964 return (retcode); 1965 1966 } 1967 1968 1969 /* raidinit -- complete the rest of the initialization for the 1970 RAIDframe device. */ 1971 1972 1973 static void 1974 raidinit(struct raid_softc *rs) 1975 { 1976 cfdata_t cf; 1977 int unit; 1978 RF_Raid_t *raidPtr = &rs->sc_r; 1979 1980 unit = raidPtr->raidid; 1981 1982 1983 /* XXX should check return code first... */ 1984 rs->sc_flags |= RAIDF_INITED; 1985 1986 /* XXX doesn't check bounds. 
*/ 1987 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1988 1989 /* attach the pseudo device */ 1990 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); 1991 cf->cf_name = raid_cd.cd_name; 1992 cf->cf_atname = raid_cd.cd_name; 1993 cf->cf_unit = unit; 1994 cf->cf_fstate = FSTATE_STAR; 1995 1996 rs->sc_dev = config_attach_pseudo(cf); 1997 1998 if (rs->sc_dev == NULL) { 1999 printf("raid%d: config_attach_pseudo failed\n", 2000 raidPtr->raidid); 2001 rs->sc_flags &= ~RAIDF_INITED; 2002 free(cf, M_RAIDFRAME); 2003 return; 2004 } 2005 2006 /* disk_attach actually creates space for the CPU disklabel, among 2007 * other things, so it's critical to call this *BEFORE* we try putzing 2008 * with disklabels. */ 2009 2010 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver); 2011 disk_attach(&rs->sc_dkdev); 2012 disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector); 2013 2014 /* XXX There may be a weird interaction here between this, and 2015 * protectedSectors, as used in RAIDframe. */ 2016 2017 rs->sc_size = raidPtr->totalSectors; 2018 2019 dkwedge_discover(&rs->sc_dkdev); 2020 2021 rf_set_geometry(rs, raidPtr); 2022 2023 } 2024 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 2025 /* wake up the daemon & tell it to get us a spare table 2026 * XXX 2027 * the entries in the queues should be tagged with the raidPtr 2028 * so that in the extremely rare case that two recons happen at once, 2029 * we know for which device were requesting a spare table 2030 * XXX 2031 * 2032 * XXX This code is not currently used. 
GO 2033 */ 2034 int 2035 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 2036 { 2037 int retcode; 2038 2039 rf_lock_mutex2(rf_sparet_wait_mutex); 2040 req->next = rf_sparet_wait_queue; 2041 rf_sparet_wait_queue = req; 2042 rf_broadcast_cond2(rf_sparet_wait_cv); 2043 2044 /* mpsleep unlocks the mutex */ 2045 while (!rf_sparet_resp_queue) { 2046 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex); 2047 } 2048 req = rf_sparet_resp_queue; 2049 rf_sparet_resp_queue = req->next; 2050 rf_unlock_mutex2(rf_sparet_wait_mutex); 2051 2052 retcode = req->fcol; 2053 RF_Free(req, sizeof(*req)); /* this is not the same req as we 2054 * alloc'd */ 2055 return (retcode); 2056 } 2057 #endif 2058 2059 /* a wrapper around rf_DoAccess that extracts appropriate info from the 2060 * bp & passes it down. 2061 * any calls originating in the kernel must use non-blocking I/O 2062 * do some extra sanity checking to return "appropriate" error values for 2063 * certain conditions (to make some standard utilities work) 2064 * 2065 * Formerly known as: rf_DoAccessKernel 2066 */ 2067 void 2068 raidstart(RF_Raid_t *raidPtr) 2069 { 2070 RF_SectorCount_t num_blocks, pb, sum; 2071 RF_RaidAddr_t raid_addr; 2072 struct partition *pp; 2073 daddr_t blocknum; 2074 struct raid_softc *rs; 2075 int do_async; 2076 struct buf *bp; 2077 int rc; 2078 2079 rs = raidPtr->softc; 2080 /* quick check to see if anything has died recently */ 2081 rf_lock_mutex2(raidPtr->mutex); 2082 if (raidPtr->numNewFailures > 0) { 2083 rf_unlock_mutex2(raidPtr->mutex); 2084 rf_update_component_labels(raidPtr, 2085 RF_NORMAL_COMPONENT_UPDATE); 2086 rf_lock_mutex2(raidPtr->mutex); 2087 raidPtr->numNewFailures--; 2088 } 2089 2090 /* Check to see if we're at the limit... 
*/ 2091 while (raidPtr->openings > 0) { 2092 rf_unlock_mutex2(raidPtr->mutex); 2093 2094 /* get the next item, if any, from the queue */ 2095 if ((bp = bufq_get(rs->buf_queue)) == NULL) { 2096 /* nothing more to do */ 2097 return; 2098 } 2099 2100 /* Ok, for the bp we have here, bp->b_blkno is relative to the 2101 * partition.. Need to make it absolute to the underlying 2102 * device.. */ 2103 2104 blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector; 2105 if (DISKPART(bp->b_dev) != RAW_PART) { 2106 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 2107 blocknum += pp->p_offset; 2108 } 2109 2110 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 2111 (int) blocknum)); 2112 2113 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 2114 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 2115 2116 /* *THIS* is where we adjust what block we're going to... 2117 * but DO NOT TOUCH bp->b_blkno!!! */ 2118 raid_addr = blocknum; 2119 2120 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 2121 pb = (bp->b_bcount & raidPtr->sectorMask) ? 
		    1 : 0;
		/* (tail of raidstart(): bp was dequeued above and its block
		 * number made absolute; validate and submit it.)
		 * sum is used purely for range/overflow checking below. */
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			/* NOTE(review): "1 ||" forces this branch; harmless
			 * since db1_printf() is normally compiled out. */
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* Reject a request that runs past the end of the array, or
		 * whose sector arithmetic wrapped (sum < any addend). */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* The transfer must be a whole number of RAID sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Consume one opening; the loop condition re-checks it. */
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			/* rf_DoAccess() refused the request outright; no
			 * completion callback will arrive, so finish the
			 * buf here. */
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP at once through the normal I/O
		 * completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Aim the buf at the component device and queue it. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}

/* this is the callback function associated with a I/O invoked from
   kernel code.  Runs from interrupt/biodone context: it only records the
   outcome and hands the request to the raidio thread via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 * startSect/numSect are in units of the RAID's sector size
 * (logBytesPerSector); b_blkno is converted to DEV_BSIZE units.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

/*
 * Build a synthetic disklabel describing the RAID device from its
 * geometry; used when no on-disk label exists (and as the template
 * before reading one).
 */
static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label... */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
	    (lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	/* NOTE(review): checksums rs->sc_dkdev.dk_label rather than lp;
	 * identical when called from raidgetdisklabel() (lp == dk_label),
	 * but worth confirming for any other caller. */
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;
	RF_Raid_t *raidPtr;

	if ((rs = raidget(unit)) == NULL)
		return;

	lp = rs->sc_dkdev.dk_label;
	clp = rs->sc_dkdev.dk_cpulabel;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = &rs->sc_r;

	/* Start from the synthetic label so readdisklabel() has sane
	 * defaults to fall back on. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :-> GO )
 */
static int
raidlock(struct raid_softc *rs)
{
	int error;

	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
/*
 * Unlock and wake up any waiters.
 */
static void
raidunlock(struct raid_softc *rs)
{

	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		rs->sc_flags &= ~RAIDF_WANTED;
		wakeup(rs);
	}
}


/* On-disk layout of per-component metadata: the component label lives at
 * a fixed byte offset, with the parity map following it. */
#define RF_COMPONENT_INFO_OFFSET   16384	/* bytes */
#define RF_COMPONENT_INFO_SIZE     1024		/* bytes */
#define RF_PARITY_MAP_SIZE         RF_PARITYMAP_NBYTE

/* Byte offset of the component label on each component. */
static daddr_t
rf_component_info_offset(void)
{

	return RF_COMPONENT_INFO_OFFSET;
}

/* Size reserved for the component label, rounded up to at least one
 * device sector so the I/O is sector-aligned. */
static daddr_t
rf_component_info_size(unsigned secsize)
{
	daddr_t info_size;

	KASSERT(secsize);
	if (secsize > RF_COMPONENT_INFO_SIZE)
		info_size = secsize;
	else
		info_size = RF_COMPONENT_INFO_SIZE;

	return info_size;
}

/* Byte offset of the parity map: immediately after the (sector-rounded)
 * component label area. */
static daddr_t
rf_parity_map_offset(RF_Raid_t *raidPtr)
{
	daddr_t map_offset;

	KASSERT(raidPtr->bytesPerSector);
	if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
		map_offset = raidPtr->bytesPerSector;
	else
		map_offset = RF_COMPONENT_INFO_SIZE;
	map_offset += rf_component_info_offset();

	return map_offset;
}

/* Size reserved for the parity map, again at least one device sector. */
static daddr_t
rf_parity_map_size(RF_Raid_t *raidPtr)
{
	daddr_t map_size;

	if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
		map_size = raidPtr->bytesPerSector;
	else
		map_size = RF_PARITY_MAP_SIZE;

	return map_size;
}

/* Mark component `col' clean in its on-disk label and flush it. */
int
raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_CLEAN;
	raidflush_component_label(raidPtr, col);
	return(0);
}


/* Mark component `col' dirty in its on-disk label and flush it. */
int
raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *clabel;

	clabel = raidget_component_label(raidPtr, col);
	clabel->clean = RF_RAID_DIRTY;
	raidflush_component_label(raidPtr, col);
return(0); 2586 } 2587 2588 int 2589 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2590 { 2591 KASSERT(raidPtr->bytesPerSector); 2592 return raidread_component_label(raidPtr->bytesPerSector, 2593 raidPtr->Disks[col].dev, 2594 raidPtr->raid_cinfo[col].ci_vp, 2595 &raidPtr->raid_cinfo[col].ci_label); 2596 } 2597 2598 RF_ComponentLabel_t * 2599 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2600 { 2601 return &raidPtr->raid_cinfo[col].ci_label; 2602 } 2603 2604 int 2605 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2606 { 2607 RF_ComponentLabel_t *label; 2608 2609 label = &raidPtr->raid_cinfo[col].ci_label; 2610 label->mod_counter = raidPtr->mod_counter; 2611 #ifndef RF_NO_PARITY_MAP 2612 label->parity_map_modcount = label->mod_counter; 2613 #endif 2614 return raidwrite_component_label(raidPtr->bytesPerSector, 2615 raidPtr->Disks[col].dev, 2616 raidPtr->raid_cinfo[col].ci_vp, label); 2617 } 2618 2619 2620 static int 2621 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2622 RF_ComponentLabel_t *clabel) 2623 { 2624 return raidread_component_area(dev, b_vp, clabel, 2625 sizeof(RF_ComponentLabel_t), 2626 rf_component_info_offset(), 2627 rf_component_info_size(secsize)); 2628 } 2629 2630 /* ARGSUSED */ 2631 static int 2632 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2633 size_t msize, daddr_t offset, daddr_t dsize) 2634 { 2635 struct buf *bp; 2636 const struct bdevsw *bdev; 2637 int error; 2638 2639 /* XXX should probably ensure that we don't try to do this if 2640 someone has changed rf_protected_sectors. */ 2641 2642 if (b_vp == NULL) { 2643 /* For whatever reason, this component is not valid. 2644 Don't try to read a component label from it. */ 2645 return(EINVAL); 2646 } 2647 2648 /* get a block of the appropriate size... 
*/ 2649 bp = geteblk((int)dsize); 2650 bp->b_dev = dev; 2651 2652 /* get our ducks in a row for the read */ 2653 bp->b_blkno = offset / DEV_BSIZE; 2654 bp->b_bcount = dsize; 2655 bp->b_flags |= B_READ; 2656 bp->b_resid = dsize; 2657 2658 bdev = bdevsw_lookup(bp->b_dev); 2659 if (bdev == NULL) 2660 return (ENXIO); 2661 (*bdev->d_strategy)(bp); 2662 2663 error = biowait(bp); 2664 2665 if (!error) { 2666 memcpy(data, bp->b_data, msize); 2667 } 2668 2669 brelse(bp, 0); 2670 return(error); 2671 } 2672 2673 2674 static int 2675 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2676 RF_ComponentLabel_t *clabel) 2677 { 2678 return raidwrite_component_area(dev, b_vp, clabel, 2679 sizeof(RF_ComponentLabel_t), 2680 rf_component_info_offset(), 2681 rf_component_info_size(secsize), 0); 2682 } 2683 2684 /* ARGSUSED */ 2685 static int 2686 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2687 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2688 { 2689 struct buf *bp; 2690 const struct bdevsw *bdev; 2691 int error; 2692 2693 /* get a block of the appropriate size... */ 2694 bp = geteblk((int)dsize); 2695 bp->b_dev = dev; 2696 2697 /* get our ducks in a row for the write */ 2698 bp->b_blkno = offset / DEV_BSIZE; 2699 bp->b_bcount = dsize; 2700 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0); 2701 bp->b_resid = dsize; 2702 2703 memset(bp->b_data, 0, dsize); 2704 memcpy(bp->b_data, data, msize); 2705 2706 bdev = bdevsw_lookup(bp->b_dev); 2707 if (bdev == NULL) 2708 return (ENXIO); 2709 (*bdev->d_strategy)(bp); 2710 if (asyncp) 2711 return 0; 2712 error = biowait(bp); 2713 brelse(bp, 0); 2714 if (error) { 2715 #if 1 2716 printf("Failed to write RAID component info!\n"); 2717 #endif 2718 } 2719 2720 return(error); 2721 } 2722 2723 void 2724 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2725 { 2726 int c; 2727 2728 for (c = 0; c < raidPtr->numCol; c++) { 2729 /* Skip dead disks. 
		 */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}

/*
 * Read the parity map from every live component and merge the copies,
 * so `map' ends up with the union of dirty regions.
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}

/*
 * Bump the mod counter and mark every live component (and in-use spare)
 * dirty on disk; called when the set starts taking writes.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}

/*
 * Rewrite the labels of all optimal components (and in-use spares) with
 * the current mod counter; on a final update with clean parity, also
 * mark them clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find which column this spare replaced. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}

/*
 * Close a component's vnode.  Auto-configured components were opened
 * VOP_OPEN-style at scan time and are closed/vput directly; manually
 * configured ones came from vn_open() and go back through vn_close().
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}


/* Close and forget the vnodes of all components and spares. */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should..
	 */

	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}


/*
 * Kernel-thread body: fail a disk (optionally kicking off reconstruction
 * to a spare), then exit.  `req' is consumed.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Kernel-thread body: rewrite all parity, then mark the set clean on
 * success and wake anyone waiting for the rewrite during shutdown.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/* Kernel-thread body: copy reconstructed data back to a replaced disk. */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/* Kernel-thread body: reconstruct a failed column in place.  `req' is
 * consumed. */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Try to read a component label from (dev, vp).  On a plausible label the
 * component is prepended to ac_list; otherwise the vnode is closed and
 * released.  Returns the (possibly new) list head, or NULL on allocation
 * failure after freeing the whole list.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* Out of memory: tear down everything collected so far. */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL;	/* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}

/*
 * Scan every disk-class device in the system for RAID components: wedges
 * with a RAIDframe partition type, disklabel partitions marked FS_RAID,
 * or (failing those) the raw partition.  Returns a list of candidates.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
	     dv = deviter_next(&di)) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		rf_part_found = 0; /*No raid partition as yet*/

		/* get a vnode for the raw partition of this disk */

		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		error = getdisksize(vp, &numsecs, &secsize);
		if (error) {
			vput(vp);
			continue;
		}
		if (wedge) {
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component() takes over the vnode either way. */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size, numsecs, secsize);
			rf_part_found = 1; /*There is a raid component on this disk*/
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		rf_part_found = 0; /*No raid partitions yet*/
		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[i].p_size, numsecs, secsize);
			rf_part_found = 1; /*There is at least one raid partition on this disk*/
		}

		/*
		 *If there is no raid component on this disk, either in a
		 *disklabel or inside a wedge, check the raw partition as well,
		 *as it is possible to configure raid components on raw disk
		 *devices.
		 */

		if (!rf_part_found) {
			char cname[sizeof(ac_list->devname)];

			dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + RAW_PART);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
				label.d_partitions[RAW_PART].p_size, numsecs, secsize);
		}
	}
	deviter_release(&di);
	return ac_list;
}


/*
 * Sanity-check a component label read off disk; returns 1 if it looks
 * like a genuine RAIDframe label, 0 otherwise.  Also repairs stale
 * 32-bit-era garbage in the *Hi fields when `numsecs' proves the device
 * is small enough (see rf_fix_old_label_size()).
 */
int
rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    /*
	     * numBlocksHi may contain garbage, but it is ok since
	     * the type is unsigned.  If it is really garbage,
	     * rf_fix_old_label_size() will fix it.
	     */
	    rf_component_label_numblocks(clabel) > 0) {
		/*
		 * label looks reasonable enough...
		 * let's make sure it has no old garbage.
		 */
		if (numsecs)
			rf_fix_old_label_size(clabel, numsecs);
		return(1);
	}
	return(0);
}


/*
 * For reasons yet unknown, some old component labels have garbage in
 * the newer numBlocksHi region, and this causes lossage.  Since those
 * disks will also have numsecs set to less than 32 bits of sectors,
 * we can determine when this corruption has occurred, and fix it.
 *
 * The exact same problem, with the same unknown reason, happens to
 * the partitionSizeHi member as well.
3317 */ 3318 static void 3319 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3320 { 3321 3322 if (numsecs < ((uint64_t)1 << 32)) { 3323 if (clabel->numBlocksHi) { 3324 printf("WARNING: total sectors < 32 bits, yet " 3325 "numBlocksHi set\n" 3326 "WARNING: resetting numBlocksHi to zero.\n"); 3327 clabel->numBlocksHi = 0; 3328 } 3329 3330 if (clabel->partitionSizeHi) { 3331 printf("WARNING: total sectors < 32 bits, yet " 3332 "partitionSizeHi set\n" 3333 "WARNING: resetting partitionSizeHi to zero.\n"); 3334 clabel->partitionSizeHi = 0; 3335 } 3336 } 3337 } 3338 3339 3340 #ifdef DEBUG 3341 void 3342 rf_print_component_label(RF_ComponentLabel_t *clabel) 3343 { 3344 uint64_t numBlocks; 3345 static const char *rp[] = { 3346 "No", "Force", "Soft", "*invalid*" 3347 }; 3348 3349 3350 numBlocks = rf_component_label_numblocks(clabel); 3351 3352 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3353 clabel->row, clabel->column, 3354 clabel->num_rows, clabel->num_columns); 3355 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3356 clabel->version, clabel->serial_number, 3357 clabel->mod_counter); 3358 printf(" Clean: %s Status: %d\n", 3359 clabel->clean ? "Yes" : "No", clabel->status); 3360 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3361 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3362 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n", 3363 (char) clabel->parityConfig, clabel->blockSize, numBlocks); 3364 printf(" Autoconfig: %s\n", clabel->autoconfigure ? 
	    "Yes" : "No");
	printf(" Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the list of discovered components into configuration
 * sets: one RF_ConfigSet_t per RAID set, each collecting the
 * components whose labels say they belong together (as judged by
 * rf_does_it_fit()).  Returns the list of sets; panics if memory for
 * a set cannot be allocated.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets. */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above...
				   new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/*
 * Return non-zero if component 'ac' belongs to configuration set
 * 'cset', i.e. its label agrees with that of the first component
 * already in the set; return 0 if it does not fit.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    rf_component_label_numblocks(clabel1) ==
		rf_component_label_numblocks(clabel2) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

/*
 * Determine whether configuration set 'cset' has enough 'live'
 * components (ones whose mod_counter matches the highest value found
 * in the set) to be configured.  Returns 1 if so, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	 */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		/* The set's mod_counter is the highest one among its
		   members; failed components carry stale, lower values. */
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current component for column 'c' (one
		   whose mod_counter matches the set's) */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did the odd component of a mirror
			   pair, and we didn't bail.. reset the
			   even_pair_failed flag, and go on to the
			   next pair....
			 */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for 'raidPtr' from the component label of the
 * first member of the autoconfig list 'ac', then fill in the device
 * names of every member.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ??
					 */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}

/*
 * Set the autoconfigure flag in the component labels of all optimal
 * components and all in-use spares of the set, flushing each label to
 * disk.  Returns new_value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->autoconfigure = new_value;

	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Set the root_partition field in the component labels of all optimal
 * components and all in-use spares of the set, flushing each label to
 * disk.  Returns new_value.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->root_partition = new_value;
	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Close and release the vnodes held by every component of the set.
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free everything associated with a configuration set: each member's
 * component label, each RF_AutoConfig_t, and finally the set itself.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Initialize the set-wide fields of a component label from the
 * current state of the RAID set described by raidPtr.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!"
					 */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}

/*
 * Configure a RAID set from an autoconfiguration set.  Returns the
 * softc of the newly configured set, or NULL if configuration (or
 * the initial allocation) fails.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* Prefer the unit recorded in the label; if that unit is
	   already configured, walk forward to the next free one. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4.
	   Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}

/*
 * Account a completed access against the disk statistics of the RAID
 * device that 'desc' belongs to.
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;
	struct raid_softc *rs;

	bp = (struct buf *)desc->bp;
	rs = desc->raidPtr->softc;
	disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
		    (bp->b_flags & B_READ));
}

/*
 * Initialize a pool with xmin items preallocated, a low-water mark of
 * xmin, and a high-water mark of xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}

/*
 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
 * if there is IO pending and if that IO could possibly be done for a
 * given RAID set.  Returns 0 if IO is waiting and can be done, 1
 * otherwise.
 *
 */

int
rf_buf_queue_check(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs = raidPtr->softc;
	if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
		/* there is work to do */
		return 0;
	}
	/* default is nothing to do */
	return 1;
}

/*
 * Fill in the size-related fields of 'diskPtr' from the size of the
 * device/partition open at 'vp'.  Returns 0 on success or the errno
 * from getdisksize().
 */
int
rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
{
	uint64_t numsecs;
	unsigned secsize;
	int error;

	error = getdisksize(vp, &numsecs, &secsize);
	if (error == 0) {
		diskPtr->blockSize = secsize;
		/* the first rf_protectedSectors are reserved for
		   RAIDframe's own metadata */
		diskPtr->numBlocks = numsecs - rf_protectedSectors;
		diskPtr->partitionSize = numsecs;
		return 0;
	}
	return error;
}

/* autoconf match function: a raid pseudo-device always matches */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}

/* autoconf attach function: nothing to do here; real setup happens
   when the set is configured */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}


/* autoconf detach function: detach the raid unit if it can be locked
   and torn down */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = raidget(device_unit(self));

	if (rs == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	/* XXXkd: raidput(rs) ??? */

	return error;
}

/*
 * Publish the synthesized geometry of the RAID device to the disk
 * subsystem.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}

/*
 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
 * We end up returning whatever error was returned by the first cache flush
 * that fails.
3948 */ 3949 3950 int 3951 rf_sync_component_caches(RF_Raid_t *raidPtr) 3952 { 3953 int c, sparecol; 3954 int e,error; 3955 int force = 1; 3956 3957 error = 0; 3958 for (c = 0; c < raidPtr->numCol; c++) { 3959 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3960 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3961 &force, FWRITE, NOCRED); 3962 if (e) { 3963 if (e != ENODEV) 3964 printf("raid%d: cache flush to component %s failed.\n", 3965 raidPtr->raidid, raidPtr->Disks[c].devname); 3966 if (error == 0) { 3967 error = e; 3968 } 3969 } 3970 } 3971 } 3972 3973 for( c = 0; c < raidPtr->numSpare ; c++) { 3974 sparecol = raidPtr->numCol + c; 3975 /* Need to ensure that the reconstruct actually completed! */ 3976 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3977 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3978 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3979 if (e) { 3980 if (e != ENODEV) 3981 printf("raid%d: cache flush to component %s failed.\n", 3982 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3983 if (error == 0) { 3984 error = e; 3985 } 3986 } 3987 } 3988 } 3989 return error; 3990 } 3991