1 /* $NetBSD: rf_netbsdkintf.c,v 1.310 2014/05/12 15:53:01 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.310 2014/05/12 15:53:01 christos Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_compat_netbsd.h" 108 #include "opt_raid_autoconfig.h" 109 #endif 110 111 #include <sys/param.h> 112 #include <sys/errno.h> 113 #include <sys/pool.h> 114 #include <sys/proc.h> 115 #include <sys/queue.h> 116 #include <sys/disk.h> 117 #include <sys/device.h> 118 #include <sys/stat.h> 119 #include <sys/ioctl.h> 120 #include <sys/fcntl.h> 121 #include <sys/systm.h> 122 #include <sys/vnode.h> 123 #include <sys/disklabel.h> 124 #include <sys/conf.h> 125 #include <sys/buf.h> 126 #include <sys/bufq.h> 127 #include <sys/reboot.h> 128 #include <sys/kauth.h> 129 130 #include <prop/proplib.h> 131 132 #include 
<dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include <dev/raidframe/rf_paritymap.h>

#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#ifdef COMPAT_50
#include "rf_compat50.h"
#endif

#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* Synchronization for the sparse-spare-table installation handshake below. */
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */
#endif

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
struct raid_softc;
static void raidinit(struct raid_softc *);

void raidattach(int);
static int raid_match(device_t, cfdata_t, void *);
static void raid_attach(device_t, device_t, void *);
static int raid_detach(device_t, int);

static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t);
static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
    daddr_t, daddr_t, int);

static int raidwrite_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);
static int raidread_component_label(unsigned,
    dev_t, struct vnode *, RF_ComponentLabel_t *);


/* Block/character device entry points (see bdevsw/cdevsw tables below). */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_flag = D_DISK
};

const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_DISK
};

static struct dkdriver rf_dkdriver = { raidstrategy, minphys };

/* Per-unit driver state; one of these exists per configured raid(4) unit. */
struct raid_softc {
	device_t sc_dev;
	int     sc_unit;
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
	RF_Raid_t sc_r;
	LIST_ENTRY(raid_softc) sc_link;
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
    struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);

/*
 * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
 * Note that this is overridden by having RAID_AUTOCONFIG as an option
 * in the kernel config file.
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

/* Global list of all raid(4) softcs, protected by raid_lock. */
static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

/*
 * Allocate and minimally initialize a softc for `unit': set the unit
 * number and create the per-unit buffer queue.  Does not insert the
 * softc into the global list -- raidget() does that.
 */
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
		/* NOTE(review): with KM_SLEEP this path is presumably
		 * unreachable -- kept as defensive diagnostics. */
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	bufq_alloc(&sc->buf_queue, "fcfs", BUFQ_SORT_RAWBLOCK);
	return sc;
}

/* Release the resources allocated by raidcreate(). */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}

/*
 * Look up the softc for `unit', creating and inserting a fresh one if
 * none exists.  Returns NULL only for a negative unit number.
 *
 * NOTE(review): the list lock is dropped across raidcreate() and the
 * list is not re-scanned before insertion, so two concurrent lookups of
 * a missing unit could insert duplicates -- presumably callers are
 * serialized at a higher level; verify.
 */
static struct raid_softc *
raidget(int unit) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if ((sc = raidcreate(unit)) == NULL)
		return NULL;
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

/* Unlink a softc from the global list and free it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

/*
 * Pseudo-device attach entry point: boot the RAIDframe core, attach the
 * autoconf glue, and register a finalizer that will autoconfigure RAID
 * sets after all real hardware has been found.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * Autoconfiguration finalizer: find all RAID components on the system,
 * sort them into sets, and configure the valid ones (via
 * rf_buildroothack()).  Runs at most once; returns 0 when it did
 * nothing, 1 when it ran.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

/*
 * Return non-zero if any component of RAID set `r' lives on the device
 * `bdv' (the boot device).  Component names are compared by prefix
 * after stripping "/dev/"; "dk" wedges are translated to their parent
 * disk name first.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname = device_xname(bdv);
	size_t len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}

/*
 * Walk the discovered config sets, autoconfigure the eligible ones,
 * and -- unless the user hardwired a root spec -- possibly point
 * booted_device at a rootable RAID set.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition.  This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition.  We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used).  For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume 'a' */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
		} else
			candidate_root = rsc->sc_dev;
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_partition = 0;	/* XXX assume 'a' */
		}
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* Re-count, keeping only sets that contain the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

/*
 * d_psize entry point: return the size (in DEV_BSIZE units) of the swap
 * partition `dev', or -1 if the unit is not configured or the partition
 * is not FS_SWAP.  Transiently opens/closes the unit if it wasn't open.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

/*
 * d_dump entry point: dump `size' bytes at `va' to block `blkno' of the
 * dump partition.  Only RAID 1 sets are supported; the write goes
 * directly to one live (or spared) component.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive..
*/ 676 677 /* 678 Look for a component to dump to. The preference for the 679 component to dump to is as follows: 680 1) the master 681 2) a used_spare of the master 682 3) the slave 683 4) a used_spare of the slave 684 */ 685 686 dumpto = -1; 687 for (c = 0; c < raidPtr->numCol; c++) { 688 if (raidPtr->Disks[c].status == rf_ds_optimal) { 689 /* this might be the one */ 690 dumpto = c; 691 break; 692 } 693 } 694 695 /* 696 At this point we have possibly selected a live master or a 697 live slave. We now check to see if there is a spared 698 master (or a spared slave), if we didn't find a live master 699 or a live slave. 700 */ 701 702 for (c = 0; c < raidPtr->numSpare; c++) { 703 sparecol = raidPtr->numCol + c; 704 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 705 /* How about this one? */ 706 scol = -1; 707 for(j=0;j<raidPtr->numCol;j++) { 708 if (raidPtr->Disks[j].spareCol == sparecol) { 709 scol = j; 710 break; 711 } 712 } 713 if (scol == 0) { 714 /* 715 We must have found a spared master! 716 We'll take that over anything else 717 found so far. (We couldn't have 718 found a real master before, since 719 this is a used spare, and it's 720 saying that it's replacing the 721 master.) On reboot (with 722 autoconfiguration turned on) 723 sparecol will become the 1st 724 component (component0) of this set. 725 */ 726 dumpto = sparecol; 727 break; 728 } else if (scol != -1) { 729 /* 730 Must be a spared slave. We'll dump 731 to that if we havn't found anything 732 else so far. 733 */ 734 if (dumpto == -1) 735 dumpto = sparecol; 736 } 737 } 738 } 739 740 if (dumpto == -1) { 741 /* we couldn't find any live components to dump to!?!? 742 */ 743 error = EINVAL; 744 goto out; 745 } 746 747 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev); 748 749 /* 750 Note that blkno is relative to this particular partition. 
751 By adding the offset of this partition in the RAID 752 set, and also adding RF_PROTECTED_SECTORS, we get a 753 value that is relative to the partition used for the 754 underlying component. 755 */ 756 757 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev, 758 blkno + offset, va, size); 759 760 out: 761 raidunlock(rs); 762 763 return error; 764 } 765 /* ARGSUSED */ 766 int 767 raidopen(dev_t dev, int flags, int fmt, 768 struct lwp *l) 769 { 770 int unit = raidunit(dev); 771 struct raid_softc *rs; 772 struct disklabel *lp; 773 int part, pmask; 774 int error = 0; 775 776 if ((rs = raidget(unit)) == NULL) 777 return ENXIO; 778 if ((error = raidlock(rs)) != 0) 779 return (error); 780 781 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) { 782 error = EBUSY; 783 goto bad; 784 } 785 786 lp = rs->sc_dkdev.dk_label; 787 788 part = DISKPART(dev); 789 790 /* 791 * If there are wedges, and this is not RAW_PART, then we 792 * need to fail. 793 */ 794 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) { 795 error = EBUSY; 796 goto bad; 797 } 798 pmask = (1 << part); 799 800 if ((rs->sc_flags & RAIDF_INITED) && 801 (rs->sc_dkdev.dk_openmask == 0)) 802 raidgetdisklabel(dev); 803 804 /* make sure that this partition exists */ 805 806 if (part != RAW_PART) { 807 if (((rs->sc_flags & RAIDF_INITED) == 0) || 808 ((part >= lp->d_npartitions) || 809 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 810 error = ENXIO; 811 goto bad; 812 } 813 } 814 /* Prevent this unit from being unconfigured while open. */ 815 switch (fmt) { 816 case S_IFCHR: 817 rs->sc_dkdev.dk_copenmask |= pmask; 818 break; 819 820 case S_IFBLK: 821 rs->sc_dkdev.dk_bopenmask |= pmask; 822 break; 823 } 824 825 if ((rs->sc_dkdev.dk_openmask == 0) && 826 ((rs->sc_flags & RAIDF_INITED) != 0)) { 827 /* First one... mark things as dirty... Note that we *MUST* 828 have done a configure before this. I DO NOT WANT TO BE 829 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 830 THAT THEY BELONG TOGETHER!!!!! 
*/ 831 /* XXX should check to see if we're only open for reading 832 here... If so, we needn't do this, but then need some 833 other way of keeping track of what's happened.. */ 834 835 rf_markalldirty(&rs->sc_r); 836 } 837 838 839 rs->sc_dkdev.dk_openmask = 840 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 841 842 bad: 843 raidunlock(rs); 844 845 return (error); 846 847 848 } 849 /* ARGSUSED */ 850 int 851 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 852 { 853 int unit = raidunit(dev); 854 struct raid_softc *rs; 855 int error = 0; 856 int part; 857 858 if ((rs = raidget(unit)) == NULL) 859 return ENXIO; 860 861 if ((error = raidlock(rs)) != 0) 862 return (error); 863 864 part = DISKPART(dev); 865 866 /* ...that much closer to allowing unconfiguration... */ 867 switch (fmt) { 868 case S_IFCHR: 869 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 870 break; 871 872 case S_IFBLK: 873 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 874 break; 875 } 876 rs->sc_dkdev.dk_openmask = 877 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 878 879 if ((rs->sc_dkdev.dk_openmask == 0) && 880 ((rs->sc_flags & RAIDF_INITED) != 0)) { 881 /* Last one... device is not unconfigured yet. 882 Device shutdown has taken care of setting the 883 clean bits if RAIDF_INITED is not set 884 mark things as clean... */ 885 886 rf_update_component_labels(&rs->sc_r, 887 RF_FINAL_COMPONENT_UPDATE); 888 889 /* If the kernel is shutting down, it will detach 890 * this RAID set soon enough. 
891 */ 892 } 893 894 raidunlock(rs); 895 return (0); 896 897 } 898 899 void 900 raidstrategy(struct buf *bp) 901 { 902 unsigned int unit = raidunit(bp->b_dev); 903 RF_Raid_t *raidPtr; 904 int wlabel; 905 struct raid_softc *rs; 906 907 if ((rs = raidget(unit)) == NULL) { 908 bp->b_error = ENXIO; 909 goto done; 910 } 911 if ((rs->sc_flags & RAIDF_INITED) == 0) { 912 bp->b_error = ENXIO; 913 goto done; 914 } 915 raidPtr = &rs->sc_r; 916 if (!raidPtr->valid) { 917 bp->b_error = ENODEV; 918 goto done; 919 } 920 if (bp->b_bcount == 0) { 921 db1_printf(("b_bcount is zero..\n")); 922 goto done; 923 } 924 925 /* 926 * Do bounds checking and adjust transfer. If there's an 927 * error, the bounds check will flag that for us. 928 */ 929 930 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 931 if (DISKPART(bp->b_dev) == RAW_PART) { 932 uint64_t size; /* device size in DEV_BSIZE unit */ 933 934 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 935 size = raidPtr->totalSectors << 936 (raidPtr->logBytesPerSector - DEV_BSHIFT); 937 } else { 938 size = raidPtr->totalSectors >> 939 (DEV_BSHIFT - raidPtr->logBytesPerSector); 940 } 941 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 942 goto done; 943 } 944 } else { 945 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 946 db1_printf(("Bounds check failed!!:%d %d\n", 947 (int) bp->b_blkno, (int) wlabel)); 948 goto done; 949 } 950 } 951 952 rf_lock_mutex2(raidPtr->iodone_lock); 953 954 bp->b_resid = 0; 955 956 /* stuff it onto our queue */ 957 bufq_put(rs->buf_queue, bp); 958 959 /* scheduled the IO to happen at the next convenient time */ 960 rf_signal_cond2(raidPtr->iodone_cv); 961 rf_unlock_mutex2(raidPtr->iodone_lock); 962 963 return; 964 965 done: 966 bp->b_resid = bp->b_bcount; 967 biodone(bp); 968 } 969 /* ARGSUSED */ 970 int 971 raidread(dev_t dev, struct uio *uio, int flags) 972 { 973 int unit = raidunit(dev); 974 struct raid_softc *rs; 975 976 if ((rs = raidget(unit)) == NULL) 977 return ENXIO; 

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}

/*
 * d_write entry point: raw character-device write via physio().
 */
/* ARGSUSED */
int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

/*
 * Tear down a raid unit: refuse if any partition is still open, shut
 * down the RAIDframe core if it was configured, then detach and destroy
 * the disk(9) structures.  Caller is expected to hold the unit lock
 * (hence "_unlocked" -- NOTE(review): presumably meaning "lock already
 * taken by caller"; verify against callers).
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}

/*
 * d_ioctl entry point: dispatch disk(4) and RAIDframe-specific ioctls.
 * The two switch statements below first enforce the FWRITE and
 * RAIDF_INITED preconditions for the relevant commands, then the main
 * dispatch follows.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int     unit = raidunit(dev);
	int     error = 0;
	int     part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
/*	int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCSSTRATEGY:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these...
*/ 1097 switch (cmd) { 1098 case DIOCGDINFO: 1099 case DIOCSDINFO: 1100 case DIOCWDINFO: 1101 #ifdef __HAVE_OLD_DISKLABEL 1102 case ODIOCGDINFO: 1103 case ODIOCWDINFO: 1104 case ODIOCSDINFO: 1105 case ODIOCGDEFLABEL: 1106 #endif 1107 case DIOCGPART: 1108 case DIOCWLABEL: 1109 case DIOCGDEFLABEL: 1110 case DIOCAWEDGE: 1111 case DIOCDWEDGE: 1112 case DIOCLWEDGES: 1113 case DIOCCACHESYNC: 1114 case RAIDFRAME_SHUTDOWN: 1115 case RAIDFRAME_REWRITEPARITY: 1116 case RAIDFRAME_GET_INFO: 1117 case RAIDFRAME_RESET_ACCTOTALS: 1118 case RAIDFRAME_GET_ACCTOTALS: 1119 case RAIDFRAME_KEEP_ACCTOTALS: 1120 case RAIDFRAME_GET_SIZE: 1121 case RAIDFRAME_FAIL_DISK: 1122 case RAIDFRAME_COPYBACK: 1123 case RAIDFRAME_CHECK_RECON_STATUS: 1124 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1125 case RAIDFRAME_GET_COMPONENT_LABEL: 1126 case RAIDFRAME_SET_COMPONENT_LABEL: 1127 case RAIDFRAME_ADD_HOT_SPARE: 1128 case RAIDFRAME_REMOVE_HOT_SPARE: 1129 case RAIDFRAME_INIT_LABELS: 1130 case RAIDFRAME_REBUILD_IN_PLACE: 1131 case RAIDFRAME_CHECK_PARITY: 1132 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1133 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1134 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1135 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1136 case RAIDFRAME_SET_AUTOCONFIG: 1137 case RAIDFRAME_SET_ROOT: 1138 case RAIDFRAME_DELETE_COMPONENT: 1139 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1140 case RAIDFRAME_PARITYMAP_STATUS: 1141 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1142 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1143 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1144 case DIOCGSTRATEGY: 1145 case DIOCSSTRATEGY: 1146 if ((rs->sc_flags & RAIDF_INITED) == 0) 1147 return (ENXIO); 1148 } 1149 1150 switch (cmd) { 1151 #ifdef COMPAT_50 1152 case RAIDFRAME_GET_INFO50: 1153 return rf_get_info50(raidPtr, data); 1154 1155 case RAIDFRAME_CONFIGURE50: 1156 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0) 1157 return retcode; 1158 goto config; 1159 #endif 1160 /* configure the system */ 1161 case RAIDFRAME_CONFIGURE: 1162 
1163 if (raidPtr->valid) { 1164 /* There is a valid RAID set running on this unit! */ 1165 printf("raid%d: Device already configured!\n",unit); 1166 return(EINVAL); 1167 } 1168 1169 /* copy-in the configuration information */ 1170 /* data points to a pointer to the configuration structure */ 1171 1172 u_cfg = *((RF_Config_t **) data); 1173 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1174 if (k_cfg == NULL) { 1175 return (ENOMEM); 1176 } 1177 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1178 if (retcode) { 1179 RF_Free(k_cfg, sizeof(RF_Config_t)); 1180 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1181 retcode)); 1182 return (retcode); 1183 } 1184 goto config; 1185 config: 1186 /* allocate a buffer for the layout-specific data, and copy it 1187 * in */ 1188 if (k_cfg->layoutSpecificSize) { 1189 if (k_cfg->layoutSpecificSize > 10000) { 1190 /* sanity check */ 1191 RF_Free(k_cfg, sizeof(RF_Config_t)); 1192 return (EINVAL); 1193 } 1194 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1195 (u_char *)); 1196 if (specific_buf == NULL) { 1197 RF_Free(k_cfg, sizeof(RF_Config_t)); 1198 return (ENOMEM); 1199 } 1200 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1201 k_cfg->layoutSpecificSize); 1202 if (retcode) { 1203 RF_Free(k_cfg, sizeof(RF_Config_t)); 1204 RF_Free(specific_buf, 1205 k_cfg->layoutSpecificSize); 1206 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1207 retcode)); 1208 return (retcode); 1209 } 1210 } else 1211 specific_buf = NULL; 1212 k_cfg->layoutSpecific = specific_buf; 1213 1214 /* should do some kind of sanity check on the configuration. 1215 * Store the sum of all the bytes in the last byte? 
*/ 1216 1217 /* configure the system */ 1218 1219 /* 1220 * Clear the entire RAID descriptor, just to make sure 1221 * there is no stale data left in the case of a 1222 * reconfiguration 1223 */ 1224 memset(raidPtr, 0, sizeof(*raidPtr)); 1225 raidPtr->softc = rs; 1226 raidPtr->raidid = unit; 1227 1228 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1229 1230 if (retcode == 0) { 1231 1232 /* allow this many simultaneous IO's to 1233 this RAID device */ 1234 raidPtr->openings = RAIDOUTSTANDING; 1235 1236 raidinit(rs); 1237 rf_markalldirty(raidPtr); 1238 } 1239 /* free the buffers. No return code here. */ 1240 if (k_cfg->layoutSpecificSize) { 1241 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1242 } 1243 RF_Free(k_cfg, sizeof(RF_Config_t)); 1244 1245 return (retcode); 1246 1247 /* shutdown the system */ 1248 case RAIDFRAME_SHUTDOWN: 1249 1250 part = DISKPART(dev); 1251 pmask = (1 << part); 1252 1253 if ((error = raidlock(rs)) != 0) 1254 return (error); 1255 1256 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1257 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1258 (rs->sc_dkdev.dk_copenmask & pmask))) 1259 retcode = EBUSY; 1260 else { 1261 rs->sc_flags |= RAIDF_SHUTDOWN; 1262 rs->sc_dkdev.dk_copenmask &= ~pmask; 1263 rs->sc_dkdev.dk_bopenmask &= ~pmask; 1264 rs->sc_dkdev.dk_openmask &= ~pmask; 1265 retcode = 0; 1266 } 1267 1268 raidunlock(rs); 1269 1270 if (retcode != 0) 1271 return retcode; 1272 1273 /* free the pseudo device attach bits */ 1274 1275 cf = device_cfdata(rs->sc_dev); 1276 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0) 1277 free(cf, M_RAIDFRAME); 1278 1279 return (retcode); 1280 case RAIDFRAME_GET_COMPONENT_LABEL: 1281 clabel_ptr = (RF_ComponentLabel_t **) data; 1282 /* need to read the component label for the disk indicated 1283 by row,column in clabel */ 1284 1285 /* 1286 * Perhaps there should be an option to skip the in-core 1287 * copy and hit the disk, as with disklabel(8). 
1288 */ 1289 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *)); 1290 1291 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel)); 1292 1293 if (retcode) { 1294 RF_Free(clabel, sizeof(*clabel)); 1295 return retcode; 1296 } 1297 1298 clabel->row = 0; /* Don't allow looking at anything else.*/ 1299 1300 column = clabel->column; 1301 1302 if ((column < 0) || (column >= raidPtr->numCol + 1303 raidPtr->numSpare)) { 1304 RF_Free(clabel, sizeof(*clabel)); 1305 return EINVAL; 1306 } 1307 1308 RF_Free(clabel, sizeof(*clabel)); 1309 1310 clabel = raidget_component_label(raidPtr, column); 1311 1312 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr)); 1313 1314 #if 0 1315 case RAIDFRAME_SET_COMPONENT_LABEL: 1316 clabel = (RF_ComponentLabel_t *) data; 1317 1318 /* XXX check the label for valid stuff... */ 1319 /* Note that some things *should not* get modified -- 1320 the user should be re-initing the labels instead of 1321 trying to patch things. 1322 */ 1323 1324 raidid = raidPtr->raidid; 1325 #ifdef DEBUG 1326 printf("raid%d: Got component label:\n", raidid); 1327 printf("raid%d: Version: %d\n", raidid, clabel->version); 1328 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1329 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1330 printf("raid%d: Column: %d\n", raidid, clabel->column); 1331 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1332 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1333 printf("raid%d: Status: %d\n", raidid, clabel->status); 1334 #endif 1335 clabel->row = 0; 1336 column = clabel->column; 1337 1338 if ((column < 0) || (column >= raidPtr->numCol)) { 1339 return(EINVAL); 1340 } 1341 1342 /* XXX this isn't allowed to do anything for now :-) */ 1343 1344 /* XXX and before it is, we need to fill in the rest 1345 of the fields!?!?!?! 
*/ 1346 memcpy(raidget_component_label(raidPtr, column), 1347 clabel, sizeof(*clabel)); 1348 raidflush_component_label(raidPtr, column); 1349 return (0); 1350 #endif 1351 1352 case RAIDFRAME_INIT_LABELS: 1353 clabel = (RF_ComponentLabel_t *) data; 1354 /* 1355 we only want the serial number from 1356 the above. We get all the rest of the information 1357 from the config that was used to create this RAID 1358 set. 1359 */ 1360 1361 raidPtr->serial_number = clabel->serial_number; 1362 1363 for(column=0;column<raidPtr->numCol;column++) { 1364 diskPtr = &raidPtr->Disks[column]; 1365 if (!RF_DEAD_DISK(diskPtr->status)) { 1366 ci_label = raidget_component_label(raidPtr, 1367 column); 1368 /* Zeroing this is important. */ 1369 memset(ci_label, 0, sizeof(*ci_label)); 1370 raid_init_component_label(raidPtr, ci_label); 1371 ci_label->serial_number = 1372 raidPtr->serial_number; 1373 ci_label->row = 0; /* we dont' pretend to support more */ 1374 rf_component_label_set_partitionsize(ci_label, 1375 diskPtr->partitionSize); 1376 ci_label->column = column; 1377 raidflush_component_label(raidPtr, column); 1378 } 1379 /* XXXjld what about the spares? */ 1380 } 1381 1382 return (retcode); 1383 case RAIDFRAME_SET_AUTOCONFIG: 1384 d = rf_set_autoconfig(raidPtr, *(int *) data); 1385 printf("raid%d: New autoconfig value is: %d\n", 1386 raidPtr->raidid, d); 1387 *(int *) data = d; 1388 return (retcode); 1389 1390 case RAIDFRAME_SET_ROOT: 1391 d = rf_set_rootpartition(raidPtr, *(int *) data); 1392 printf("raid%d: New rootpartition value is: %d\n", 1393 raidPtr->raidid, d); 1394 *(int *) data = d; 1395 return (retcode); 1396 1397 /* initialize all parity */ 1398 case RAIDFRAME_REWRITEPARITY: 1399 1400 if (raidPtr->Layout.map->faultsTolerated == 0) { 1401 /* Parity for RAID 0 is trivially correct */ 1402 raidPtr->parity_good = RF_RAID_CLEAN; 1403 return(0); 1404 } 1405 1406 if (raidPtr->parity_rewrite_in_progress == 1) { 1407 /* Re-write is already in progress! 
*/ 1408 return(EINVAL); 1409 } 1410 1411 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1412 rf_RewriteParityThread, 1413 raidPtr,"raid_parity"); 1414 return (retcode); 1415 1416 1417 case RAIDFRAME_ADD_HOT_SPARE: 1418 sparePtr = (RF_SingleComponent_t *) data; 1419 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1420 retcode = rf_add_hot_spare(raidPtr, &component); 1421 return(retcode); 1422 1423 case RAIDFRAME_REMOVE_HOT_SPARE: 1424 return(retcode); 1425 1426 case RAIDFRAME_DELETE_COMPONENT: 1427 componentPtr = (RF_SingleComponent_t *)data; 1428 memcpy( &component, componentPtr, 1429 sizeof(RF_SingleComponent_t)); 1430 retcode = rf_delete_component(raidPtr, &component); 1431 return(retcode); 1432 1433 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1434 componentPtr = (RF_SingleComponent_t *)data; 1435 memcpy( &component, componentPtr, 1436 sizeof(RF_SingleComponent_t)); 1437 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1438 return(retcode); 1439 1440 case RAIDFRAME_REBUILD_IN_PLACE: 1441 1442 if (raidPtr->Layout.map->faultsTolerated == 0) { 1443 /* Can't do this on a RAID 0!! */ 1444 return(EINVAL); 1445 } 1446 1447 if (raidPtr->recon_in_progress == 1) { 1448 /* a reconstruct is already in progress! */ 1449 return(EINVAL); 1450 } 1451 1452 componentPtr = (RF_SingleComponent_t *) data; 1453 memcpy( &component, componentPtr, 1454 sizeof(RF_SingleComponent_t)); 1455 component.row = 0; /* we don't support any more */ 1456 column = component.column; 1457 1458 if ((column < 0) || (column >= raidPtr->numCol)) { 1459 return(EINVAL); 1460 } 1461 1462 rf_lock_mutex2(raidPtr->mutex); 1463 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1464 (raidPtr->numFailures > 0)) { 1465 /* XXX 0 above shouldn't be constant!!! */ 1466 /* some component other than this has failed. 1467 Let's not make things worse than they already 1468 are... 
*/ 1469 printf("raid%d: Unable to reconstruct to disk at:\n", 1470 raidPtr->raidid); 1471 printf("raid%d: Col: %d Too many failures.\n", 1472 raidPtr->raidid, column); 1473 rf_unlock_mutex2(raidPtr->mutex); 1474 return (EINVAL); 1475 } 1476 if (raidPtr->Disks[column].status == 1477 rf_ds_reconstructing) { 1478 printf("raid%d: Unable to reconstruct to disk at:\n", 1479 raidPtr->raidid); 1480 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column); 1481 1482 rf_unlock_mutex2(raidPtr->mutex); 1483 return (EINVAL); 1484 } 1485 if (raidPtr->Disks[column].status == rf_ds_spared) { 1486 rf_unlock_mutex2(raidPtr->mutex); 1487 return (EINVAL); 1488 } 1489 rf_unlock_mutex2(raidPtr->mutex); 1490 1491 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1492 if (rrcopy == NULL) 1493 return(ENOMEM); 1494 1495 rrcopy->raidPtr = (void *) raidPtr; 1496 rrcopy->col = column; 1497 1498 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1499 rf_ReconstructInPlaceThread, 1500 rrcopy,"raid_reconip"); 1501 return(retcode); 1502 1503 case RAIDFRAME_GET_INFO: 1504 if (!raidPtr->valid) 1505 return (ENODEV); 1506 ucfgp = (RF_DeviceConfig_t **) data; 1507 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1508 (RF_DeviceConfig_t *)); 1509 if (d_cfg == NULL) 1510 return (ENOMEM); 1511 d_cfg->rows = 1; /* there is only 1 row now */ 1512 d_cfg->cols = raidPtr->numCol; 1513 d_cfg->ndevs = raidPtr->numCol; 1514 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1515 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1516 return (ENOMEM); 1517 } 1518 d_cfg->nspares = raidPtr->numSpare; 1519 if (d_cfg->nspares >= RF_MAX_DISKS) { 1520 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1521 return (ENOMEM); 1522 } 1523 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1524 d = 0; 1525 for (j = 0; j < d_cfg->cols; j++) { 1526 d_cfg->devs[d] = raidPtr->Disks[j]; 1527 d++; 1528 } 1529 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1530 d_cfg->spares[i] = raidPtr->Disks[j]; 1531 } 1532 retcode = 
copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1533 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1534 1535 return (retcode); 1536 1537 case RAIDFRAME_CHECK_PARITY: 1538 *(int *) data = raidPtr->parity_good; 1539 return (0); 1540 1541 case RAIDFRAME_PARITYMAP_STATUS: 1542 if (rf_paritymap_ineligible(raidPtr)) 1543 return EINVAL; 1544 rf_paritymap_status(raidPtr->parity_map, 1545 (struct rf_pmstat *)data); 1546 return 0; 1547 1548 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1549 if (rf_paritymap_ineligible(raidPtr)) 1550 return EINVAL; 1551 if (raidPtr->parity_map == NULL) 1552 return ENOENT; /* ??? */ 1553 if (0 != rf_paritymap_set_params(raidPtr->parity_map, 1554 (struct rf_pmparams *)data, 1)) 1555 return EINVAL; 1556 return 0; 1557 1558 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1559 if (rf_paritymap_ineligible(raidPtr)) 1560 return EINVAL; 1561 *(int *) data = rf_paritymap_get_disable(raidPtr); 1562 return 0; 1563 1564 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1565 if (rf_paritymap_ineligible(raidPtr)) 1566 return EINVAL; 1567 rf_paritymap_set_disable(raidPtr, *(int *)data); 1568 /* XXX should errors be passed up? */ 1569 return 0; 1570 1571 case RAIDFRAME_RESET_ACCTOTALS: 1572 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1573 return (0); 1574 1575 case RAIDFRAME_GET_ACCTOTALS: 1576 totals = (RF_AccTotals_t *) data; 1577 *totals = raidPtr->acc_totals; 1578 return (0); 1579 1580 case RAIDFRAME_KEEP_ACCTOTALS: 1581 raidPtr->keep_acc_totals = *(int *)data; 1582 return (0); 1583 1584 case RAIDFRAME_GET_SIZE: 1585 *(int *) data = raidPtr->totalSectors; 1586 return (0); 1587 1588 /* fail a disk & optionally start reconstruction */ 1589 case RAIDFRAME_FAIL_DISK: 1590 1591 if (raidPtr->Layout.map->faultsTolerated == 0) { 1592 /* Can't do this on a RAID 0!! 
*/ 1593 return(EINVAL); 1594 } 1595 1596 rr = (struct rf_recon_req *) data; 1597 rr->row = 0; 1598 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1599 return (EINVAL); 1600 1601 1602 rf_lock_mutex2(raidPtr->mutex); 1603 if (raidPtr->status == rf_rs_reconstructing) { 1604 /* you can't fail a disk while we're reconstructing! */ 1605 /* XXX wrong for RAID6 */ 1606 rf_unlock_mutex2(raidPtr->mutex); 1607 return (EINVAL); 1608 } 1609 if ((raidPtr->Disks[rr->col].status == 1610 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1611 /* some other component has failed. Let's not make 1612 things worse. XXX wrong for RAID6 */ 1613 rf_unlock_mutex2(raidPtr->mutex); 1614 return (EINVAL); 1615 } 1616 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1617 /* Can't fail a spared disk! */ 1618 rf_unlock_mutex2(raidPtr->mutex); 1619 return (EINVAL); 1620 } 1621 rf_unlock_mutex2(raidPtr->mutex); 1622 1623 /* make a copy of the recon request so that we don't rely on 1624 * the user's buffer */ 1625 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1626 if (rrcopy == NULL) 1627 return(ENOMEM); 1628 memcpy(rrcopy, rr, sizeof(*rr)); 1629 rrcopy->raidPtr = (void *) raidPtr; 1630 1631 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1632 rf_ReconThread, 1633 rrcopy,"raid_recon"); 1634 return (0); 1635 1636 /* invoke a copyback operation after recon on whatever disk 1637 * needs it, if any */ 1638 case RAIDFRAME_COPYBACK: 1639 1640 if (raidPtr->Layout.map->faultsTolerated == 0) { 1641 /* This makes no sense on a RAID 0!! */ 1642 return(EINVAL); 1643 } 1644 1645 if (raidPtr->copyback_in_progress == 1) { 1646 /* Copyback is already in progress! 
*/ 1647 return(EINVAL); 1648 } 1649 1650 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1651 rf_CopybackThread, 1652 raidPtr,"raid_copyback"); 1653 return (retcode); 1654 1655 /* return the percentage completion of reconstruction */ 1656 case RAIDFRAME_CHECK_RECON_STATUS: 1657 if (raidPtr->Layout.map->faultsTolerated == 0) { 1658 /* This makes no sense on a RAID 0, so tell the 1659 user it's done. */ 1660 *(int *) data = 100; 1661 return(0); 1662 } 1663 if (raidPtr->status != rf_rs_reconstructing) 1664 *(int *) data = 100; 1665 else { 1666 if (raidPtr->reconControl->numRUsTotal > 0) { 1667 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1668 } else { 1669 *(int *) data = 0; 1670 } 1671 } 1672 return (0); 1673 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1674 progressInfoPtr = (RF_ProgressInfo_t **) data; 1675 if (raidPtr->status != rf_rs_reconstructing) { 1676 progressInfo.remaining = 0; 1677 progressInfo.completed = 100; 1678 progressInfo.total = 100; 1679 } else { 1680 progressInfo.total = 1681 raidPtr->reconControl->numRUsTotal; 1682 progressInfo.completed = 1683 raidPtr->reconControl->numRUsComplete; 1684 progressInfo.remaining = progressInfo.total - 1685 progressInfo.completed; 1686 } 1687 retcode = copyout(&progressInfo, *progressInfoPtr, 1688 sizeof(RF_ProgressInfo_t)); 1689 return (retcode); 1690 1691 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1692 if (raidPtr->Layout.map->faultsTolerated == 0) { 1693 /* This makes no sense on a RAID 0, so tell the 1694 user it's done. 
*/ 1695 *(int *) data = 100; 1696 return(0); 1697 } 1698 if (raidPtr->parity_rewrite_in_progress == 1) { 1699 *(int *) data = 100 * 1700 raidPtr->parity_rewrite_stripes_done / 1701 raidPtr->Layout.numStripe; 1702 } else { 1703 *(int *) data = 100; 1704 } 1705 return (0); 1706 1707 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1708 progressInfoPtr = (RF_ProgressInfo_t **) data; 1709 if (raidPtr->parity_rewrite_in_progress == 1) { 1710 progressInfo.total = raidPtr->Layout.numStripe; 1711 progressInfo.completed = 1712 raidPtr->parity_rewrite_stripes_done; 1713 progressInfo.remaining = progressInfo.total - 1714 progressInfo.completed; 1715 } else { 1716 progressInfo.remaining = 0; 1717 progressInfo.completed = 100; 1718 progressInfo.total = 100; 1719 } 1720 retcode = copyout(&progressInfo, *progressInfoPtr, 1721 sizeof(RF_ProgressInfo_t)); 1722 return (retcode); 1723 1724 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1725 if (raidPtr->Layout.map->faultsTolerated == 0) { 1726 /* This makes no sense on a RAID 0 */ 1727 *(int *) data = 100; 1728 return(0); 1729 } 1730 if (raidPtr->copyback_in_progress == 1) { 1731 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1732 raidPtr->Layout.numStripe; 1733 } else { 1734 *(int *) data = 100; 1735 } 1736 return (0); 1737 1738 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1739 progressInfoPtr = (RF_ProgressInfo_t **) data; 1740 if (raidPtr->copyback_in_progress == 1) { 1741 progressInfo.total = raidPtr->Layout.numStripe; 1742 progressInfo.completed = 1743 raidPtr->copyback_stripes_done; 1744 progressInfo.remaining = progressInfo.total - 1745 progressInfo.completed; 1746 } else { 1747 progressInfo.remaining = 0; 1748 progressInfo.completed = 100; 1749 progressInfo.total = 100; 1750 } 1751 retcode = copyout(&progressInfo, *progressInfoPtr, 1752 sizeof(RF_ProgressInfo_t)); 1753 return (retcode); 1754 1755 /* the sparetable daemon calls this to wait for the kernel to 1756 * need a spare table. 
this ioctl does not return until a 1757 * spare table is needed. XXX -- calling mpsleep here in the 1758 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1759 * -- I should either compute the spare table in the kernel, 1760 * or have a different -- XXX XXX -- interface (a different 1761 * character device) for delivering the table -- XXX */ 1762 #if 0 1763 case RAIDFRAME_SPARET_WAIT: 1764 rf_lock_mutex2(rf_sparet_wait_mutex); 1765 while (!rf_sparet_wait_queue) 1766 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex); 1767 waitreq = rf_sparet_wait_queue; 1768 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1769 rf_unlock_mutex2(rf_sparet_wait_mutex); 1770 1771 /* structure assignment */ 1772 *((RF_SparetWait_t *) data) = *waitreq; 1773 1774 RF_Free(waitreq, sizeof(*waitreq)); 1775 return (0); 1776 1777 /* wakes up a process waiting on SPARET_WAIT and puts an error 1778 * code in it that will cause the dameon to exit */ 1779 case RAIDFRAME_ABORT_SPARET_WAIT: 1780 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1781 waitreq->fcol = -1; 1782 rf_lock_mutex2(rf_sparet_wait_mutex); 1783 waitreq->next = rf_sparet_wait_queue; 1784 rf_sparet_wait_queue = waitreq; 1785 rf_broadcast_conf2(rf_sparet_wait_cv); 1786 rf_unlock_mutex2(rf_sparet_wait_mutex); 1787 return (0); 1788 1789 /* used by the spare table daemon to deliver a spare table 1790 * into the kernel */ 1791 case RAIDFRAME_SEND_SPARET: 1792 1793 /* install the spare table */ 1794 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1795 1796 /* respond to the requestor. 
the return status of the spare 1797 * table installation is passed in the "fcol" field */ 1798 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1799 waitreq->fcol = retcode; 1800 rf_lock_mutex2(rf_sparet_wait_mutex); 1801 waitreq->next = rf_sparet_resp_queue; 1802 rf_sparet_resp_queue = waitreq; 1803 rf_broadcast_cond2(rf_sparet_resp_cv); 1804 rf_unlock_mutex2(rf_sparet_wait_mutex); 1805 1806 return (retcode); 1807 #endif 1808 1809 default: 1810 break; /* fall through to the os-specific code below */ 1811 1812 } 1813 1814 if (!raidPtr->valid) 1815 return (EINVAL); 1816 1817 /* 1818 * Add support for "regular" device ioctls here. 1819 */ 1820 1821 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l); 1822 if (error != EPASSTHROUGH) 1823 return (error); 1824 1825 switch (cmd) { 1826 case DIOCGDINFO: 1827 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1828 break; 1829 #ifdef __HAVE_OLD_DISKLABEL 1830 case ODIOCGDINFO: 1831 newlabel = *(rs->sc_dkdev.dk_label); 1832 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1833 return ENOTTY; 1834 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1835 break; 1836 #endif 1837 1838 case DIOCGPART: 1839 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1840 ((struct partinfo *) data)->part = 1841 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1842 break; 1843 1844 case DIOCWDINFO: 1845 case DIOCSDINFO: 1846 #ifdef __HAVE_OLD_DISKLABEL 1847 case ODIOCWDINFO: 1848 case ODIOCSDINFO: 1849 #endif 1850 { 1851 struct disklabel *lp; 1852 #ifdef __HAVE_OLD_DISKLABEL 1853 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1854 memset(&newlabel, 0, sizeof newlabel); 1855 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1856 lp = &newlabel; 1857 } else 1858 #endif 1859 lp = (struct disklabel *)data; 1860 1861 if ((error = raidlock(rs)) != 0) 1862 return (error); 1863 1864 rs->sc_flags |= RAIDF_LABELLING; 1865 1866 error = setdisklabel(rs->sc_dkdev.dk_label, 1867 lp, 0, rs->sc_dkdev.dk_cpulabel); 1868 
if (error == 0) { 1869 if (cmd == DIOCWDINFO 1870 #ifdef __HAVE_OLD_DISKLABEL 1871 || cmd == ODIOCWDINFO 1872 #endif 1873 ) 1874 error = writedisklabel(RAIDLABELDEV(dev), 1875 raidstrategy, rs->sc_dkdev.dk_label, 1876 rs->sc_dkdev.dk_cpulabel); 1877 } 1878 rs->sc_flags &= ~RAIDF_LABELLING; 1879 1880 raidunlock(rs); 1881 1882 if (error) 1883 return (error); 1884 break; 1885 } 1886 1887 case DIOCWLABEL: 1888 if (*(int *) data != 0) 1889 rs->sc_flags |= RAIDF_WLABEL; 1890 else 1891 rs->sc_flags &= ~RAIDF_WLABEL; 1892 break; 1893 1894 case DIOCGDEFLABEL: 1895 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1896 break; 1897 1898 #ifdef __HAVE_OLD_DISKLABEL 1899 case ODIOCGDEFLABEL: 1900 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1901 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1902 return ENOTTY; 1903 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1904 break; 1905 #endif 1906 1907 case DIOCAWEDGE: 1908 case DIOCDWEDGE: 1909 dkw = (void *)data; 1910 1911 /* If the ioctl happens here, the parent is us. */ 1912 (void)strcpy(dkw->dkw_parent, rs->sc_xname); 1913 return cmd == DIOCAWEDGE ? 
		    dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* Report the name of the buffer-queue strategy in use. */
		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue and migrate pending buffers
		 * before freeing the old one. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  Attaches the pseudo device and the disk(9)
   structures for an already-configured RAID set. */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int     unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		/* Attach failed: back out the INITED flag and the cfdata. */
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	dkwedge_discover(&rs->sc_dkdev);

	rf_set_geometry(rs, raidPtr);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used.
GO 2031 */ 2032 int 2033 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 2034 { 2035 int retcode; 2036 2037 rf_lock_mutex2(rf_sparet_wait_mutex); 2038 req->next = rf_sparet_wait_queue; 2039 rf_sparet_wait_queue = req; 2040 rf_broadcast_cond2(rf_sparet_wait_cv); 2041 2042 /* mpsleep unlocks the mutex */ 2043 while (!rf_sparet_resp_queue) { 2044 rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex); 2045 } 2046 req = rf_sparet_resp_queue; 2047 rf_sparet_resp_queue = req->next; 2048 rf_unlock_mutex2(rf_sparet_wait_mutex); 2049 2050 retcode = req->fcol; 2051 RF_Free(req, sizeof(*req)); /* this is not the same req as we 2052 * alloc'd */ 2053 return (retcode); 2054 } 2055 #endif 2056 2057 /* a wrapper around rf_DoAccess that extracts appropriate info from the 2058 * bp & passes it down. 2059 * any calls originating in the kernel must use non-blocking I/O 2060 * do some extra sanity checking to return "appropriate" error values for 2061 * certain conditions (to make some standard utilities work) 2062 * 2063 * Formerly known as: rf_DoAccessKernel 2064 */ 2065 void 2066 raidstart(RF_Raid_t *raidPtr) 2067 { 2068 RF_SectorCount_t num_blocks, pb, sum; 2069 RF_RaidAddr_t raid_addr; 2070 struct partition *pp; 2071 daddr_t blocknum; 2072 struct raid_softc *rs; 2073 int do_async; 2074 struct buf *bp; 2075 int rc; 2076 2077 rs = raidPtr->softc; 2078 /* quick check to see if anything has died recently */ 2079 rf_lock_mutex2(raidPtr->mutex); 2080 if (raidPtr->numNewFailures > 0) { 2081 rf_unlock_mutex2(raidPtr->mutex); 2082 rf_update_component_labels(raidPtr, 2083 RF_NORMAL_COMPONENT_UPDATE); 2084 rf_lock_mutex2(raidPtr->mutex); 2085 raidPtr->numNewFailures--; 2086 } 2087 2088 /* Check to see if we're at the limit... 
*/ 2089 while (raidPtr->openings > 0) { 2090 rf_unlock_mutex2(raidPtr->mutex); 2091 2092 /* get the next item, if any, from the queue */ 2093 if ((bp = bufq_get(rs->buf_queue)) == NULL) { 2094 /* nothing more to do */ 2095 return; 2096 } 2097 2098 /* Ok, for the bp we have here, bp->b_blkno is relative to the 2099 * partition.. Need to make it absolute to the underlying 2100 * device.. */ 2101 2102 blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector; 2103 if (DISKPART(bp->b_dev) != RAW_PART) { 2104 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 2105 blocknum += pp->p_offset; 2106 } 2107 2108 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 2109 (int) blocknum)); 2110 2111 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 2112 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 2113 2114 /* *THIS* is where we adjust what block we're going to... 2115 * but DO NOT TOUCH bp->b_blkno!!! */ 2116 raid_addr = blocknum; 2117 2118 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 2119 pb = (bp->b_bcount & raidPtr->sectorMask) ? 
1 : 0; 2120 sum = raid_addr + num_blocks + pb; 2121 if (1 || rf_debugKernelAccess) { 2122 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 2123 (int) raid_addr, (int) sum, (int) num_blocks, 2124 (int) pb, (int) bp->b_resid)); 2125 } 2126 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 2127 || (sum < num_blocks) || (sum < pb)) { 2128 bp->b_error = ENOSPC; 2129 bp->b_resid = bp->b_bcount; 2130 biodone(bp); 2131 rf_lock_mutex2(raidPtr->mutex); 2132 continue; 2133 } 2134 /* 2135 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 2136 */ 2137 2138 if (bp->b_bcount & raidPtr->sectorMask) { 2139 bp->b_error = EINVAL; 2140 bp->b_resid = bp->b_bcount; 2141 biodone(bp); 2142 rf_lock_mutex2(raidPtr->mutex); 2143 continue; 2144 2145 } 2146 db1_printf(("Calling DoAccess..\n")); 2147 2148 2149 rf_lock_mutex2(raidPtr->mutex); 2150 raidPtr->openings--; 2151 rf_unlock_mutex2(raidPtr->mutex); 2152 2153 /* 2154 * Everything is async. 2155 */ 2156 do_async = 1; 2157 2158 disk_busy(&rs->sc_dkdev); 2159 2160 /* XXX we're still at splbio() here... do we *really* 2161 need to be? */ 2162 2163 /* don't ever condition on bp->b_flags & B_WRITE. 2164 * always condition on B_READ instead */ 2165 2166 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 2167 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 2168 do_async, raid_addr, num_blocks, 2169 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 2170 2171 if (rc) { 2172 bp->b_error = rc; 2173 bp->b_resid = bp->b_bcount; 2174 biodone(bp); 2175 /* continue loop */ 2176 } 2177 2178 rf_lock_mutex2(raidPtr->mutex); 2179 } 2180 rf_unlock_mutex2(raidPtr->mutex); 2181 } 2182 2183 2184 2185 2186 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 2187 2188 int 2189 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 2190 { 2191 int op = (req->type == RF_IO_TYPE_READ) ? 
B_READ : B_WRITE; 2192 struct buf *bp; 2193 2194 req->queue = queue; 2195 bp = req->bp; 2196 2197 switch (req->type) { 2198 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 2199 /* XXX need to do something extra here.. */ 2200 /* I'm leaving this in, as I've never actually seen it used, 2201 * and I'd like folks to report it... GO */ 2202 printf(("WAKEUP CALLED\n")); 2203 queue->numOutstanding++; 2204 2205 bp->b_flags = 0; 2206 bp->b_private = req; 2207 2208 KernelWakeupFunc(bp); 2209 break; 2210 2211 case RF_IO_TYPE_READ: 2212 case RF_IO_TYPE_WRITE: 2213 #if RF_ACC_TRACE > 0 2214 if (req->tracerec) { 2215 RF_ETIMER_START(req->tracerec->timer); 2216 } 2217 #endif 2218 InitBP(bp, queue->rf_cinfo->ci_vp, 2219 op, queue->rf_cinfo->ci_dev, 2220 req->sectorOffset, req->numSector, 2221 req->buf, KernelWakeupFunc, (void *) req, 2222 queue->raidPtr->logBytesPerSector, req->b_proc); 2223 2224 if (rf_debugKernelAccess) { 2225 db1_printf(("dispatch: bp->b_blkno = %ld\n", 2226 (long) bp->b_blkno)); 2227 } 2228 queue->numOutstanding++; 2229 queue->last_deq_sector = req->sectorOffset; 2230 /* acc wouldn't have been let in if there were any pending 2231 * reqs at any other priority */ 2232 queue->curPriority = req->priority; 2233 2234 db1_printf(("Going for %c to unit %d col %d\n", 2235 req->type, queue->raidPtr->raidid, 2236 queue->col)); 2237 db1_printf(("sector %d count %d (%d bytes) %d\n", 2238 (int) req->sectorOffset, (int) req->numSector, 2239 (int) (req->numSector << 2240 queue->raidPtr->logBytesPerSector), 2241 (int) queue->raidPtr->logBytesPerSector)); 2242 2243 /* 2244 * XXX: drop lock here since this can block at 2245 * least with backing SCSI devices. Retake it 2246 * to minimize fuss with calling interfaces. 
2247 */ 2248 2249 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam"); 2250 bdev_strategy(bp); 2251 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam"); 2252 break; 2253 2254 default: 2255 panic("bad req->type in rf_DispatchKernelIO"); 2256 } 2257 db1_printf(("Exiting from DispatchKernelIO\n")); 2258 2259 return (0); 2260 } 2261 /* this is the callback function associated with a I/O invoked from 2262 kernel code. 2263 */ 2264 static void 2265 KernelWakeupFunc(struct buf *bp) 2266 { 2267 RF_DiskQueueData_t *req = NULL; 2268 RF_DiskQueue_t *queue; 2269 2270 db1_printf(("recovering the request queue:\n")); 2271 2272 req = bp->b_private; 2273 2274 queue = (RF_DiskQueue_t *) req->queue; 2275 2276 rf_lock_mutex2(queue->raidPtr->iodone_lock); 2277 2278 #if RF_ACC_TRACE > 0 2279 if (req->tracerec) { 2280 RF_ETIMER_STOP(req->tracerec->timer); 2281 RF_ETIMER_EVAL(req->tracerec->timer); 2282 rf_lock_mutex2(rf_tracing_mutex); 2283 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2284 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2285 req->tracerec->num_phys_ios++; 2286 rf_unlock_mutex2(rf_tracing_mutex); 2287 } 2288 #endif 2289 2290 /* XXX Ok, let's get aggressive... If b_error is set, let's go 2291 * ballistic, and mark the component as hosed... */ 2292 2293 if (bp->b_error != 0) { 2294 /* Mark the disk as dead */ 2295 /* but only mark it once... */ 2296 /* and only if it wouldn't leave this RAID set 2297 completely broken */ 2298 if (((queue->raidPtr->Disks[queue->col].status == 2299 rf_ds_optimal) || 2300 (queue->raidPtr->Disks[queue->col].status == 2301 rf_ds_used_spare)) && 2302 (queue->raidPtr->numFailures < 2303 queue->raidPtr->Layout.map->faultsTolerated)) { 2304 printf("raid%d: IO Error. 
Marking %s as failed.\n", 2305 queue->raidPtr->raidid, 2306 queue->raidPtr->Disks[queue->col].devname); 2307 queue->raidPtr->Disks[queue->col].status = 2308 rf_ds_failed; 2309 queue->raidPtr->status = rf_rs_degraded; 2310 queue->raidPtr->numFailures++; 2311 queue->raidPtr->numNewFailures++; 2312 } else { /* Disk is already dead... */ 2313 /* printf("Disk already marked as dead!\n"); */ 2314 } 2315 2316 } 2317 2318 /* Fill in the error value */ 2319 req->error = bp->b_error; 2320 2321 /* Drop this one on the "finished" queue... */ 2322 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 2323 2324 /* Let the raidio thread know there is work to be done. */ 2325 rf_signal_cond2(queue->raidPtr->iodone_cv); 2326 2327 rf_unlock_mutex2(queue->raidPtr->iodone_lock); 2328 } 2329 2330 2331 /* 2332 * initialize a buf structure for doing an I/O in the kernel. 2333 */ 2334 static void 2335 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 2336 RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf, 2337 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 2338 struct proc *b_proc) 2339 { 2340 /* bp->b_flags = B_PHYS | rw_flag; */ 2341 bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */ 2342 bp->b_oflags = 0; 2343 bp->b_cflags = 0; 2344 bp->b_bcount = numSect << logBytesPerSector; 2345 bp->b_bufsize = bp->b_bcount; 2346 bp->b_error = 0; 2347 bp->b_dev = dev; 2348 bp->b_data = bf; 2349 bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT; 2350 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 2351 if (bp->b_bcount == 0) { 2352 panic("bp->b_bcount is zero in InitBP!!"); 2353 } 2354 bp->b_proc = b_proc; 2355 bp->b_iodone = cbFunc; 2356 bp->b_private = cbArg; 2357 } 2358 2359 static void 2360 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 2361 struct disklabel *lp) 2362 { 2363 memset(lp, 0, sizeof(*lp)); 2364 2365 /* fabricate a label... 
*/ 2366 lp->d_secperunit = raidPtr->totalSectors; 2367 lp->d_secsize = raidPtr->bytesPerSector; 2368 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2369 lp->d_ntracks = 4 * raidPtr->numCol; 2370 lp->d_ncylinders = raidPtr->totalSectors / 2371 (lp->d_nsectors * lp->d_ntracks); 2372 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2373 2374 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2375 lp->d_type = DTYPE_RAID; 2376 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2377 lp->d_rpm = 3600; 2378 lp->d_interleave = 1; 2379 lp->d_flags = 0; 2380 2381 lp->d_partitions[RAW_PART].p_offset = 0; 2382 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2383 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2384 lp->d_npartitions = RAW_PART + 1; 2385 2386 lp->d_magic = DISKMAGIC; 2387 lp->d_magic2 = DISKMAGIC; 2388 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2389 2390 } 2391 /* 2392 * Read the disklabel from the raid device. If one is not present, fake one 2393 * up. 2394 */ 2395 static void 2396 raidgetdisklabel(dev_t dev) 2397 { 2398 int unit = raidunit(dev); 2399 struct raid_softc *rs; 2400 const char *errstring; 2401 struct disklabel *lp; 2402 struct cpu_disklabel *clp; 2403 RF_Raid_t *raidPtr; 2404 2405 if ((rs = raidget(unit)) == NULL) 2406 return; 2407 2408 lp = rs->sc_dkdev.dk_label; 2409 clp = rs->sc_dkdev.dk_cpulabel; 2410 2411 db1_printf(("Getting the disklabel...\n")); 2412 2413 memset(clp, 0, sizeof(*clp)); 2414 2415 raidPtr = &rs->sc_r; 2416 2417 raidgetdefaultlabel(raidPtr, rs, lp); 2418 2419 /* 2420 * Call the generic disklabel extraction routine. 2421 */ 2422 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2423 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2424 if (errstring) 2425 raidmakedisklabel(rs); 2426 else { 2427 int i; 2428 struct partition *pp; 2429 2430 /* 2431 * Sanity check whether the found disklabel is valid. 
2432 * 2433 * This is necessary since total size of the raid device 2434 * may vary when an interleave is changed even though exactly 2435 * same components are used, and old disklabel may used 2436 * if that is found. 2437 */ 2438 if (lp->d_secperunit != rs->sc_size) 2439 printf("raid%d: WARNING: %s: " 2440 "total sector size in disklabel (%" PRIu32 ") != " 2441 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname, 2442 lp->d_secperunit, rs->sc_size); 2443 for (i = 0; i < lp->d_npartitions; i++) { 2444 pp = &lp->d_partitions[i]; 2445 if (pp->p_offset + pp->p_size > rs->sc_size) 2446 printf("raid%d: WARNING: %s: end of partition `%c' " 2447 "exceeds the size of raid (%" PRIu64 ")\n", 2448 unit, rs->sc_xname, 'a' + i, rs->sc_size); 2449 } 2450 } 2451 2452 } 2453 /* 2454 * Take care of things one might want to take care of in the event 2455 * that a disklabel isn't present. 2456 */ 2457 static void 2458 raidmakedisklabel(struct raid_softc *rs) 2459 { 2460 struct disklabel *lp = rs->sc_dkdev.dk_label; 2461 db1_printf(("Making a label..\n")); 2462 2463 /* 2464 * For historical reasons, if there's no disklabel present 2465 * the raw partition must be marked FS_BSDFFS. 2466 */ 2467 2468 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2469 2470 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2471 2472 lp->d_checksum = dkcksum(lp); 2473 } 2474 /* 2475 * Wait interruptibly for an exclusive lock. 2476 * 2477 * XXX 2478 * Several drivers do this; it should be abstracted and made MP-safe. 2479 * (Hmm... where have we seen this warning before :-> GO ) 2480 */ 2481 static int 2482 raidlock(struct raid_softc *rs) 2483 { 2484 int error; 2485 2486 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2487 rs->sc_flags |= RAIDF_WANTED; 2488 if ((error = 2489 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2490 return (error); 2491 } 2492 rs->sc_flags |= RAIDF_LOCKED; 2493 return (0); 2494 } 2495 /* 2496 * Unlock and wake up any waiters. 
2497 */ 2498 static void 2499 raidunlock(struct raid_softc *rs) 2500 { 2501 2502 rs->sc_flags &= ~RAIDF_LOCKED; 2503 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2504 rs->sc_flags &= ~RAIDF_WANTED; 2505 wakeup(rs); 2506 } 2507 } 2508 2509 2510 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2511 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2512 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2513 2514 static daddr_t 2515 rf_component_info_offset(void) 2516 { 2517 2518 return RF_COMPONENT_INFO_OFFSET; 2519 } 2520 2521 static daddr_t 2522 rf_component_info_size(unsigned secsize) 2523 { 2524 daddr_t info_size; 2525 2526 KASSERT(secsize); 2527 if (secsize > RF_COMPONENT_INFO_SIZE) 2528 info_size = secsize; 2529 else 2530 info_size = RF_COMPONENT_INFO_SIZE; 2531 2532 return info_size; 2533 } 2534 2535 static daddr_t 2536 rf_parity_map_offset(RF_Raid_t *raidPtr) 2537 { 2538 daddr_t map_offset; 2539 2540 KASSERT(raidPtr->bytesPerSector); 2541 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2542 map_offset = raidPtr->bytesPerSector; 2543 else 2544 map_offset = RF_COMPONENT_INFO_SIZE; 2545 map_offset += rf_component_info_offset(); 2546 2547 return map_offset; 2548 } 2549 2550 static daddr_t 2551 rf_parity_map_size(RF_Raid_t *raidPtr) 2552 { 2553 daddr_t map_size; 2554 2555 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2556 map_size = raidPtr->bytesPerSector; 2557 else 2558 map_size = RF_PARITY_MAP_SIZE; 2559 2560 return map_size; 2561 } 2562 2563 int 2564 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2565 { 2566 RF_ComponentLabel_t *clabel; 2567 2568 clabel = raidget_component_label(raidPtr, col); 2569 clabel->clean = RF_RAID_CLEAN; 2570 raidflush_component_label(raidPtr, col); 2571 return(0); 2572 } 2573 2574 2575 int 2576 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2577 { 2578 RF_ComponentLabel_t *clabel; 2579 2580 clabel = raidget_component_label(raidPtr, col); 2581 clabel->clean = RF_RAID_DIRTY; 2582 raidflush_component_label(raidPtr, col); 2583 
return(0); 2584 } 2585 2586 int 2587 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2588 { 2589 KASSERT(raidPtr->bytesPerSector); 2590 return raidread_component_label(raidPtr->bytesPerSector, 2591 raidPtr->Disks[col].dev, 2592 raidPtr->raid_cinfo[col].ci_vp, 2593 &raidPtr->raid_cinfo[col].ci_label); 2594 } 2595 2596 RF_ComponentLabel_t * 2597 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2598 { 2599 return &raidPtr->raid_cinfo[col].ci_label; 2600 } 2601 2602 int 2603 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2604 { 2605 RF_ComponentLabel_t *label; 2606 2607 label = &raidPtr->raid_cinfo[col].ci_label; 2608 label->mod_counter = raidPtr->mod_counter; 2609 #ifndef RF_NO_PARITY_MAP 2610 label->parity_map_modcount = label->mod_counter; 2611 #endif 2612 return raidwrite_component_label(raidPtr->bytesPerSector, 2613 raidPtr->Disks[col].dev, 2614 raidPtr->raid_cinfo[col].ci_vp, label); 2615 } 2616 2617 2618 static int 2619 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2620 RF_ComponentLabel_t *clabel) 2621 { 2622 return raidread_component_area(dev, b_vp, clabel, 2623 sizeof(RF_ComponentLabel_t), 2624 rf_component_info_offset(), 2625 rf_component_info_size(secsize)); 2626 } 2627 2628 /* ARGSUSED */ 2629 static int 2630 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2631 size_t msize, daddr_t offset, daddr_t dsize) 2632 { 2633 struct buf *bp; 2634 const struct bdevsw *bdev; 2635 int error; 2636 2637 /* XXX should probably ensure that we don't try to do this if 2638 someone has changed rf_protected_sectors. */ 2639 2640 if (b_vp == NULL) { 2641 /* For whatever reason, this component is not valid. 2642 Don't try to read a component label from it. */ 2643 return(EINVAL); 2644 } 2645 2646 /* get a block of the appropriate size... 
*/ 2647 bp = geteblk((int)dsize); 2648 bp->b_dev = dev; 2649 2650 /* get our ducks in a row for the read */ 2651 bp->b_blkno = offset / DEV_BSIZE; 2652 bp->b_bcount = dsize; 2653 bp->b_flags |= B_READ; 2654 bp->b_resid = dsize; 2655 2656 bdev = bdevsw_lookup(bp->b_dev); 2657 if (bdev == NULL) 2658 return (ENXIO); 2659 (*bdev->d_strategy)(bp); 2660 2661 error = biowait(bp); 2662 2663 if (!error) { 2664 memcpy(data, bp->b_data, msize); 2665 } 2666 2667 brelse(bp, 0); 2668 return(error); 2669 } 2670 2671 2672 static int 2673 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2674 RF_ComponentLabel_t *clabel) 2675 { 2676 return raidwrite_component_area(dev, b_vp, clabel, 2677 sizeof(RF_ComponentLabel_t), 2678 rf_component_info_offset(), 2679 rf_component_info_size(secsize), 0); 2680 } 2681 2682 /* ARGSUSED */ 2683 static int 2684 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2685 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2686 { 2687 struct buf *bp; 2688 const struct bdevsw *bdev; 2689 int error; 2690 2691 /* get a block of the appropriate size... */ 2692 bp = geteblk((int)dsize); 2693 bp->b_dev = dev; 2694 2695 /* get our ducks in a row for the write */ 2696 bp->b_blkno = offset / DEV_BSIZE; 2697 bp->b_bcount = dsize; 2698 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0); 2699 bp->b_resid = dsize; 2700 2701 memset(bp->b_data, 0, dsize); 2702 memcpy(bp->b_data, data, msize); 2703 2704 bdev = bdevsw_lookup(bp->b_dev); 2705 if (bdev == NULL) 2706 return (ENXIO); 2707 (*bdev->d_strategy)(bp); 2708 if (asyncp) 2709 return 0; 2710 error = biowait(bp); 2711 brelse(bp, 0); 2712 if (error) { 2713 #if 1 2714 printf("Failed to write RAID component info!\n"); 2715 #endif 2716 } 2717 2718 return(error); 2719 } 2720 2721 void 2722 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2723 { 2724 int c; 2725 2726 for (c = 0; c < raidPtr->numCol; c++) { 2727 /* Skip dead disks. 
*/ 2728 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2729 continue; 2730 /* XXXjld: what if an error occurs here? */ 2731 raidwrite_component_area(raidPtr->Disks[c].dev, 2732 raidPtr->raid_cinfo[c].ci_vp, map, 2733 RF_PARITYMAP_NBYTE, 2734 rf_parity_map_offset(raidPtr), 2735 rf_parity_map_size(raidPtr), 0); 2736 } 2737 } 2738 2739 void 2740 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2741 { 2742 struct rf_paritymap_ondisk tmp; 2743 int c,first; 2744 2745 first=1; 2746 for (c = 0; c < raidPtr->numCol; c++) { 2747 /* Skip dead disks. */ 2748 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2749 continue; 2750 raidread_component_area(raidPtr->Disks[c].dev, 2751 raidPtr->raid_cinfo[c].ci_vp, &tmp, 2752 RF_PARITYMAP_NBYTE, 2753 rf_parity_map_offset(raidPtr), 2754 rf_parity_map_size(raidPtr)); 2755 if (first) { 2756 memcpy(map, &tmp, sizeof(*map)); 2757 first = 0; 2758 } else { 2759 rf_paritymap_merge(map, &tmp); 2760 } 2761 } 2762 } 2763 2764 void 2765 rf_markalldirty(RF_Raid_t *raidPtr) 2766 { 2767 RF_ComponentLabel_t *clabel; 2768 int sparecol; 2769 int c; 2770 int j; 2771 int scol = -1; 2772 2773 raidPtr->mod_counter++; 2774 for (c = 0; c < raidPtr->numCol; c++) { 2775 /* we don't want to touch (at all) a disk that has 2776 failed */ 2777 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2778 clabel = raidget_component_label(raidPtr, c); 2779 if (clabel->status == rf_ds_spared) { 2780 /* XXX do something special... 2781 but whatever you do, don't 2782 try to access it!! */ 2783 } else { 2784 raidmarkdirty(raidPtr, c); 2785 } 2786 } 2787 } 2788 2789 for( c = 0; c < raidPtr->numSpare ; c++) { 2790 sparecol = raidPtr->numCol + c; 2791 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2792 /* 2793 2794 we claim this disk is "optimal" if it's 2795 rf_ds_used_spare, as that means it should be 2796 directly substitutable for the disk it replaced. 2797 We note that too... 
2798 2799 */ 2800 2801 for(j=0;j<raidPtr->numCol;j++) { 2802 if (raidPtr->Disks[j].spareCol == sparecol) { 2803 scol = j; 2804 break; 2805 } 2806 } 2807 2808 clabel = raidget_component_label(raidPtr, sparecol); 2809 /* make sure status is noted */ 2810 2811 raid_init_component_label(raidPtr, clabel); 2812 2813 clabel->row = 0; 2814 clabel->column = scol; 2815 /* Note: we *don't* change status from rf_ds_used_spare 2816 to rf_ds_optimal */ 2817 /* clabel.status = rf_ds_optimal; */ 2818 2819 raidmarkdirty(raidPtr, sparecol); 2820 } 2821 } 2822 } 2823 2824 2825 void 2826 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2827 { 2828 RF_ComponentLabel_t *clabel; 2829 int sparecol; 2830 int c; 2831 int j; 2832 int scol; 2833 2834 scol = -1; 2835 2836 /* XXX should do extra checks to make sure things really are clean, 2837 rather than blindly setting the clean bit... */ 2838 2839 raidPtr->mod_counter++; 2840 2841 for (c = 0; c < raidPtr->numCol; c++) { 2842 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2843 clabel = raidget_component_label(raidPtr, c); 2844 /* make sure status is noted */ 2845 clabel->status = rf_ds_optimal; 2846 2847 /* note what unit we are configured as */ 2848 clabel->last_unit = raidPtr->raidid; 2849 2850 raidflush_component_label(raidPtr, c); 2851 if (final == RF_FINAL_COMPONENT_UPDATE) { 2852 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2853 raidmarkclean(raidPtr, c); 2854 } 2855 } 2856 } 2857 /* else we don't touch it.. */ 2858 } 2859 2860 for( c = 0; c < raidPtr->numSpare ; c++) { 2861 sparecol = raidPtr->numCol + c; 2862 /* Need to ensure that the reconstruct actually completed! */ 2863 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2864 /* 2865 2866 we claim this disk is "optimal" if it's 2867 rf_ds_used_spare, as that means it should be 2868 directly substitutable for the disk it replaced. 2869 We note that too... 
2870 2871 */ 2872 2873 for(j=0;j<raidPtr->numCol;j++) { 2874 if (raidPtr->Disks[j].spareCol == sparecol) { 2875 scol = j; 2876 break; 2877 } 2878 } 2879 2880 /* XXX shouldn't *really* need this... */ 2881 clabel = raidget_component_label(raidPtr, sparecol); 2882 /* make sure status is noted */ 2883 2884 raid_init_component_label(raidPtr, clabel); 2885 2886 clabel->column = scol; 2887 clabel->status = rf_ds_optimal; 2888 clabel->last_unit = raidPtr->raidid; 2889 2890 raidflush_component_label(raidPtr, sparecol); 2891 if (final == RF_FINAL_COMPONENT_UPDATE) { 2892 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2893 raidmarkclean(raidPtr, sparecol); 2894 } 2895 } 2896 } 2897 } 2898 } 2899 2900 void 2901 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2902 { 2903 2904 if (vp != NULL) { 2905 if (auto_configured == 1) { 2906 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2907 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2908 vput(vp); 2909 2910 } else { 2911 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2912 } 2913 } 2914 } 2915 2916 2917 void 2918 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2919 { 2920 int r,c; 2921 struct vnode *vp; 2922 int acd; 2923 2924 2925 /* We take this opportunity to close the vnodes like we should.. 
*/ 2926 2927 for (c = 0; c < raidPtr->numCol; c++) { 2928 vp = raidPtr->raid_cinfo[c].ci_vp; 2929 acd = raidPtr->Disks[c].auto_configured; 2930 rf_close_component(raidPtr, vp, acd); 2931 raidPtr->raid_cinfo[c].ci_vp = NULL; 2932 raidPtr->Disks[c].auto_configured = 0; 2933 } 2934 2935 for (r = 0; r < raidPtr->numSpare; r++) { 2936 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2937 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2938 rf_close_component(raidPtr, vp, acd); 2939 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2940 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2941 } 2942 } 2943 2944 2945 void 2946 rf_ReconThread(struct rf_recon_req *req) 2947 { 2948 int s; 2949 RF_Raid_t *raidPtr; 2950 2951 s = splbio(); 2952 raidPtr = (RF_Raid_t *) req->raidPtr; 2953 raidPtr->recon_in_progress = 1; 2954 2955 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2956 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2957 2958 RF_Free(req, sizeof(*req)); 2959 2960 raidPtr->recon_in_progress = 0; 2961 splx(s); 2962 2963 /* That's all... */ 2964 kthread_exit(0); /* does not return */ 2965 } 2966 2967 void 2968 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2969 { 2970 int retcode; 2971 int s; 2972 2973 raidPtr->parity_rewrite_stripes_done = 0; 2974 raidPtr->parity_rewrite_in_progress = 1; 2975 s = splbio(); 2976 retcode = rf_RewriteParity(raidPtr); 2977 splx(s); 2978 if (retcode) { 2979 printf("raid%d: Error re-writing parity (%d)!\n", 2980 raidPtr->raidid, retcode); 2981 } else { 2982 /* set the clean bit! If we shutdown correctly, 2983 the clean bit on each component label will get 2984 set */ 2985 raidPtr->parity_good = RF_RAID_CLEAN; 2986 } 2987 raidPtr->parity_rewrite_in_progress = 0; 2988 2989 /* Anyone waiting for us to stop? If so, inform them... */ 2990 if (raidPtr->waitShutdown) { 2991 wakeup(&raidPtr->parity_rewrite_in_progress); 2992 } 2993 2994 /* That's all... 
*/ 2995 kthread_exit(0); /* does not return */ 2996 } 2997 2998 2999 void 3000 rf_CopybackThread(RF_Raid_t *raidPtr) 3001 { 3002 int s; 3003 3004 raidPtr->copyback_in_progress = 1; 3005 s = splbio(); 3006 rf_CopybackReconstructedData(raidPtr); 3007 splx(s); 3008 raidPtr->copyback_in_progress = 0; 3009 3010 /* That's all... */ 3011 kthread_exit(0); /* does not return */ 3012 } 3013 3014 3015 void 3016 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 3017 { 3018 int s; 3019 RF_Raid_t *raidPtr; 3020 3021 s = splbio(); 3022 raidPtr = req->raidPtr; 3023 raidPtr->recon_in_progress = 1; 3024 rf_ReconstructInPlace(raidPtr, req->col); 3025 RF_Free(req, sizeof(*req)); 3026 raidPtr->recon_in_progress = 0; 3027 splx(s); 3028 3029 /* That's all... */ 3030 kthread_exit(0); /* does not return */ 3031 } 3032 3033 static RF_AutoConfig_t * 3034 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 3035 const char *cname, RF_SectorCount_t size, uint64_t numsecs, 3036 unsigned secsize) 3037 { 3038 int good_one = 0; 3039 RF_ComponentLabel_t *clabel; 3040 RF_AutoConfig_t *ac; 3041 3042 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT); 3043 if (clabel == NULL) { 3044 oomem: 3045 while(ac_list) { 3046 ac = ac_list; 3047 if (ac->clabel) 3048 free(ac->clabel, M_RAIDFRAME); 3049 ac_list = ac_list->next; 3050 free(ac, M_RAIDFRAME); 3051 } 3052 printf("RAID auto config: out of memory!\n"); 3053 return NULL; /* XXX probably should panic? */ 3054 } 3055 3056 if (!raidread_component_label(secsize, dev, vp, clabel)) { 3057 /* Got the label. Does it look reasonable? */ 3058 if (rf_reasonable_label(clabel, numsecs) && 3059 (rf_component_label_partitionsize(clabel) <= size)) { 3060 #ifdef DEBUG 3061 printf("Component on: %s: %llu\n", 3062 cname, (unsigned long long)size); 3063 rf_print_component_label(clabel); 3064 #endif 3065 /* if it's reasonable, add it, else ignore it. 
*/ 3066 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 3067 M_NOWAIT); 3068 if (ac == NULL) { 3069 free(clabel, M_RAIDFRAME); 3070 goto oomem; 3071 } 3072 strlcpy(ac->devname, cname, sizeof(ac->devname)); 3073 ac->dev = dev; 3074 ac->vp = vp; 3075 ac->clabel = clabel; 3076 ac->next = ac_list; 3077 ac_list = ac; 3078 good_one = 1; 3079 } 3080 } 3081 if (!good_one) { 3082 /* cleanup */ 3083 free(clabel, M_RAIDFRAME); 3084 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3085 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3086 vput(vp); 3087 } 3088 return ac_list; 3089 } 3090 3091 RF_AutoConfig_t * 3092 rf_find_raid_components(void) 3093 { 3094 struct vnode *vp; 3095 struct disklabel label; 3096 device_t dv; 3097 deviter_t di; 3098 dev_t dev; 3099 int bmajor, bminor, wedge, rf_part_found; 3100 int error; 3101 int i; 3102 RF_AutoConfig_t *ac_list; 3103 uint64_t numsecs; 3104 unsigned secsize; 3105 3106 /* initialize the AutoConfig list */ 3107 ac_list = NULL; 3108 3109 /* we begin by trolling through *all* the devices on the system */ 3110 3111 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 3112 dv = deviter_next(&di)) { 3113 3114 /* we are only interested in disks... */ 3115 if (device_class(dv) != DV_DISK) 3116 continue; 3117 3118 /* we don't care about floppies... */ 3119 if (device_is_a(dv, "fd")) { 3120 continue; 3121 } 3122 3123 /* we don't care about CD's... */ 3124 if (device_is_a(dv, "cd")) { 3125 continue; 3126 } 3127 3128 /* we don't care about md's... 
*/ 3129 if (device_is_a(dv, "md")) { 3130 continue; 3131 } 3132 3133 /* hdfd is the Atari/Hades floppy driver */ 3134 if (device_is_a(dv, "hdfd")) { 3135 continue; 3136 } 3137 3138 /* fdisa is the Atari/Milan floppy driver */ 3139 if (device_is_a(dv, "fdisa")) { 3140 continue; 3141 } 3142 3143 /* need to find the device_name_to_block_device_major stuff */ 3144 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 3145 3146 rf_part_found = 0; /*No raid partition as yet*/ 3147 3148 /* get a vnode for the raw partition of this disk */ 3149 3150 wedge = device_is_a(dv, "dk"); 3151 bminor = minor(device_unit(dv)); 3152 dev = wedge ? makedev(bmajor, bminor) : 3153 MAKEDISKDEV(bmajor, bminor, RAW_PART); 3154 if (bdevvp(dev, &vp)) 3155 panic("RAID can't alloc vnode"); 3156 3157 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED); 3158 3159 if (error) { 3160 /* "Who cares." Continue looking 3161 for something that exists*/ 3162 vput(vp); 3163 continue; 3164 } 3165 3166 error = getdisksize(vp, &numsecs, &secsize); 3167 if (error) { 3168 vput(vp); 3169 continue; 3170 } 3171 if (wedge) { 3172 struct dkwedge_info dkw; 3173 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 3174 NOCRED); 3175 if (error) { 3176 printf("RAIDframe: can't get wedge info for " 3177 "dev %s (%d)\n", device_xname(dv), error); 3178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3179 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3180 vput(vp); 3181 continue; 3182 } 3183 3184 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 3185 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3186 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3187 vput(vp); 3188 continue; 3189 } 3190 3191 ac_list = rf_get_component(ac_list, dev, vp, 3192 device_xname(dv), dkw.dkw_size, numsecs, secsize); 3193 rf_part_found = 1; /*There is a raid component on this disk*/ 3194 continue; 3195 } 3196 3197 /* Ok, the disk exists. Go get the disklabel. 
*/ 3198 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 3199 if (error) { 3200 /* 3201 * XXX can't happen - open() would 3202 * have errored out (or faked up one) 3203 */ 3204 if (error != ENOTTY) 3205 printf("RAIDframe: can't get label for dev " 3206 "%s (%d)\n", device_xname(dv), error); 3207 } 3208 3209 /* don't need this any more. We'll allocate it again 3210 a little later if we really do... */ 3211 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3212 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3213 vput(vp); 3214 3215 if (error) 3216 continue; 3217 3218 rf_part_found = 0; /*No raid partitions yet*/ 3219 for (i = 0; i < label.d_npartitions; i++) { 3220 char cname[sizeof(ac_list->devname)]; 3221 3222 /* We only support partitions marked as RAID */ 3223 if (label.d_partitions[i].p_fstype != FS_RAID) 3224 continue; 3225 3226 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 3227 if (bdevvp(dev, &vp)) 3228 panic("RAID can't alloc vnode"); 3229 3230 error = VOP_OPEN(vp, FREAD, NOCRED); 3231 if (error) { 3232 /* Whatever... */ 3233 vput(vp); 3234 continue; 3235 } 3236 snprintf(cname, sizeof(cname), "%s%c", 3237 device_xname(dv), 'a' + i); 3238 ac_list = rf_get_component(ac_list, dev, vp, cname, 3239 label.d_partitions[i].p_size, numsecs, secsize); 3240 rf_part_found = 1; /*There is at least one raid partition on this disk*/ 3241 } 3242 3243 /* 3244 *If there is no raid component on this disk, either in a 3245 *disklabel or inside a wedge, check the raw partition as well, 3246 *as it is possible to configure raid components on raw disk 3247 *devices. 3248 */ 3249 3250 if (!rf_part_found) { 3251 char cname[sizeof(ac_list->devname)]; 3252 3253 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); 3254 if (bdevvp(dev, &vp)) 3255 panic("RAID can't alloc vnode"); 3256 3257 error = VOP_OPEN(vp, FREAD, NOCRED); 3258 if (error) { 3259 /* Whatever... 
*/ 3260 vput(vp); 3261 continue; 3262 } 3263 snprintf(cname, sizeof(cname), "%s%c", 3264 device_xname(dv), 'a' + RAW_PART); 3265 ac_list = rf_get_component(ac_list, dev, vp, cname, 3266 label.d_partitions[RAW_PART].p_size, numsecs, secsize); 3267 } 3268 } 3269 deviter_release(&di); 3270 return ac_list; 3271 } 3272 3273 3274 int 3275 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3276 { 3277 3278 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 3279 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 3280 ((clabel->clean == RF_RAID_CLEAN) || 3281 (clabel->clean == RF_RAID_DIRTY)) && 3282 clabel->row >=0 && 3283 clabel->column >= 0 && 3284 clabel->num_rows > 0 && 3285 clabel->num_columns > 0 && 3286 clabel->row < clabel->num_rows && 3287 clabel->column < clabel->num_columns && 3288 clabel->blockSize > 0 && 3289 /* 3290 * numBlocksHi may contain garbage, but it is ok since 3291 * the type is unsigned. If it is really garbage, 3292 * rf_fix_old_label_size() will fix it. 3293 */ 3294 rf_component_label_numblocks(clabel) > 0) { 3295 /* 3296 * label looks reasonable enough... 3297 * let's make sure it has no old garbage. 3298 */ 3299 if (numsecs) 3300 rf_fix_old_label_size(clabel, numsecs); 3301 return(1); 3302 } 3303 return(0); 3304 } 3305 3306 3307 /* 3308 * For reasons yet unknown, some old component labels have garbage in 3309 * the newer numBlocksHi region, and this causes lossage. Since those 3310 * disks will also have numsecs set to less than 32 bits of sectors, 3311 * we can determine when this corruption has occurred, and fix it. 3312 * 3313 * The exact same problem, with the same unknown reason, happens to 3314 * the partitionSizeHi member as well. 
3315 */ 3316 static void 3317 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3318 { 3319 3320 if (numsecs < ((uint64_t)1 << 32)) { 3321 if (clabel->numBlocksHi) { 3322 printf("WARNING: total sectors < 32 bits, yet " 3323 "numBlocksHi set\n" 3324 "WARNING: resetting numBlocksHi to zero.\n"); 3325 clabel->numBlocksHi = 0; 3326 } 3327 3328 if (clabel->partitionSizeHi) { 3329 printf("WARNING: total sectors < 32 bits, yet " 3330 "partitionSizeHi set\n" 3331 "WARNING: resetting partitionSizeHi to zero.\n"); 3332 clabel->partitionSizeHi = 0; 3333 } 3334 } 3335 } 3336 3337 3338 #ifdef DEBUG 3339 void 3340 rf_print_component_label(RF_ComponentLabel_t *clabel) 3341 { 3342 uint64_t numBlocks; 3343 static const char *rp[] = { 3344 "No", "Force", "Soft", "*invalid*" 3345 }; 3346 3347 3348 numBlocks = rf_component_label_numblocks(clabel); 3349 3350 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3351 clabel->row, clabel->column, 3352 clabel->num_rows, clabel->num_columns); 3353 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3354 clabel->version, clabel->serial_number, 3355 clabel->mod_counter); 3356 printf(" Clean: %s Status: %d\n", 3357 clabel->clean ? "Yes" : "No", clabel->status); 3358 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3359 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3360 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n", 3361 (char) clabel->parityConfig, clabel->blockSize, numBlocks); 3362 printf(" Autoconfig: %s\n", clabel->autoconfigure ? 
"Yes" : "No"); 3363 printf(" Root partition: %s\n", rp[clabel->root_partition & 3]); 3364 printf(" Last configured as: raid%d\n", clabel->last_unit); 3365 #if 0 3366 printf(" Config order: %d\n", clabel->config_order); 3367 #endif 3368 3369 } 3370 #endif 3371 3372 RF_ConfigSet_t * 3373 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 3374 { 3375 RF_AutoConfig_t *ac; 3376 RF_ConfigSet_t *config_sets; 3377 RF_ConfigSet_t *cset; 3378 RF_AutoConfig_t *ac_next; 3379 3380 3381 config_sets = NULL; 3382 3383 /* Go through the AutoConfig list, and figure out which components 3384 belong to what sets. */ 3385 ac = ac_list; 3386 while(ac!=NULL) { 3387 /* we're going to putz with ac->next, so save it here 3388 for use at the end of the loop */ 3389 ac_next = ac->next; 3390 3391 if (config_sets == NULL) { 3392 /* will need at least this one... */ 3393 config_sets = (RF_ConfigSet_t *) 3394 malloc(sizeof(RF_ConfigSet_t), 3395 M_RAIDFRAME, M_NOWAIT); 3396 if (config_sets == NULL) { 3397 panic("rf_create_auto_sets: No memory!"); 3398 } 3399 /* this one is easy :) */ 3400 config_sets->ac = ac; 3401 config_sets->next = NULL; 3402 config_sets->rootable = 0; 3403 ac->next = NULL; 3404 } else { 3405 /* which set does this component fit into? */ 3406 cset = config_sets; 3407 while(cset!=NULL) { 3408 if (rf_does_it_fit(cset, ac)) { 3409 /* looks like it matches... */ 3410 ac->next = cset->ac; 3411 cset->ac = ac; 3412 break; 3413 } 3414 cset = cset->next; 3415 } 3416 if (cset==NULL) { 3417 /* didn't find a match above... 
				   new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/*
 * Return non-zero if component 'ac' belongs to configuration set 'cset',
 * judged by comparing its label against the label of the set's first
 * member.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test. We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts. If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set. If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur. Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	/* Every geometry/identity field must agree for a fit. */
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    rf_component_label_numblocks(clabel1) ==
	    rf_component_label_numblocks(clabel2) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

/*
 * Decide whether configuration set 'cset' has enough live components
 * (at the set's highest mod_counter) to be configured.  Returns 1 if
 * the set can survive its missing members, 0 if too many have failed.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set. If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	 */

	/* The set's authoritative mod_counter is the maximum over all
	   members; stale (failed) components carry lower values. */
	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* For each column, look for a current (max mod_counter) member. */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component. If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd half of a mirror
			   pair without bailing.. reset the
			   even_pair_failed flag before starting
			   the next pair....
			 */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for autoconfiguration from the component labels
 * of set 'ac', filling the geometry fields of 'config' and the device
 * names of every member.  The config is later handed to rf_Configure().
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ??
*/ 3620 3621 while(ac!=NULL) { 3622 /* row/col values will be in range due to the checks 3623 in reasonable_label() */ 3624 strcpy(config->devnames[0][ac->clabel->column], 3625 ac->devname); 3626 ac = ac->next; 3627 } 3628 3629 for(i=0;i<RF_MAXDBGV;i++) { 3630 config->debugVars[i][0] = 0; 3631 } 3632 } 3633 3634 int 3635 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3636 { 3637 RF_ComponentLabel_t *clabel; 3638 int column; 3639 int sparecol; 3640 3641 raidPtr->autoconfigure = new_value; 3642 3643 for(column=0; column<raidPtr->numCol; column++) { 3644 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3645 clabel = raidget_component_label(raidPtr, column); 3646 clabel->autoconfigure = new_value; 3647 raidflush_component_label(raidPtr, column); 3648 } 3649 } 3650 for(column = 0; column < raidPtr->numSpare ; column++) { 3651 sparecol = raidPtr->numCol + column; 3652 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3653 clabel = raidget_component_label(raidPtr, sparecol); 3654 clabel->autoconfigure = new_value; 3655 raidflush_component_label(raidPtr, sparecol); 3656 } 3657 } 3658 return(new_value); 3659 } 3660 3661 int 3662 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3663 { 3664 RF_ComponentLabel_t *clabel; 3665 int column; 3666 int sparecol; 3667 3668 raidPtr->root_partition = new_value; 3669 for(column=0; column<raidPtr->numCol; column++) { 3670 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3671 clabel = raidget_component_label(raidPtr, column); 3672 clabel->root_partition = new_value; 3673 raidflush_component_label(raidPtr, column); 3674 } 3675 } 3676 for(column = 0; column < raidPtr->numSpare ; column++) { 3677 sparecol = raidPtr->numCol + column; 3678 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3679 clabel = raidget_component_label(raidPtr, sparecol); 3680 clabel->root_partition = new_value; 3681 raidflush_component_label(raidPtr, sparecol); 3682 } 3683 } 3684 return(new_value); 3685 } 3686 3687 void 3688 
rf_release_all_vps(RF_ConfigSet_t *cset) 3689 { 3690 RF_AutoConfig_t *ac; 3691 3692 ac = cset->ac; 3693 while(ac!=NULL) { 3694 /* Close the vp, and give it back */ 3695 if (ac->vp) { 3696 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3697 VOP_CLOSE(ac->vp, FREAD, NOCRED); 3698 vput(ac->vp); 3699 ac->vp = NULL; 3700 } 3701 ac = ac->next; 3702 } 3703 } 3704 3705 3706 void 3707 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3708 { 3709 RF_AutoConfig_t *ac; 3710 RF_AutoConfig_t *next_ac; 3711 3712 ac = cset->ac; 3713 while(ac!=NULL) { 3714 next_ac = ac->next; 3715 /* nuke the label */ 3716 free(ac->clabel, M_RAIDFRAME); 3717 /* cleanup the config structure */ 3718 free(ac, M_RAIDFRAME); 3719 /* "next.." */ 3720 ac = next_ac; 3721 } 3722 /* and, finally, nuke the config set */ 3723 free(cset, M_RAIDFRAME); 3724 } 3725 3726 3727 void 3728 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3729 { 3730 /* current version number */ 3731 clabel->version = RF_COMPONENT_LABEL_VERSION; 3732 clabel->serial_number = raidPtr->serial_number; 3733 clabel->mod_counter = raidPtr->mod_counter; 3734 3735 clabel->num_rows = 1; 3736 clabel->num_columns = raidPtr->numCol; 3737 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3738 clabel->status = rf_ds_optimal; /* "It's good!" 
 */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}

/*
 * Autoconfigure a single configuration set: build an RF_Config_t from
 * its labels, pick a unit number, and configure the RAID set.  Returns
 * the attached softc on success, NULL on failure.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	 */

	/* NOTE(review): sc is dereferenced without a NULL check, so
	   raidget() is presumably guaranteed to return (or create) a
	   softc for any unit — confirm against raidget()'s contract. */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4.
	   Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine. Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: give the unit back */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}

/*
 * Account the completion of an I/O against the disk statistics of the
 * RAID set that 'desc' belongs to.
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;
	struct raid_softc *rs;

	bp = (struct buf *)desc->bp;
	rs = desc->raidPtr->softc;
	disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
		    (bp->b_flags & B_READ));
}

/*
 * Initialize pool 'p' with the given object size and wait channel,
 * pre-priming it with xmin objects and capping it at xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}

/*
 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
 * if there is IO pending and if that IO could possibly be done for a
 * given RAID set. Returns 0 if IO is waiting and can be done, 1
 * otherwise.
 *
 */

int
rf_buf_queue_check(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs = raidPtr->softc;
	if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
		/* there is work to do */
		return 0;
	}
	/* default is nothing to do */
	return 1;
}

/*
 * Query the size and sector size of the device behind 'vp' and fill in
 * the corresponding fields of 'diskPtr'.  Returns 0 on success or the
 * error from getdisksize().
 */
int
rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
{
	uint64_t numsecs;
	unsigned secsize;
	int error;

	error = getdisksize(vp, &numsecs, &secsize);
	if (error == 0) {
		diskPtr->blockSize = secsize;
		/* NOTE(review): if numsecs < rf_protectedSectors this
		   subtraction wraps/underflows — presumably no usable
		   component is ever that small, but a guard would be
		   safer; confirm with callers. */
		diskPtr->numBlocks = numsecs - rf_protectedSectors;
		diskPtr->partitionSize = numsecs;
		return 0;
	}
	return error;
}

/* Autoconf match: a raid pseudo-device always matches. */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}

/* Autoconf attach: nothing to do; real setup happens at configure time. */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}


/*
 * Autoconf detach: look up the softc for this unit, lock it, and tear
 * it down via raid_detach_unlocked().  Returns 0 or an errno.
 */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = raidget(device_unit(self));

	if (rs == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	/* XXXkd: raidput(rs) ??? */

	return error;
}

/*
 * Publish a synthetic disk geometry for the RAID set derived from its
 * total size, sector size and stripe layout.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}

/*
 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
 * We end up returning whatever error was returned by the first cache flush
 * that fails.
3946 */ 3947 3948 int 3949 rf_sync_component_caches(RF_Raid_t *raidPtr) 3950 { 3951 int c, sparecol; 3952 int e,error; 3953 int force = 1; 3954 3955 error = 0; 3956 for (c = 0; c < raidPtr->numCol; c++) { 3957 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3958 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3959 &force, FWRITE, NOCRED); 3960 if (e) { 3961 if (e != ENODEV) 3962 printf("raid%d: cache flush to component %s failed.\n", 3963 raidPtr->raidid, raidPtr->Disks[c].devname); 3964 if (error == 0) { 3965 error = e; 3966 } 3967 } 3968 } 3969 } 3970 3971 for( c = 0; c < raidPtr->numSpare ; c++) { 3972 sparecol = raidPtr->numCol + c; 3973 /* Need to ensure that the reconstruct actually completed! */ 3974 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3975 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3976 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3977 if (e) { 3978 if (e != ENODEV) 3979 printf("raid%d: cache flush to component %s failed.\n", 3980 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3981 if (error == 0) { 3982 error = e; 3983 } 3984 } 3985 } 3986 } 3987 return error; 3988 } 3989