1 /* $NetBSD: rf_netbsdkintf.c,v 1.304 2013/05/29 00:47:49 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Greg Oster; Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * Copyright (c) 1995 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Mark Holland, Jim Zelenka 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /*********************************************************** 98 * 99 * rf_kintf.c -- the kernel interface routines for RAIDframe 100 * 101 ***********************************************************/ 102 103 #include <sys/cdefs.h> 104 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.304 2013/05/29 00:47:49 christos Exp $"); 105 106 #ifdef _KERNEL_OPT 107 #include "opt_compat_netbsd.h" 108 #include "opt_raid_autoconfig.h" 109 #endif 110 111 #include <sys/param.h> 112 #include <sys/errno.h> 113 #include <sys/pool.h> 114 #include <sys/proc.h> 115 #include <sys/queue.h> 116 #include <sys/disk.h> 117 #include <sys/device.h> 118 #include <sys/stat.h> 119 #include <sys/ioctl.h> 120 #include <sys/fcntl.h> 121 #include <sys/systm.h> 122 #include <sys/vnode.h> 123 #include <sys/disklabel.h> 124 #include <sys/conf.h> 125 #include <sys/buf.h> 126 #include <sys/bufq.h> 127 #include <sys/reboot.h> 128 #include <sys/kauth.h> 129 130 #include <prop/proplib.h> 131 132 #include 
<dev/raidframe/raidframevar.h> 133 #include <dev/raidframe/raidframeio.h> 134 #include <dev/raidframe/rf_paritymap.h> 135 136 #include "rf_raid.h" 137 #include "rf_copyback.h" 138 #include "rf_dag.h" 139 #include "rf_dagflags.h" 140 #include "rf_desc.h" 141 #include "rf_diskqueue.h" 142 #include "rf_etimer.h" 143 #include "rf_general.h" 144 #include "rf_kintf.h" 145 #include "rf_options.h" 146 #include "rf_driver.h" 147 #include "rf_parityscan.h" 148 #include "rf_threadstuff.h" 149 150 #ifdef COMPAT_50 151 #include "rf_compat50.h" 152 #endif 153 154 #ifdef DEBUG 155 int rf_kdebug_level = 0; 156 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 157 #else /* DEBUG */ 158 #define db1_printf(a) { } 159 #endif /* DEBUG */ 160 161 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 162 static rf_declare_mutex2(rf_sparet_wait_mutex); 163 static rf_declare_cond2(rf_sparet_wait_cv); 164 static rf_declare_cond2(rf_sparet_resp_cv); 165 166 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 167 * spare table */ 168 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 169 * installation process */ 170 #endif 171 172 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 173 174 /* prototypes */ 175 static void KernelWakeupFunc(struct buf *); 176 static void InitBP(struct buf *, struct vnode *, unsigned, 177 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 178 void *, int, struct proc *); 179 struct raid_softc; 180 static void raidinit(struct raid_softc *); 181 182 void raidattach(int); 183 static int raid_match(device_t, cfdata_t, void *); 184 static void raid_attach(device_t, device_t, void *); 185 static int raid_detach(device_t, int); 186 187 static int raidread_component_area(dev_t, struct vnode *, void *, size_t, 188 daddr_t, daddr_t); 189 static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t, 190 daddr_t, daddr_t, int); 191 192 static int raidwrite_component_label(unsigned, 193 dev_t, 
struct vnode *, RF_ComponentLabel_t *); 194 static int raidread_component_label(unsigned, 195 dev_t, struct vnode *, RF_ComponentLabel_t *); 196 197 198 dev_type_open(raidopen); 199 dev_type_close(raidclose); 200 dev_type_read(raidread); 201 dev_type_write(raidwrite); 202 dev_type_ioctl(raidioctl); 203 dev_type_strategy(raidstrategy); 204 dev_type_dump(raiddump); 205 dev_type_size(raidsize); 206 207 const struct bdevsw raid_bdevsw = { 208 raidopen, raidclose, raidstrategy, raidioctl, 209 raiddump, raidsize, D_DISK 210 }; 211 212 const struct cdevsw raid_cdevsw = { 213 raidopen, raidclose, raidread, raidwrite, raidioctl, 214 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 215 }; 216 217 static struct dkdriver rf_dkdriver = { raidstrategy, minphys }; 218 219 struct raid_softc { 220 device_t sc_dev; 221 int sc_unit; 222 int sc_flags; /* flags */ 223 int sc_cflags; /* configuration flags */ 224 uint64_t sc_size; /* size of the raid device */ 225 char sc_xname[20]; /* XXX external name */ 226 struct disk sc_dkdev; /* generic disk device info */ 227 struct bufq_state *buf_queue; /* used for the device queue */ 228 RF_Raid_t sc_r; 229 LIST_ENTRY(raid_softc) sc_link; 230 }; 231 /* sc_flags */ 232 #define RAIDF_INITED 0x01 /* unit has been initialized */ 233 #define RAIDF_WLABEL 0x02 /* label area is writable */ 234 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 235 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */ 236 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 237 #define RAIDF_LOCKED 0x80 /* unit is locked */ 238 239 #define raidunit(x) DISKUNIT(x) 240 241 extern struct cfdriver raid_cd; 242 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc), 243 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 244 DVF_DETACH_SHUTDOWN); 245 246 /* 247 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 
248 * Be aware that large numbers can allow the driver to consume a lot of 249 * kernel memory, especially on writes, and in degraded mode reads. 250 * 251 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 252 * a single 64K write will typically require 64K for the old data, 253 * 64K for the old parity, and 64K for the new parity, for a total 254 * of 192K (if the parity buffer is not re-used immediately). 255 * Even it if is used immediately, that's still 128K, which when multiplied 256 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 257 * 258 * Now in degraded mode, for example, a 64K read on the above setup may 259 * require data reconstruction, which will require *all* of the 4 remaining 260 * disks to participate -- 4 * 32K/disk == 128K again. 261 */ 262 263 #ifndef RAIDOUTSTANDING 264 #define RAIDOUTSTANDING 6 265 #endif 266 267 #define RAIDLABELDEV(dev) \ 268 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 269 270 /* declared here, and made public, for the benefit of KVM stuff.. 
*/ 271 272 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 273 struct disklabel *); 274 static void raidgetdisklabel(dev_t); 275 static void raidmakedisklabel(struct raid_softc *); 276 277 static int raidlock(struct raid_softc *); 278 static void raidunlock(struct raid_softc *); 279 280 static int raid_detach_unlocked(struct raid_softc *); 281 282 static void rf_markalldirty(RF_Raid_t *); 283 static void rf_set_geometry(struct raid_softc *, RF_Raid_t *); 284 285 void rf_ReconThread(struct rf_recon_req *); 286 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 287 void rf_CopybackThread(RF_Raid_t *raidPtr); 288 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 289 int rf_autoconfig(device_t); 290 void rf_buildroothack(RF_ConfigSet_t *); 291 292 RF_AutoConfig_t *rf_find_raid_components(void); 293 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 294 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 295 int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t); 296 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 297 int rf_set_autoconfig(RF_Raid_t *, int); 298 int rf_set_rootpartition(RF_Raid_t *, int); 299 void rf_release_all_vps(RF_ConfigSet_t *); 300 void rf_cleanup_config_set(RF_ConfigSet_t *); 301 int rf_have_enough_components(RF_ConfigSet_t *); 302 struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *); 303 static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t); 304 305 /* 306 * Debugging, mostly. Set to 0 to not allow autoconfig to take place. 307 * Note that this is overridden by having RAID_AUTOCONFIG as an option 308 * in the kernel config file. 
 */
#ifdef RAID_AUTOCONFIG
int raidautoconfig = 1;
#else
int raidautoconfig = 0;
#endif
/* Set once rf_autoconfig() has run; prevents a second autoconfig pass. */
static bool raidautoconfigdone = false;

struct RF_Pools_s rf_pools;

/* Global list of all configured raid_softc's, protected by raid_lock. */
static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
static kmutex_t raid_lock;

/*
 * Allocate and minimally initialize a softc for RAID unit `unit':
 * zeroed, unit number set, and its buffer queue allocated.
 * NOTE(review): kmem_zalloc with KM_SLEEP should not return NULL, so
 * the failure branch below looks defensive only -- confirm.
 */
static struct raid_softc *
raidcreate(int unit) {
	struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	sc->sc_unit = unit;
	bufq_alloc(&sc->buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
	return sc;
}

/* Release the resources allocated by raidcreate(). */
static void
raiddestroy(struct raid_softc *sc) {
	bufq_free(sc->buf_queue);
	kmem_free(sc, sizeof(*sc));
}

/*
 * Look up the softc for `unit' on the global list, creating and
 * inserting a new one if it does not exist yet.  Returns NULL only
 * for a negative unit (or if creation fails).  Note the unlocked
 * window between the failed lookup and the insert: two concurrent
 * callers could in principle both create a softc for the same unit --
 * NOTE(review): confirm callers serialize this externally.
 */
static struct raid_softc *
raidget(int unit) {
	struct raid_softc *sc;
	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}
	mutex_enter(&raid_lock);
	LIST_FOREACH(sc, &raids, sc_link) {
		if (sc->sc_unit == unit) {
			mutex_exit(&raid_lock);
			return sc;
		}
	}
	mutex_exit(&raid_lock);
	if ((sc = raidcreate(unit)) == NULL)
		return NULL;
	mutex_enter(&raid_lock);
	LIST_INSERT_HEAD(&raids, sc, sc_link);
	mutex_exit(&raid_lock);
	return sc;
}

/* Unlink a softc from the global list and destroy it. */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}

/*
 * Pseudo-device attach entry point.  Performs the one-time RAIDframe
 * initialization (global lock, spare-table synchronization primitives,
 * rf_BootRaidframe()), attaches the autoconf cfattach, and registers
 * rf_autoconfig() as a config finalizer so RAID sets are auto-detected
 * after all real hardware has been found.  The `num' argument is not
 * used by this body.
 */
void
raidattach(int num)
{
	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* This is where all the initialization stuff gets done. */

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	if (rf_BootRaidframe() == 0)
		aprint_verbose("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

	raidautoconfigdone = false;

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * Config finalizer: scan all disks for RAIDframe component labels,
 * group them into configuration sets, and hand the sets to
 * rf_buildroothack() for evaluation/configuration.  Runs at most once
 * (guarded by raidautoconfigdone) and only if raidautoconfig is set.
 * Returns 1 when a scan was performed, 0 when skipped.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

/*
 * Walk the list of auto-detected configuration sets: configure every
 * set that has enough components and is marked autoconfigure, release
 * the resources of the rest, then try to determine booted_device.
 * If exactly one configured set is rootable it becomes the boot
 * device; if several are, the set containing the device the kernel
 * actually booted from is chosen (asking the MD code via
 * cpu_rootconf() if needed), and if that is still ambiguous the user
 * is prompted via RB_ASKNAME.  An explicit rootspec overrides all of
 * this.  Consumes (frees) every cset on the list.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int col;
	int num_root;
	char *devname;
	struct raid_softc *sc, *rsc;

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok\n",
				    sc->sc_unit);
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		if (rsc->sc_dkdev.dk_nwedges != 0) {
			/* XXX: How do we find the real root partition? */
			char cname[sizeof(cset->ac->devname)];
			/* Assume the 'a' wedge carries the root fs. */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(rsc->sc_dev), 'a');
			booted_device = dkwedge_find_by_wname(cname);
		} else
			booted_device = rsc->sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Count the rootable sets that contain the boot device. */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			for (col = 0; col < r->numCol; col++) {
				devname = r->Disks[col].devname;
				/* strip the leading "/dev/" prefix */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
				    sc->sc_unit, devname);
				num_root++;
				rsc = sc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = rsc->sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}


/*
 * Return the size, in DEV_BSIZE units, of the partition of `dev' --
 * but only if it is an FS_SWAP partition (this is the swap-dump size
 * hook); otherwise, or if the unit is not configured, return -1.
 * Temporarily opens/closes the device if it was not already open.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if ((rs = raidget(unit)) == NULL)
		return -1;
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

/*
 * Crash-dump entry point: write `size' bytes at `va' to block `blkno'
 * of the dump partition.  Only supported for RAID 1 sets (one data,
 * one parity column); picks a live component (or used spare) to dump
 * to and calls that component's block-device d_dump directly.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	raidPtr = &rs->sc_r;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* dumps must be a whole number of DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse to write past the end of the RAID set */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}
/* ARGSUSED */
/*
 * Open the raid device.  Validates the unit and partition, refuses
 * opens while the unit is shutting down or when wedges exist (other
 * than RAW_PART), reads the disklabel on first open, marks the open
 * in the char/block openmasks, and on the very first open of a
 * configured set marks all components dirty via rf_markalldirty().
 */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* Re-read the disklabel on the first open of a configured set. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}
/* ARGSUSED */
/*
 * Close the raid device: clear this partition's bit in the relevant
 * openmask, and on the last close of a configured set write out final
 * (clean) component labels.  Always succeeds (returns 0) once the
 * unit lock has been obtained.
 */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(&rs->sc_r,
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}

/*
 * Block I/O entry point.  Validates the request (unit configured and
 * valid, non-zero count, in-bounds for either the raw device size or
 * the disklabel partition), then queues the buf on the per-unit bufq
 * and wakes the RAIDframe I/O thread via iodone_cv.  Errors complete
 * the buf immediately through biodone().
 */
void
raidstrategy(struct buf *bp)
{
	unsigned int unit = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	int wlabel;
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		goto done;
	}
	raidPtr = &rs->sc_r;
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		/* convert totalSectors (native sector size) to DEV_BSIZE units */
		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}

	rf_lock_mutex2(raidPtr->iodone_lock);

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* scheduled the IO to happen at the next convenient time */
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);

	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
/* ARGSUSED */
/*
 * Character-device read: hand off to physio() using raidstrategy.
 * Fails with ENXIO if the unit does not exist or is not configured.
 */
int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}
/* ARGSUSED */
/*
 * Character-device write: hand off to physio() using raidstrategy.
 * Fails with ENXIO if the unit does not exist or is not configured.
 */
int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

/*
 * Detach helper: shut the RAID set down (if initialized) and tear
 * down the disk(9) structures.  Returns EBUSY while any partition is
 * still open, or the error from rf_Shutdown().  Caller is expected to
 * hold whatever serialization raid_detach() provides -- hence
 * "unlocked" in the name.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = &rs->sc_r;

	/*
	 * If somebody has a partition mounted, we shouldn't
	 * shutdown.
	 */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	dkwedge_delall(&rs->sc_dkdev);
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	aprint_normal_dev(rs->sc_dev, "detached\n");

	return 0;
}

/*
 * ioctl entry point: handles both the generic disk ioctls (disklabel,
 * wedges, strategy) and the RAIDFRAME_* control ioctls (configure,
 * shutdown, rebuild, parity operations, component label access, ...).
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask, s;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	/* int raidid; */
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if ((rs = raidget(unit)) == NULL)
		return ENXIO;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
		(int) DISKPART(dev), (int) unit, cmd));

	/* Must be open for writes for these commands...
*/ 1017 switch (cmd) { 1018 #ifdef DIOCGSECTORSIZE 1019 case DIOCGSECTORSIZE: 1020 *(u_int *)data = raidPtr->bytesPerSector; 1021 return 0; 1022 case DIOCGMEDIASIZE: 1023 *(off_t *)data = 1024 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector; 1025 return 0; 1026 #endif 1027 case DIOCSDINFO: 1028 case DIOCWDINFO: 1029 #ifdef __HAVE_OLD_DISKLABEL 1030 case ODIOCWDINFO: 1031 case ODIOCSDINFO: 1032 #endif 1033 case DIOCWLABEL: 1034 case DIOCAWEDGE: 1035 case DIOCDWEDGE: 1036 case DIOCSSTRATEGY: 1037 if ((flag & FWRITE) == 0) 1038 return (EBADF); 1039 } 1040 1041 /* Must be initialized for these... */ 1042 switch (cmd) { 1043 case DIOCGDINFO: 1044 case DIOCSDINFO: 1045 case DIOCWDINFO: 1046 #ifdef __HAVE_OLD_DISKLABEL 1047 case ODIOCGDINFO: 1048 case ODIOCWDINFO: 1049 case ODIOCSDINFO: 1050 case ODIOCGDEFLABEL: 1051 #endif 1052 case DIOCGPART: 1053 case DIOCWLABEL: 1054 case DIOCGDEFLABEL: 1055 case DIOCAWEDGE: 1056 case DIOCDWEDGE: 1057 case DIOCLWEDGES: 1058 case DIOCCACHESYNC: 1059 case RAIDFRAME_SHUTDOWN: 1060 case RAIDFRAME_REWRITEPARITY: 1061 case RAIDFRAME_GET_INFO: 1062 case RAIDFRAME_RESET_ACCTOTALS: 1063 case RAIDFRAME_GET_ACCTOTALS: 1064 case RAIDFRAME_KEEP_ACCTOTALS: 1065 case RAIDFRAME_GET_SIZE: 1066 case RAIDFRAME_FAIL_DISK: 1067 case RAIDFRAME_COPYBACK: 1068 case RAIDFRAME_CHECK_RECON_STATUS: 1069 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1070 case RAIDFRAME_GET_COMPONENT_LABEL: 1071 case RAIDFRAME_SET_COMPONENT_LABEL: 1072 case RAIDFRAME_ADD_HOT_SPARE: 1073 case RAIDFRAME_REMOVE_HOT_SPARE: 1074 case RAIDFRAME_INIT_LABELS: 1075 case RAIDFRAME_REBUILD_IN_PLACE: 1076 case RAIDFRAME_CHECK_PARITY: 1077 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1078 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1079 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1080 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1081 case RAIDFRAME_SET_AUTOCONFIG: 1082 case RAIDFRAME_SET_ROOT: 1083 case RAIDFRAME_DELETE_COMPONENT: 1084 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1085 case 
RAIDFRAME_PARITYMAP_STATUS: 1086 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1087 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1088 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1089 case DIOCGSTRATEGY: 1090 case DIOCSSTRATEGY: 1091 if ((rs->sc_flags & RAIDF_INITED) == 0) 1092 return (ENXIO); 1093 } 1094 1095 switch (cmd) { 1096 #ifdef COMPAT_50 1097 case RAIDFRAME_GET_INFO50: 1098 return rf_get_info50(raidPtr, data); 1099 1100 case RAIDFRAME_CONFIGURE50: 1101 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0) 1102 return retcode; 1103 goto config; 1104 #endif 1105 /* configure the system */ 1106 case RAIDFRAME_CONFIGURE: 1107 1108 if (raidPtr->valid) { 1109 /* There is a valid RAID set running on this unit! */ 1110 printf("raid%d: Device already configured!\n",unit); 1111 return(EINVAL); 1112 } 1113 1114 /* copy-in the configuration information */ 1115 /* data points to a pointer to the configuration structure */ 1116 1117 u_cfg = *((RF_Config_t **) data); 1118 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1119 if (k_cfg == NULL) { 1120 return (ENOMEM); 1121 } 1122 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1123 if (retcode) { 1124 RF_Free(k_cfg, sizeof(RF_Config_t)); 1125 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1126 retcode)); 1127 return (retcode); 1128 } 1129 goto config; 1130 config: 1131 /* allocate a buffer for the layout-specific data, and copy it 1132 * in */ 1133 if (k_cfg->layoutSpecificSize) { 1134 if (k_cfg->layoutSpecificSize > 10000) { 1135 /* sanity check */ 1136 RF_Free(k_cfg, sizeof(RF_Config_t)); 1137 return (EINVAL); 1138 } 1139 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1140 (u_char *)); 1141 if (specific_buf == NULL) { 1142 RF_Free(k_cfg, sizeof(RF_Config_t)); 1143 return (ENOMEM); 1144 } 1145 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1146 k_cfg->layoutSpecificSize); 1147 if (retcode) { 1148 RF_Free(k_cfg, sizeof(RF_Config_t)); 1149 RF_Free(specific_buf, 1150 k_cfg->layoutSpecificSize); 1151 
db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1152 retcode)); 1153 return (retcode); 1154 } 1155 } else 1156 specific_buf = NULL; 1157 k_cfg->layoutSpecific = specific_buf; 1158 1159 /* should do some kind of sanity check on the configuration. 1160 * Store the sum of all the bytes in the last byte? */ 1161 1162 /* configure the system */ 1163 1164 /* 1165 * Clear the entire RAID descriptor, just to make sure 1166 * there is no stale data left in the case of a 1167 * reconfiguration 1168 */ 1169 memset(raidPtr, 0, sizeof(*raidPtr)); 1170 raidPtr->softc = rs; 1171 raidPtr->raidid = unit; 1172 1173 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1174 1175 if (retcode == 0) { 1176 1177 /* allow this many simultaneous IO's to 1178 this RAID device */ 1179 raidPtr->openings = RAIDOUTSTANDING; 1180 1181 raidinit(rs); 1182 rf_markalldirty(raidPtr); 1183 } 1184 /* free the buffers. No return code here. */ 1185 if (k_cfg->layoutSpecificSize) { 1186 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1187 } 1188 RF_Free(k_cfg, sizeof(RF_Config_t)); 1189 1190 return (retcode); 1191 1192 /* shutdown the system */ 1193 case RAIDFRAME_SHUTDOWN: 1194 1195 part = DISKPART(dev); 1196 pmask = (1 << part); 1197 1198 if ((error = raidlock(rs)) != 0) 1199 return (error); 1200 1201 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1202 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1203 (rs->sc_dkdev.dk_copenmask & pmask))) 1204 retcode = EBUSY; 1205 else { 1206 rs->sc_flags |= RAIDF_SHUTDOWN; 1207 rs->sc_dkdev.dk_copenmask &= ~pmask; 1208 rs->sc_dkdev.dk_bopenmask &= ~pmask; 1209 rs->sc_dkdev.dk_openmask &= ~pmask; 1210 retcode = 0; 1211 } 1212 1213 raidunlock(rs); 1214 1215 if (retcode != 0) 1216 return retcode; 1217 1218 /* free the pseudo device attach bits */ 1219 1220 cf = device_cfdata(rs->sc_dev); 1221 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0) 1222 free(cf, M_RAIDFRAME); 1223 1224 return (retcode); 1225 case RAIDFRAME_GET_COMPONENT_LABEL: 1226 clabel_ptr = (RF_ComponentLabel_t 
**) data; 1227 /* need to read the component label for the disk indicated 1228 by row,column in clabel */ 1229 1230 /* 1231 * Perhaps there should be an option to skip the in-core 1232 * copy and hit the disk, as with disklabel(8). 1233 */ 1234 RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *)); 1235 1236 retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel)); 1237 1238 if (retcode) { 1239 RF_Free(clabel, sizeof(*clabel)); 1240 return retcode; 1241 } 1242 1243 clabel->row = 0; /* Don't allow looking at anything else.*/ 1244 1245 column = clabel->column; 1246 1247 if ((column < 0) || (column >= raidPtr->numCol + 1248 raidPtr->numSpare)) { 1249 RF_Free(clabel, sizeof(*clabel)); 1250 return EINVAL; 1251 } 1252 1253 RF_Free(clabel, sizeof(*clabel)); 1254 1255 clabel = raidget_component_label(raidPtr, column); 1256 1257 return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr)); 1258 1259 #if 0 1260 case RAIDFRAME_SET_COMPONENT_LABEL: 1261 clabel = (RF_ComponentLabel_t *) data; 1262 1263 /* XXX check the label for valid stuff... */ 1264 /* Note that some things *should not* get modified -- 1265 the user should be re-initing the labels instead of 1266 trying to patch things. 
1267 */ 1268 1269 raidid = raidPtr->raidid; 1270 #ifdef DEBUG 1271 printf("raid%d: Got component label:\n", raidid); 1272 printf("raid%d: Version: %d\n", raidid, clabel->version); 1273 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1274 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1275 printf("raid%d: Column: %d\n", raidid, clabel->column); 1276 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1277 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1278 printf("raid%d: Status: %d\n", raidid, clabel->status); 1279 #endif 1280 clabel->row = 0; 1281 column = clabel->column; 1282 1283 if ((column < 0) || (column >= raidPtr->numCol)) { 1284 return(EINVAL); 1285 } 1286 1287 /* XXX this isn't allowed to do anything for now :-) */ 1288 1289 /* XXX and before it is, we need to fill in the rest 1290 of the fields!?!?!?! */ 1291 memcpy(raidget_component_label(raidPtr, column), 1292 clabel, sizeof(*clabel)); 1293 raidflush_component_label(raidPtr, column); 1294 return (0); 1295 #endif 1296 1297 case RAIDFRAME_INIT_LABELS: 1298 clabel = (RF_ComponentLabel_t *) data; 1299 /* 1300 we only want the serial number from 1301 the above. We get all the rest of the information 1302 from the config that was used to create this RAID 1303 set. 1304 */ 1305 1306 raidPtr->serial_number = clabel->serial_number; 1307 1308 for(column=0;column<raidPtr->numCol;column++) { 1309 diskPtr = &raidPtr->Disks[column]; 1310 if (!RF_DEAD_DISK(diskPtr->status)) { 1311 ci_label = raidget_component_label(raidPtr, 1312 column); 1313 /* Zeroing this is important. 
*/ 1314 memset(ci_label, 0, sizeof(*ci_label)); 1315 raid_init_component_label(raidPtr, ci_label); 1316 ci_label->serial_number = 1317 raidPtr->serial_number; 1318 ci_label->row = 0; /* we dont' pretend to support more */ 1319 rf_component_label_set_partitionsize(ci_label, 1320 diskPtr->partitionSize); 1321 ci_label->column = column; 1322 raidflush_component_label(raidPtr, column); 1323 } 1324 /* XXXjld what about the spares? */ 1325 } 1326 1327 return (retcode); 1328 case RAIDFRAME_SET_AUTOCONFIG: 1329 d = rf_set_autoconfig(raidPtr, *(int *) data); 1330 printf("raid%d: New autoconfig value is: %d\n", 1331 raidPtr->raidid, d); 1332 *(int *) data = d; 1333 return (retcode); 1334 1335 case RAIDFRAME_SET_ROOT: 1336 d = rf_set_rootpartition(raidPtr, *(int *) data); 1337 printf("raid%d: New rootpartition value is: %d\n", 1338 raidPtr->raidid, d); 1339 *(int *) data = d; 1340 return (retcode); 1341 1342 /* initialize all parity */ 1343 case RAIDFRAME_REWRITEPARITY: 1344 1345 if (raidPtr->Layout.map->faultsTolerated == 0) { 1346 /* Parity for RAID 0 is trivially correct */ 1347 raidPtr->parity_good = RF_RAID_CLEAN; 1348 return(0); 1349 } 1350 1351 if (raidPtr->parity_rewrite_in_progress == 1) { 1352 /* Re-write is already in progress! 
*/ 1353 return(EINVAL); 1354 } 1355 1356 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1357 rf_RewriteParityThread, 1358 raidPtr,"raid_parity"); 1359 return (retcode); 1360 1361 1362 case RAIDFRAME_ADD_HOT_SPARE: 1363 sparePtr = (RF_SingleComponent_t *) data; 1364 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1365 retcode = rf_add_hot_spare(raidPtr, &component); 1366 return(retcode); 1367 1368 case RAIDFRAME_REMOVE_HOT_SPARE: 1369 return(retcode); 1370 1371 case RAIDFRAME_DELETE_COMPONENT: 1372 componentPtr = (RF_SingleComponent_t *)data; 1373 memcpy( &component, componentPtr, 1374 sizeof(RF_SingleComponent_t)); 1375 retcode = rf_delete_component(raidPtr, &component); 1376 return(retcode); 1377 1378 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1379 componentPtr = (RF_SingleComponent_t *)data; 1380 memcpy( &component, componentPtr, 1381 sizeof(RF_SingleComponent_t)); 1382 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1383 return(retcode); 1384 1385 case RAIDFRAME_REBUILD_IN_PLACE: 1386 1387 if (raidPtr->Layout.map->faultsTolerated == 0) { 1388 /* Can't do this on a RAID 0!! */ 1389 return(EINVAL); 1390 } 1391 1392 if (raidPtr->recon_in_progress == 1) { 1393 /* a reconstruct is already in progress! */ 1394 return(EINVAL); 1395 } 1396 1397 componentPtr = (RF_SingleComponent_t *) data; 1398 memcpy( &component, componentPtr, 1399 sizeof(RF_SingleComponent_t)); 1400 component.row = 0; /* we don't support any more */ 1401 column = component.column; 1402 1403 if ((column < 0) || (column >= raidPtr->numCol)) { 1404 return(EINVAL); 1405 } 1406 1407 rf_lock_mutex2(raidPtr->mutex); 1408 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1409 (raidPtr->numFailures > 0)) { 1410 /* XXX 0 above shouldn't be constant!!! */ 1411 /* some component other than this has failed. 1412 Let's not make things worse than they already 1413 are... 
*/ 1414 printf("raid%d: Unable to reconstruct to disk at:\n", 1415 raidPtr->raidid); 1416 printf("raid%d: Col: %d Too many failures.\n", 1417 raidPtr->raidid, column); 1418 rf_unlock_mutex2(raidPtr->mutex); 1419 return (EINVAL); 1420 } 1421 if (raidPtr->Disks[column].status == 1422 rf_ds_reconstructing) { 1423 printf("raid%d: Unable to reconstruct to disk at:\n", 1424 raidPtr->raidid); 1425 printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column); 1426 1427 rf_unlock_mutex2(raidPtr->mutex); 1428 return (EINVAL); 1429 } 1430 if (raidPtr->Disks[column].status == rf_ds_spared) { 1431 rf_unlock_mutex2(raidPtr->mutex); 1432 return (EINVAL); 1433 } 1434 rf_unlock_mutex2(raidPtr->mutex); 1435 1436 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1437 if (rrcopy == NULL) 1438 return(ENOMEM); 1439 1440 rrcopy->raidPtr = (void *) raidPtr; 1441 rrcopy->col = column; 1442 1443 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1444 rf_ReconstructInPlaceThread, 1445 rrcopy,"raid_reconip"); 1446 return(retcode); 1447 1448 case RAIDFRAME_GET_INFO: 1449 if (!raidPtr->valid) 1450 return (ENODEV); 1451 ucfgp = (RF_DeviceConfig_t **) data; 1452 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1453 (RF_DeviceConfig_t *)); 1454 if (d_cfg == NULL) 1455 return (ENOMEM); 1456 d_cfg->rows = 1; /* there is only 1 row now */ 1457 d_cfg->cols = raidPtr->numCol; 1458 d_cfg->ndevs = raidPtr->numCol; 1459 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1460 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1461 return (ENOMEM); 1462 } 1463 d_cfg->nspares = raidPtr->numSpare; 1464 if (d_cfg->nspares >= RF_MAX_DISKS) { 1465 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1466 return (ENOMEM); 1467 } 1468 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1469 d = 0; 1470 for (j = 0; j < d_cfg->cols; j++) { 1471 d_cfg->devs[d] = raidPtr->Disks[j]; 1472 d++; 1473 } 1474 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1475 d_cfg->spares[i] = raidPtr->Disks[j]; 1476 } 1477 retcode = 
copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1478 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1479 1480 return (retcode); 1481 1482 case RAIDFRAME_CHECK_PARITY: 1483 *(int *) data = raidPtr->parity_good; 1484 return (0); 1485 1486 case RAIDFRAME_PARITYMAP_STATUS: 1487 if (rf_paritymap_ineligible(raidPtr)) 1488 return EINVAL; 1489 rf_paritymap_status(raidPtr->parity_map, 1490 (struct rf_pmstat *)data); 1491 return 0; 1492 1493 case RAIDFRAME_PARITYMAP_SET_PARAMS: 1494 if (rf_paritymap_ineligible(raidPtr)) 1495 return EINVAL; 1496 if (raidPtr->parity_map == NULL) 1497 return ENOENT; /* ??? */ 1498 if (0 != rf_paritymap_set_params(raidPtr->parity_map, 1499 (struct rf_pmparams *)data, 1)) 1500 return EINVAL; 1501 return 0; 1502 1503 case RAIDFRAME_PARITYMAP_GET_DISABLE: 1504 if (rf_paritymap_ineligible(raidPtr)) 1505 return EINVAL; 1506 *(int *) data = rf_paritymap_get_disable(raidPtr); 1507 return 0; 1508 1509 case RAIDFRAME_PARITYMAP_SET_DISABLE: 1510 if (rf_paritymap_ineligible(raidPtr)) 1511 return EINVAL; 1512 rf_paritymap_set_disable(raidPtr, *(int *)data); 1513 /* XXX should errors be passed up? */ 1514 return 0; 1515 1516 case RAIDFRAME_RESET_ACCTOTALS: 1517 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1518 return (0); 1519 1520 case RAIDFRAME_GET_ACCTOTALS: 1521 totals = (RF_AccTotals_t *) data; 1522 *totals = raidPtr->acc_totals; 1523 return (0); 1524 1525 case RAIDFRAME_KEEP_ACCTOTALS: 1526 raidPtr->keep_acc_totals = *(int *)data; 1527 return (0); 1528 1529 case RAIDFRAME_GET_SIZE: 1530 *(int *) data = raidPtr->totalSectors; 1531 return (0); 1532 1533 /* fail a disk & optionally start reconstruction */ 1534 case RAIDFRAME_FAIL_DISK: 1535 1536 if (raidPtr->Layout.map->faultsTolerated == 0) { 1537 /* Can't do this on a RAID 0!! 
*/ 1538 return(EINVAL); 1539 } 1540 1541 rr = (struct rf_recon_req *) data; 1542 rr->row = 0; 1543 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1544 return (EINVAL); 1545 1546 1547 rf_lock_mutex2(raidPtr->mutex); 1548 if (raidPtr->status == rf_rs_reconstructing) { 1549 /* you can't fail a disk while we're reconstructing! */ 1550 /* XXX wrong for RAID6 */ 1551 rf_unlock_mutex2(raidPtr->mutex); 1552 return (EINVAL); 1553 } 1554 if ((raidPtr->Disks[rr->col].status == 1555 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1556 /* some other component has failed. Let's not make 1557 things worse. XXX wrong for RAID6 */ 1558 rf_unlock_mutex2(raidPtr->mutex); 1559 return (EINVAL); 1560 } 1561 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1562 /* Can't fail a spared disk! */ 1563 rf_unlock_mutex2(raidPtr->mutex); 1564 return (EINVAL); 1565 } 1566 rf_unlock_mutex2(raidPtr->mutex); 1567 1568 /* make a copy of the recon request so that we don't rely on 1569 * the user's buffer */ 1570 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1571 if (rrcopy == NULL) 1572 return(ENOMEM); 1573 memcpy(rrcopy, rr, sizeof(*rr)); 1574 rrcopy->raidPtr = (void *) raidPtr; 1575 1576 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1577 rf_ReconThread, 1578 rrcopy,"raid_recon"); 1579 return (0); 1580 1581 /* invoke a copyback operation after recon on whatever disk 1582 * needs it, if any */ 1583 case RAIDFRAME_COPYBACK: 1584 1585 if (raidPtr->Layout.map->faultsTolerated == 0) { 1586 /* This makes no sense on a RAID 0!! */ 1587 return(EINVAL); 1588 } 1589 1590 if (raidPtr->copyback_in_progress == 1) { 1591 /* Copyback is already in progress! 
*/ 1592 return(EINVAL); 1593 } 1594 1595 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1596 rf_CopybackThread, 1597 raidPtr,"raid_copyback"); 1598 return (retcode); 1599 1600 /* return the percentage completion of reconstruction */ 1601 case RAIDFRAME_CHECK_RECON_STATUS: 1602 if (raidPtr->Layout.map->faultsTolerated == 0) { 1603 /* This makes no sense on a RAID 0, so tell the 1604 user it's done. */ 1605 *(int *) data = 100; 1606 return(0); 1607 } 1608 if (raidPtr->status != rf_rs_reconstructing) 1609 *(int *) data = 100; 1610 else { 1611 if (raidPtr->reconControl->numRUsTotal > 0) { 1612 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1613 } else { 1614 *(int *) data = 0; 1615 } 1616 } 1617 return (0); 1618 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1619 progressInfoPtr = (RF_ProgressInfo_t **) data; 1620 if (raidPtr->status != rf_rs_reconstructing) { 1621 progressInfo.remaining = 0; 1622 progressInfo.completed = 100; 1623 progressInfo.total = 100; 1624 } else { 1625 progressInfo.total = 1626 raidPtr->reconControl->numRUsTotal; 1627 progressInfo.completed = 1628 raidPtr->reconControl->numRUsComplete; 1629 progressInfo.remaining = progressInfo.total - 1630 progressInfo.completed; 1631 } 1632 retcode = copyout(&progressInfo, *progressInfoPtr, 1633 sizeof(RF_ProgressInfo_t)); 1634 return (retcode); 1635 1636 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1637 if (raidPtr->Layout.map->faultsTolerated == 0) { 1638 /* This makes no sense on a RAID 0, so tell the 1639 user it's done. 
*/ 1640 *(int *) data = 100; 1641 return(0); 1642 } 1643 if (raidPtr->parity_rewrite_in_progress == 1) { 1644 *(int *) data = 100 * 1645 raidPtr->parity_rewrite_stripes_done / 1646 raidPtr->Layout.numStripe; 1647 } else { 1648 *(int *) data = 100; 1649 } 1650 return (0); 1651 1652 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1653 progressInfoPtr = (RF_ProgressInfo_t **) data; 1654 if (raidPtr->parity_rewrite_in_progress == 1) { 1655 progressInfo.total = raidPtr->Layout.numStripe; 1656 progressInfo.completed = 1657 raidPtr->parity_rewrite_stripes_done; 1658 progressInfo.remaining = progressInfo.total - 1659 progressInfo.completed; 1660 } else { 1661 progressInfo.remaining = 0; 1662 progressInfo.completed = 100; 1663 progressInfo.total = 100; 1664 } 1665 retcode = copyout(&progressInfo, *progressInfoPtr, 1666 sizeof(RF_ProgressInfo_t)); 1667 return (retcode); 1668 1669 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1670 if (raidPtr->Layout.map->faultsTolerated == 0) { 1671 /* This makes no sense on a RAID 0 */ 1672 *(int *) data = 100; 1673 return(0); 1674 } 1675 if (raidPtr->copyback_in_progress == 1) { 1676 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1677 raidPtr->Layout.numStripe; 1678 } else { 1679 *(int *) data = 100; 1680 } 1681 return (0); 1682 1683 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1684 progressInfoPtr = (RF_ProgressInfo_t **) data; 1685 if (raidPtr->copyback_in_progress == 1) { 1686 progressInfo.total = raidPtr->Layout.numStripe; 1687 progressInfo.completed = 1688 raidPtr->copyback_stripes_done; 1689 progressInfo.remaining = progressInfo.total - 1690 progressInfo.completed; 1691 } else { 1692 progressInfo.remaining = 0; 1693 progressInfo.completed = 100; 1694 progressInfo.total = 100; 1695 } 1696 retcode = copyout(&progressInfo, *progressInfoPtr, 1697 sizeof(RF_ProgressInfo_t)); 1698 return (retcode); 1699 1700 /* the sparetable daemon calls this to wait for the kernel to 1701 * need a spare table. 
this ioctl does not return until a 1702 * spare table is needed. XXX -- calling mpsleep here in the 1703 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1704 * -- I should either compute the spare table in the kernel, 1705 * or have a different -- XXX XXX -- interface (a different 1706 * character device) for delivering the table -- XXX */ 1707 #if 0 1708 case RAIDFRAME_SPARET_WAIT: 1709 rf_lock_mutex2(rf_sparet_wait_mutex); 1710 while (!rf_sparet_wait_queue) 1711 rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex); 1712 waitreq = rf_sparet_wait_queue; 1713 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1714 rf_unlock_mutex2(rf_sparet_wait_mutex); 1715 1716 /* structure assignment */ 1717 *((RF_SparetWait_t *) data) = *waitreq; 1718 1719 RF_Free(waitreq, sizeof(*waitreq)); 1720 return (0); 1721 1722 /* wakes up a process waiting on SPARET_WAIT and puts an error 1723 * code in it that will cause the dameon to exit */ 1724 case RAIDFRAME_ABORT_SPARET_WAIT: 1725 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1726 waitreq->fcol = -1; 1727 rf_lock_mutex2(rf_sparet_wait_mutex); 1728 waitreq->next = rf_sparet_wait_queue; 1729 rf_sparet_wait_queue = waitreq; 1730 rf_broadcast_conf2(rf_sparet_wait_cv); 1731 rf_unlock_mutex2(rf_sparet_wait_mutex); 1732 return (0); 1733 1734 /* used by the spare table daemon to deliver a spare table 1735 * into the kernel */ 1736 case RAIDFRAME_SEND_SPARET: 1737 1738 /* install the spare table */ 1739 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1740 1741 /* respond to the requestor. 
the return status of the spare 1742 * table installation is passed in the "fcol" field */ 1743 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1744 waitreq->fcol = retcode; 1745 rf_lock_mutex2(rf_sparet_wait_mutex); 1746 waitreq->next = rf_sparet_resp_queue; 1747 rf_sparet_resp_queue = waitreq; 1748 rf_broadcast_cond2(rf_sparet_resp_cv); 1749 rf_unlock_mutex2(rf_sparet_wait_mutex); 1750 1751 return (retcode); 1752 #endif 1753 1754 default: 1755 break; /* fall through to the os-specific code below */ 1756 1757 } 1758 1759 if (!raidPtr->valid) 1760 return (EINVAL); 1761 1762 /* 1763 * Add support for "regular" device ioctls here. 1764 */ 1765 1766 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l); 1767 if (error != EPASSTHROUGH) 1768 return (error); 1769 1770 switch (cmd) { 1771 case DIOCGDINFO: 1772 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1773 break; 1774 #ifdef __HAVE_OLD_DISKLABEL 1775 case ODIOCGDINFO: 1776 newlabel = *(rs->sc_dkdev.dk_label); 1777 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1778 return ENOTTY; 1779 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1780 break; 1781 #endif 1782 1783 case DIOCGPART: 1784 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1785 ((struct partinfo *) data)->part = 1786 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1787 break; 1788 1789 case DIOCWDINFO: 1790 case DIOCSDINFO: 1791 #ifdef __HAVE_OLD_DISKLABEL 1792 case ODIOCWDINFO: 1793 case ODIOCSDINFO: 1794 #endif 1795 { 1796 struct disklabel *lp; 1797 #ifdef __HAVE_OLD_DISKLABEL 1798 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1799 memset(&newlabel, 0, sizeof newlabel); 1800 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1801 lp = &newlabel; 1802 } else 1803 #endif 1804 lp = (struct disklabel *)data; 1805 1806 if ((error = raidlock(rs)) != 0) 1807 return (error); 1808 1809 rs->sc_flags |= RAIDF_LABELLING; 1810 1811 error = setdisklabel(rs->sc_dkdev.dk_label, 1812 lp, 0, rs->sc_dkdev.dk_cpulabel); 1813 
if (error == 0) { 1814 if (cmd == DIOCWDINFO 1815 #ifdef __HAVE_OLD_DISKLABEL 1816 || cmd == ODIOCWDINFO 1817 #endif 1818 ) 1819 error = writedisklabel(RAIDLABELDEV(dev), 1820 raidstrategy, rs->sc_dkdev.dk_label, 1821 rs->sc_dkdev.dk_cpulabel); 1822 } 1823 rs->sc_flags &= ~RAIDF_LABELLING; 1824 1825 raidunlock(rs); 1826 1827 if (error) 1828 return (error); 1829 break; 1830 } 1831 1832 case DIOCWLABEL: 1833 if (*(int *) data != 0) 1834 rs->sc_flags |= RAIDF_WLABEL; 1835 else 1836 rs->sc_flags &= ~RAIDF_WLABEL; 1837 break; 1838 1839 case DIOCGDEFLABEL: 1840 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1841 break; 1842 1843 #ifdef __HAVE_OLD_DISKLABEL 1844 case ODIOCGDEFLABEL: 1845 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1846 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1847 return ENOTTY; 1848 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1849 break; 1850 #endif 1851 1852 case DIOCAWEDGE: 1853 case DIOCDWEDGE: 1854 dkw = (void *)data; 1855 1856 /* If the ioctl happens here, the parent is us. */ 1857 (void)strcpy(dkw->dkw_parent, rs->sc_xname); 1858 return cmd == DIOCAWEDGE ? 
		    dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		/* Flush the write caches of all component devices. */
		return rf_sync_component_caches(raidPtr);

	case DIOCGSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;

		/* Report the name of the bufq strategy currently in use. */
		s = splbio();
		strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
		    sizeof(dks->dks_name));
		splx(s);
		dks->dks_paramlen = 0;

		return 0;
	    }

	case DIOCSSTRATEGY:
	    {
		struct disk_strategy *dks = (void *)data;
		struct bufq_state *new;
		struct bufq_state *old;

		if (dks->dks_param != NULL) {
			return EINVAL;
		}
		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
		error = bufq_alloc(&new, dks->dks_name,
		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
		if (error) {
			return error;
		}
		/* Swap in the new queue, migrating any pending buffers,
		 * then release the old one. */
		s = splbio();
		old = rs->buf_queue;
		bufq_move(new, old);
		rs->buf_queue = new;
		splx(s);
		bufq_free(old);

		return 0;
	    }

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/*
 * raidinit -- complete the rest of the initialization for the
 * RAIDframe device: attach the pseudo-device, set up and attach the
 * disk(9) structure, and kick off wedge discovery on the new unit.
 */


static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	int     unit;
	RF_Raid_t *raidPtr = &rs->sc_r;

	unit = raidPtr->raidid;


	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	/* NOTE(review): cf is freed on the attach-failure path below, or
	 * later by RAIDFRAME_SHUTDOWN after a successful config_detach(). */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);
	disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	dkwedge_discover(&rs->sc_dkdev);

	rf_set_geometry(rs, raidPtr);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used.
GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* Queue the request and wake the sparetable daemon. */
	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	/* The daemon passes its status back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;
	int rc;

	rs = raidPtr->softc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Component labels are updated with the mutex dropped. */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	while (raidPtr->openings > 0) {
		rf_unlock_mutex2(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ?
		    1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* Reject requests that run past the end of the set, or
		 * whose sector arithmetic above wrapped around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		if (bp->b_bcount & raidPtr->sectorMask) {
			/* Transfers must be a multiple of the sector size. */
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			rf_lock_mutex2(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->openings--;
		rf_unlock_mutex2(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		rf_lock_mutex2(raidPtr->mutex);
	}
	rf_unlock_mutex2(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry.
 * Always returns 0; per-request status is delivered asynchronously via
 * KernelWakeupFunc(). */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ?
	    B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete the NOP immediately through the normal
		 * completion path. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Installed as bp->b_iodone by InitBP(); records the
   completion status, marks a failing component dead when that would
   not break the set, and hands the request to the raidio thread. */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}


/*
 * initialize a buf structure for doing an I/O in the kernel.
 * NOTE(review): b_vp is accepted but never stored in bp by this
 * function -- confirm whether that is intentional.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

/*
 * Build a synthetic disklabel for the RAID set in *lp: one RAW_PART
 * partition covering the whole set, with geometry derived from the
 * RAIDframe layout.
 */
static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label...
*/ 2311 lp->d_secperunit = raidPtr->totalSectors; 2312 lp->d_secsize = raidPtr->bytesPerSector; 2313 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2314 lp->d_ntracks = 4 * raidPtr->numCol; 2315 lp->d_ncylinders = raidPtr->totalSectors / 2316 (lp->d_nsectors * lp->d_ntracks); 2317 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2318 2319 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2320 lp->d_type = DTYPE_RAID; 2321 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2322 lp->d_rpm = 3600; 2323 lp->d_interleave = 1; 2324 lp->d_flags = 0; 2325 2326 lp->d_partitions[RAW_PART].p_offset = 0; 2327 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2328 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2329 lp->d_npartitions = RAW_PART + 1; 2330 2331 lp->d_magic = DISKMAGIC; 2332 lp->d_magic2 = DISKMAGIC; 2333 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2334 2335 } 2336 /* 2337 * Read the disklabel from the raid device. If one is not present, fake one 2338 * up. 2339 */ 2340 static void 2341 raidgetdisklabel(dev_t dev) 2342 { 2343 int unit = raidunit(dev); 2344 struct raid_softc *rs; 2345 const char *errstring; 2346 struct disklabel *lp; 2347 struct cpu_disklabel *clp; 2348 RF_Raid_t *raidPtr; 2349 2350 if ((rs = raidget(unit)) == NULL) 2351 return; 2352 2353 lp = rs->sc_dkdev.dk_label; 2354 clp = rs->sc_dkdev.dk_cpulabel; 2355 2356 db1_printf(("Getting the disklabel...\n")); 2357 2358 memset(clp, 0, sizeof(*clp)); 2359 2360 raidPtr = &rs->sc_r; 2361 2362 raidgetdefaultlabel(raidPtr, rs, lp); 2363 2364 /* 2365 * Call the generic disklabel extraction routine. 2366 */ 2367 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2368 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2369 if (errstring) 2370 raidmakedisklabel(rs); 2371 else { 2372 int i; 2373 struct partition *pp; 2374 2375 /* 2376 * Sanity check whether the found disklabel is valid. 
2377 * 2378 * This is necessary since total size of the raid device 2379 * may vary when an interleave is changed even though exactly 2380 * same components are used, and old disklabel may used 2381 * if that is found. 2382 */ 2383 if (lp->d_secperunit != rs->sc_size) 2384 printf("raid%d: WARNING: %s: " 2385 "total sector size in disklabel (%" PRIu32 ") != " 2386 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname, 2387 lp->d_secperunit, rs->sc_size); 2388 for (i = 0; i < lp->d_npartitions; i++) { 2389 pp = &lp->d_partitions[i]; 2390 if (pp->p_offset + pp->p_size > rs->sc_size) 2391 printf("raid%d: WARNING: %s: end of partition `%c' " 2392 "exceeds the size of raid (%" PRIu64 ")\n", 2393 unit, rs->sc_xname, 'a' + i, rs->sc_size); 2394 } 2395 } 2396 2397 } 2398 /* 2399 * Take care of things one might want to take care of in the event 2400 * that a disklabel isn't present. 2401 */ 2402 static void 2403 raidmakedisklabel(struct raid_softc *rs) 2404 { 2405 struct disklabel *lp = rs->sc_dkdev.dk_label; 2406 db1_printf(("Making a label..\n")); 2407 2408 /* 2409 * For historical reasons, if there's no disklabel present 2410 * the raw partition must be marked FS_BSDFFS. 2411 */ 2412 2413 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2414 2415 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2416 2417 lp->d_checksum = dkcksum(lp); 2418 } 2419 /* 2420 * Wait interruptibly for an exclusive lock. 2421 * 2422 * XXX 2423 * Several drivers do this; it should be abstracted and made MP-safe. 2424 * (Hmm... where have we seen this warning before :-> GO ) 2425 */ 2426 static int 2427 raidlock(struct raid_softc *rs) 2428 { 2429 int error; 2430 2431 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2432 rs->sc_flags |= RAIDF_WANTED; 2433 if ((error = 2434 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2435 return (error); 2436 } 2437 rs->sc_flags |= RAIDF_LOCKED; 2438 return (0); 2439 } 2440 /* 2441 * Unlock and wake up any waiters. 
2442 */ 2443 static void 2444 raidunlock(struct raid_softc *rs) 2445 { 2446 2447 rs->sc_flags &= ~RAIDF_LOCKED; 2448 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2449 rs->sc_flags &= ~RAIDF_WANTED; 2450 wakeup(rs); 2451 } 2452 } 2453 2454 2455 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2456 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2457 #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE 2458 2459 static daddr_t 2460 rf_component_info_offset(void) 2461 { 2462 2463 return RF_COMPONENT_INFO_OFFSET; 2464 } 2465 2466 static daddr_t 2467 rf_component_info_size(unsigned secsize) 2468 { 2469 daddr_t info_size; 2470 2471 KASSERT(secsize); 2472 if (secsize > RF_COMPONENT_INFO_SIZE) 2473 info_size = secsize; 2474 else 2475 info_size = RF_COMPONENT_INFO_SIZE; 2476 2477 return info_size; 2478 } 2479 2480 static daddr_t 2481 rf_parity_map_offset(RF_Raid_t *raidPtr) 2482 { 2483 daddr_t map_offset; 2484 2485 KASSERT(raidPtr->bytesPerSector); 2486 if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE) 2487 map_offset = raidPtr->bytesPerSector; 2488 else 2489 map_offset = RF_COMPONENT_INFO_SIZE; 2490 map_offset += rf_component_info_offset(); 2491 2492 return map_offset; 2493 } 2494 2495 static daddr_t 2496 rf_parity_map_size(RF_Raid_t *raidPtr) 2497 { 2498 daddr_t map_size; 2499 2500 if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE) 2501 map_size = raidPtr->bytesPerSector; 2502 else 2503 map_size = RF_PARITY_MAP_SIZE; 2504 2505 return map_size; 2506 } 2507 2508 int 2509 raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col) 2510 { 2511 RF_ComponentLabel_t *clabel; 2512 2513 clabel = raidget_component_label(raidPtr, col); 2514 clabel->clean = RF_RAID_CLEAN; 2515 raidflush_component_label(raidPtr, col); 2516 return(0); 2517 } 2518 2519 2520 int 2521 raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col) 2522 { 2523 RF_ComponentLabel_t *clabel; 2524 2525 clabel = raidget_component_label(raidPtr, col); 2526 clabel->clean = RF_RAID_DIRTY; 2527 raidflush_component_label(raidPtr, col); 2528 
return(0); 2529 } 2530 2531 int 2532 raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2533 { 2534 KASSERT(raidPtr->bytesPerSector); 2535 return raidread_component_label(raidPtr->bytesPerSector, 2536 raidPtr->Disks[col].dev, 2537 raidPtr->raid_cinfo[col].ci_vp, 2538 &raidPtr->raid_cinfo[col].ci_label); 2539 } 2540 2541 RF_ComponentLabel_t * 2542 raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2543 { 2544 return &raidPtr->raid_cinfo[col].ci_label; 2545 } 2546 2547 int 2548 raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col) 2549 { 2550 RF_ComponentLabel_t *label; 2551 2552 label = &raidPtr->raid_cinfo[col].ci_label; 2553 label->mod_counter = raidPtr->mod_counter; 2554 #ifndef RF_NO_PARITY_MAP 2555 label->parity_map_modcount = label->mod_counter; 2556 #endif 2557 return raidwrite_component_label(raidPtr->bytesPerSector, 2558 raidPtr->Disks[col].dev, 2559 raidPtr->raid_cinfo[col].ci_vp, label); 2560 } 2561 2562 2563 static int 2564 raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2565 RF_ComponentLabel_t *clabel) 2566 { 2567 return raidread_component_area(dev, b_vp, clabel, 2568 sizeof(RF_ComponentLabel_t), 2569 rf_component_info_offset(), 2570 rf_component_info_size(secsize)); 2571 } 2572 2573 /* ARGSUSED */ 2574 static int 2575 raidread_component_area(dev_t dev, struct vnode *b_vp, void *data, 2576 size_t msize, daddr_t offset, daddr_t dsize) 2577 { 2578 struct buf *bp; 2579 const struct bdevsw *bdev; 2580 int error; 2581 2582 /* XXX should probably ensure that we don't try to do this if 2583 someone has changed rf_protected_sectors. */ 2584 2585 if (b_vp == NULL) { 2586 /* For whatever reason, this component is not valid. 2587 Don't try to read a component label from it. */ 2588 return(EINVAL); 2589 } 2590 2591 /* get a block of the appropriate size... 
*/ 2592 bp = geteblk((int)dsize); 2593 bp->b_dev = dev; 2594 2595 /* get our ducks in a row for the read */ 2596 bp->b_blkno = offset / DEV_BSIZE; 2597 bp->b_bcount = dsize; 2598 bp->b_flags |= B_READ; 2599 bp->b_resid = dsize; 2600 2601 bdev = bdevsw_lookup(bp->b_dev); 2602 if (bdev == NULL) 2603 return (ENXIO); 2604 (*bdev->d_strategy)(bp); 2605 2606 error = biowait(bp); 2607 2608 if (!error) { 2609 memcpy(data, bp->b_data, msize); 2610 } 2611 2612 brelse(bp, 0); 2613 return(error); 2614 } 2615 2616 2617 static int 2618 raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp, 2619 RF_ComponentLabel_t *clabel) 2620 { 2621 return raidwrite_component_area(dev, b_vp, clabel, 2622 sizeof(RF_ComponentLabel_t), 2623 rf_component_info_offset(), 2624 rf_component_info_size(secsize), 0); 2625 } 2626 2627 /* ARGSUSED */ 2628 static int 2629 raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data, 2630 size_t msize, daddr_t offset, daddr_t dsize, int asyncp) 2631 { 2632 struct buf *bp; 2633 const struct bdevsw *bdev; 2634 int error; 2635 2636 /* get a block of the appropriate size... */ 2637 bp = geteblk((int)dsize); 2638 bp->b_dev = dev; 2639 2640 /* get our ducks in a row for the write */ 2641 bp->b_blkno = offset / DEV_BSIZE; 2642 bp->b_bcount = dsize; 2643 bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0); 2644 bp->b_resid = dsize; 2645 2646 memset(bp->b_data, 0, dsize); 2647 memcpy(bp->b_data, data, msize); 2648 2649 bdev = bdevsw_lookup(bp->b_dev); 2650 if (bdev == NULL) 2651 return (ENXIO); 2652 (*bdev->d_strategy)(bp); 2653 if (asyncp) 2654 return 0; 2655 error = biowait(bp); 2656 brelse(bp, 0); 2657 if (error) { 2658 #if 1 2659 printf("Failed to write RAID component info!\n"); 2660 #endif 2661 } 2662 2663 return(error); 2664 } 2665 2666 void 2667 rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2668 { 2669 int c; 2670 2671 for (c = 0; c < raidPtr->numCol; c++) { 2672 /* Skip dead disks. 
*/ 2673 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2674 continue; 2675 /* XXXjld: what if an error occurs here? */ 2676 raidwrite_component_area(raidPtr->Disks[c].dev, 2677 raidPtr->raid_cinfo[c].ci_vp, map, 2678 RF_PARITYMAP_NBYTE, 2679 rf_parity_map_offset(raidPtr), 2680 rf_parity_map_size(raidPtr), 0); 2681 } 2682 } 2683 2684 void 2685 rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map) 2686 { 2687 struct rf_paritymap_ondisk tmp; 2688 int c,first; 2689 2690 first=1; 2691 for (c = 0; c < raidPtr->numCol; c++) { 2692 /* Skip dead disks. */ 2693 if (RF_DEAD_DISK(raidPtr->Disks[c].status)) 2694 continue; 2695 raidread_component_area(raidPtr->Disks[c].dev, 2696 raidPtr->raid_cinfo[c].ci_vp, &tmp, 2697 RF_PARITYMAP_NBYTE, 2698 rf_parity_map_offset(raidPtr), 2699 rf_parity_map_size(raidPtr)); 2700 if (first) { 2701 memcpy(map, &tmp, sizeof(*map)); 2702 first = 0; 2703 } else { 2704 rf_paritymap_merge(map, &tmp); 2705 } 2706 } 2707 } 2708 2709 void 2710 rf_markalldirty(RF_Raid_t *raidPtr) 2711 { 2712 RF_ComponentLabel_t *clabel; 2713 int sparecol; 2714 int c; 2715 int j; 2716 int scol = -1; 2717 2718 raidPtr->mod_counter++; 2719 for (c = 0; c < raidPtr->numCol; c++) { 2720 /* we don't want to touch (at all) a disk that has 2721 failed */ 2722 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2723 clabel = raidget_component_label(raidPtr, c); 2724 if (clabel->status == rf_ds_spared) { 2725 /* XXX do something special... 2726 but whatever you do, don't 2727 try to access it!! */ 2728 } else { 2729 raidmarkdirty(raidPtr, c); 2730 } 2731 } 2732 } 2733 2734 for( c = 0; c < raidPtr->numSpare ; c++) { 2735 sparecol = raidPtr->numCol + c; 2736 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2737 /* 2738 2739 we claim this disk is "optimal" if it's 2740 rf_ds_used_spare, as that means it should be 2741 directly substitutable for the disk it replaced. 2742 We note that too... 
2743 2744 */ 2745 2746 for(j=0;j<raidPtr->numCol;j++) { 2747 if (raidPtr->Disks[j].spareCol == sparecol) { 2748 scol = j; 2749 break; 2750 } 2751 } 2752 2753 clabel = raidget_component_label(raidPtr, sparecol); 2754 /* make sure status is noted */ 2755 2756 raid_init_component_label(raidPtr, clabel); 2757 2758 clabel->row = 0; 2759 clabel->column = scol; 2760 /* Note: we *don't* change status from rf_ds_used_spare 2761 to rf_ds_optimal */ 2762 /* clabel.status = rf_ds_optimal; */ 2763 2764 raidmarkdirty(raidPtr, sparecol); 2765 } 2766 } 2767 } 2768 2769 2770 void 2771 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2772 { 2773 RF_ComponentLabel_t *clabel; 2774 int sparecol; 2775 int c; 2776 int j; 2777 int scol; 2778 2779 scol = -1; 2780 2781 /* XXX should do extra checks to make sure things really are clean, 2782 rather than blindly setting the clean bit... */ 2783 2784 raidPtr->mod_counter++; 2785 2786 for (c = 0; c < raidPtr->numCol; c++) { 2787 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2788 clabel = raidget_component_label(raidPtr, c); 2789 /* make sure status is noted */ 2790 clabel->status = rf_ds_optimal; 2791 2792 /* note what unit we are configured as */ 2793 clabel->last_unit = raidPtr->raidid; 2794 2795 raidflush_component_label(raidPtr, c); 2796 if (final == RF_FINAL_COMPONENT_UPDATE) { 2797 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2798 raidmarkclean(raidPtr, c); 2799 } 2800 } 2801 } 2802 /* else we don't touch it.. */ 2803 } 2804 2805 for( c = 0; c < raidPtr->numSpare ; c++) { 2806 sparecol = raidPtr->numCol + c; 2807 /* Need to ensure that the reconstruct actually completed! */ 2808 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2809 /* 2810 2811 we claim this disk is "optimal" if it's 2812 rf_ds_used_spare, as that means it should be 2813 directly substitutable for the disk it replaced. 2814 We note that too... 
2815 2816 */ 2817 2818 for(j=0;j<raidPtr->numCol;j++) { 2819 if (raidPtr->Disks[j].spareCol == sparecol) { 2820 scol = j; 2821 break; 2822 } 2823 } 2824 2825 /* XXX shouldn't *really* need this... */ 2826 clabel = raidget_component_label(raidPtr, sparecol); 2827 /* make sure status is noted */ 2828 2829 raid_init_component_label(raidPtr, clabel); 2830 2831 clabel->column = scol; 2832 clabel->status = rf_ds_optimal; 2833 clabel->last_unit = raidPtr->raidid; 2834 2835 raidflush_component_label(raidPtr, sparecol); 2836 if (final == RF_FINAL_COMPONENT_UPDATE) { 2837 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2838 raidmarkclean(raidPtr, sparecol); 2839 } 2840 } 2841 } 2842 } 2843 } 2844 2845 void 2846 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2847 { 2848 2849 if (vp != NULL) { 2850 if (auto_configured == 1) { 2851 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2852 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2853 vput(vp); 2854 2855 } else { 2856 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2857 } 2858 } 2859 } 2860 2861 2862 void 2863 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2864 { 2865 int r,c; 2866 struct vnode *vp; 2867 int acd; 2868 2869 2870 /* We take this opportunity to close the vnodes like we should.. 
*/ 2871 2872 for (c = 0; c < raidPtr->numCol; c++) { 2873 vp = raidPtr->raid_cinfo[c].ci_vp; 2874 acd = raidPtr->Disks[c].auto_configured; 2875 rf_close_component(raidPtr, vp, acd); 2876 raidPtr->raid_cinfo[c].ci_vp = NULL; 2877 raidPtr->Disks[c].auto_configured = 0; 2878 } 2879 2880 for (r = 0; r < raidPtr->numSpare; r++) { 2881 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2882 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2883 rf_close_component(raidPtr, vp, acd); 2884 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2885 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2886 } 2887 } 2888 2889 2890 void 2891 rf_ReconThread(struct rf_recon_req *req) 2892 { 2893 int s; 2894 RF_Raid_t *raidPtr; 2895 2896 s = splbio(); 2897 raidPtr = (RF_Raid_t *) req->raidPtr; 2898 raidPtr->recon_in_progress = 1; 2899 2900 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2901 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2902 2903 RF_Free(req, sizeof(*req)); 2904 2905 raidPtr->recon_in_progress = 0; 2906 splx(s); 2907 2908 /* That's all... */ 2909 kthread_exit(0); /* does not return */ 2910 } 2911 2912 void 2913 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2914 { 2915 int retcode; 2916 int s; 2917 2918 raidPtr->parity_rewrite_stripes_done = 0; 2919 raidPtr->parity_rewrite_in_progress = 1; 2920 s = splbio(); 2921 retcode = rf_RewriteParity(raidPtr); 2922 splx(s); 2923 if (retcode) { 2924 printf("raid%d: Error re-writing parity (%d)!\n", 2925 raidPtr->raidid, retcode); 2926 } else { 2927 /* set the clean bit! If we shutdown correctly, 2928 the clean bit on each component label will get 2929 set */ 2930 raidPtr->parity_good = RF_RAID_CLEAN; 2931 } 2932 raidPtr->parity_rewrite_in_progress = 0; 2933 2934 /* Anyone waiting for us to stop? If so, inform them... */ 2935 if (raidPtr->waitShutdown) { 2936 wakeup(&raidPtr->parity_rewrite_in_progress); 2937 } 2938 2939 /* That's all... 
*/ 2940 kthread_exit(0); /* does not return */ 2941 } 2942 2943 2944 void 2945 rf_CopybackThread(RF_Raid_t *raidPtr) 2946 { 2947 int s; 2948 2949 raidPtr->copyback_in_progress = 1; 2950 s = splbio(); 2951 rf_CopybackReconstructedData(raidPtr); 2952 splx(s); 2953 raidPtr->copyback_in_progress = 0; 2954 2955 /* That's all... */ 2956 kthread_exit(0); /* does not return */ 2957 } 2958 2959 2960 void 2961 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 2962 { 2963 int s; 2964 RF_Raid_t *raidPtr; 2965 2966 s = splbio(); 2967 raidPtr = req->raidPtr; 2968 raidPtr->recon_in_progress = 1; 2969 rf_ReconstructInPlace(raidPtr, req->col); 2970 RF_Free(req, sizeof(*req)); 2971 raidPtr->recon_in_progress = 0; 2972 splx(s); 2973 2974 /* That's all... */ 2975 kthread_exit(0); /* does not return */ 2976 } 2977 2978 static RF_AutoConfig_t * 2979 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 2980 const char *cname, RF_SectorCount_t size, uint64_t numsecs, 2981 unsigned secsize) 2982 { 2983 int good_one = 0; 2984 RF_ComponentLabel_t *clabel; 2985 RF_AutoConfig_t *ac; 2986 2987 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT); 2988 if (clabel == NULL) { 2989 oomem: 2990 while(ac_list) { 2991 ac = ac_list; 2992 if (ac->clabel) 2993 free(ac->clabel, M_RAIDFRAME); 2994 ac_list = ac_list->next; 2995 free(ac, M_RAIDFRAME); 2996 } 2997 printf("RAID auto config: out of memory!\n"); 2998 return NULL; /* XXX probably should panic? */ 2999 } 3000 3001 if (!raidread_component_label(secsize, dev, vp, clabel)) { 3002 /* Got the label. Does it look reasonable? */ 3003 if (rf_reasonable_label(clabel, numsecs) && 3004 (rf_component_label_partitionsize(clabel) <= size)) { 3005 #ifdef DEBUG 3006 printf("Component on: %s: %llu\n", 3007 cname, (unsigned long long)size); 3008 rf_print_component_label(clabel); 3009 #endif 3010 /* if it's reasonable, add it, else ignore it. 
*/ 3011 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 3012 M_NOWAIT); 3013 if (ac == NULL) { 3014 free(clabel, M_RAIDFRAME); 3015 goto oomem; 3016 } 3017 strlcpy(ac->devname, cname, sizeof(ac->devname)); 3018 ac->dev = dev; 3019 ac->vp = vp; 3020 ac->clabel = clabel; 3021 ac->next = ac_list; 3022 ac_list = ac; 3023 good_one = 1; 3024 } 3025 } 3026 if (!good_one) { 3027 /* cleanup */ 3028 free(clabel, M_RAIDFRAME); 3029 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3030 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3031 vput(vp); 3032 } 3033 return ac_list; 3034 } 3035 3036 RF_AutoConfig_t * 3037 rf_find_raid_components(void) 3038 { 3039 struct vnode *vp; 3040 struct disklabel label; 3041 device_t dv; 3042 deviter_t di; 3043 dev_t dev; 3044 int bmajor, bminor, wedge, rf_part_found; 3045 int error; 3046 int i; 3047 RF_AutoConfig_t *ac_list; 3048 uint64_t numsecs; 3049 unsigned secsize; 3050 3051 /* initialize the AutoConfig list */ 3052 ac_list = NULL; 3053 3054 /* we begin by trolling through *all* the devices on the system */ 3055 3056 for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL; 3057 dv = deviter_next(&di)) { 3058 3059 /* we are only interested in disks... */ 3060 if (device_class(dv) != DV_DISK) 3061 continue; 3062 3063 /* we don't care about floppies... */ 3064 if (device_is_a(dv, "fd")) { 3065 continue; 3066 } 3067 3068 /* we don't care about CD's... */ 3069 if (device_is_a(dv, "cd")) { 3070 continue; 3071 } 3072 3073 /* we don't care about md's... 
*/ 3074 if (device_is_a(dv, "md")) { 3075 continue; 3076 } 3077 3078 /* hdfd is the Atari/Hades floppy driver */ 3079 if (device_is_a(dv, "hdfd")) { 3080 continue; 3081 } 3082 3083 /* fdisa is the Atari/Milan floppy driver */ 3084 if (device_is_a(dv, "fdisa")) { 3085 continue; 3086 } 3087 3088 /* need to find the device_name_to_block_device_major stuff */ 3089 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 3090 3091 rf_part_found = 0; /*No raid partition as yet*/ 3092 3093 /* get a vnode for the raw partition of this disk */ 3094 3095 wedge = device_is_a(dv, "dk"); 3096 bminor = minor(device_unit(dv)); 3097 dev = wedge ? makedev(bmajor, bminor) : 3098 MAKEDISKDEV(bmajor, bminor, RAW_PART); 3099 if (bdevvp(dev, &vp)) 3100 panic("RAID can't alloc vnode"); 3101 3102 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED); 3103 3104 if (error) { 3105 /* "Who cares." Continue looking 3106 for something that exists*/ 3107 vput(vp); 3108 continue; 3109 } 3110 3111 error = getdisksize(vp, &numsecs, &secsize); 3112 if (error) { 3113 vput(vp); 3114 continue; 3115 } 3116 if (wedge) { 3117 struct dkwedge_info dkw; 3118 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 3119 NOCRED); 3120 if (error) { 3121 printf("RAIDframe: can't get wedge info for " 3122 "dev %s (%d)\n", device_xname(dv), error); 3123 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3124 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3125 vput(vp); 3126 continue; 3127 } 3128 3129 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 3130 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3131 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3132 vput(vp); 3133 continue; 3134 } 3135 3136 ac_list = rf_get_component(ac_list, dev, vp, 3137 device_xname(dv), dkw.dkw_size, numsecs, secsize); 3138 rf_part_found = 1; /*There is a raid component on this disk*/ 3139 continue; 3140 } 3141 3142 /* Ok, the disk exists. Go get the disklabel. 
*/ 3143 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 3144 if (error) { 3145 /* 3146 * XXX can't happen - open() would 3147 * have errored out (or faked up one) 3148 */ 3149 if (error != ENOTTY) 3150 printf("RAIDframe: can't get label for dev " 3151 "%s (%d)\n", device_xname(dv), error); 3152 } 3153 3154 /* don't need this any more. We'll allocate it again 3155 a little later if we really do... */ 3156 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3157 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 3158 vput(vp); 3159 3160 if (error) 3161 continue; 3162 3163 rf_part_found = 0; /*No raid partitions yet*/ 3164 for (i = 0; i < label.d_npartitions; i++) { 3165 char cname[sizeof(ac_list->devname)]; 3166 3167 /* We only support partitions marked as RAID */ 3168 if (label.d_partitions[i].p_fstype != FS_RAID) 3169 continue; 3170 3171 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 3172 if (bdevvp(dev, &vp)) 3173 panic("RAID can't alloc vnode"); 3174 3175 error = VOP_OPEN(vp, FREAD, NOCRED); 3176 if (error) { 3177 /* Whatever... */ 3178 vput(vp); 3179 continue; 3180 } 3181 snprintf(cname, sizeof(cname), "%s%c", 3182 device_xname(dv), 'a' + i); 3183 ac_list = rf_get_component(ac_list, dev, vp, cname, 3184 label.d_partitions[i].p_size, numsecs, secsize); 3185 rf_part_found = 1; /*There is at least one raid partition on this disk*/ 3186 } 3187 3188 /* 3189 *If there is no raid component on this disk, either in a 3190 *disklabel or inside a wedge, check the raw partition as well, 3191 *as it is possible to configure raid components on raw disk 3192 *devices. 3193 */ 3194 3195 if (!rf_part_found) { 3196 char cname[sizeof(ac_list->devname)]; 3197 3198 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); 3199 if (bdevvp(dev, &vp)) 3200 panic("RAID can't alloc vnode"); 3201 3202 error = VOP_OPEN(vp, FREAD, NOCRED); 3203 if (error) { 3204 /* Whatever... 
*/ 3205 vput(vp); 3206 continue; 3207 } 3208 snprintf(cname, sizeof(cname), "%s%c", 3209 device_xname(dv), 'a' + RAW_PART); 3210 ac_list = rf_get_component(ac_list, dev, vp, cname, 3211 label.d_partitions[RAW_PART].p_size, numsecs, secsize); 3212 } 3213 } 3214 deviter_release(&di); 3215 return ac_list; 3216 } 3217 3218 3219 int 3220 rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3221 { 3222 3223 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 3224 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 3225 ((clabel->clean == RF_RAID_CLEAN) || 3226 (clabel->clean == RF_RAID_DIRTY)) && 3227 clabel->row >=0 && 3228 clabel->column >= 0 && 3229 clabel->num_rows > 0 && 3230 clabel->num_columns > 0 && 3231 clabel->row < clabel->num_rows && 3232 clabel->column < clabel->num_columns && 3233 clabel->blockSize > 0 && 3234 /* 3235 * numBlocksHi may contain garbage, but it is ok since 3236 * the type is unsigned. If it is really garbage, 3237 * rf_fix_old_label_size() will fix it. 3238 */ 3239 rf_component_label_numblocks(clabel) > 0) { 3240 /* 3241 * label looks reasonable enough... 3242 * let's make sure it has no old garbage. 3243 */ 3244 if (numsecs) 3245 rf_fix_old_label_size(clabel, numsecs); 3246 return(1); 3247 } 3248 return(0); 3249 } 3250 3251 3252 /* 3253 * For reasons yet unknown, some old component labels have garbage in 3254 * the newer numBlocksHi region, and this causes lossage. Since those 3255 * disks will also have numsecs set to less than 32 bits of sectors, 3256 * we can determine when this corruption has occurred, and fix it. 3257 * 3258 * The exact same problem, with the same unknown reason, happens to 3259 * the partitionSizeHi member as well. 
3260 */ 3261 static void 3262 rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs) 3263 { 3264 3265 if (numsecs < ((uint64_t)1 << 32)) { 3266 if (clabel->numBlocksHi) { 3267 printf("WARNING: total sectors < 32 bits, yet " 3268 "numBlocksHi set\n" 3269 "WARNING: resetting numBlocksHi to zero.\n"); 3270 clabel->numBlocksHi = 0; 3271 } 3272 3273 if (clabel->partitionSizeHi) { 3274 printf("WARNING: total sectors < 32 bits, yet " 3275 "partitionSizeHi set\n" 3276 "WARNING: resetting partitionSizeHi to zero.\n"); 3277 clabel->partitionSizeHi = 0; 3278 } 3279 } 3280 } 3281 3282 3283 #ifdef DEBUG 3284 void 3285 rf_print_component_label(RF_ComponentLabel_t *clabel) 3286 { 3287 uint64_t numBlocks; 3288 3289 numBlocks = rf_component_label_numblocks(clabel); 3290 3291 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3292 clabel->row, clabel->column, 3293 clabel->num_rows, clabel->num_columns); 3294 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3295 clabel->version, clabel->serial_number, 3296 clabel->mod_counter); 3297 printf(" Clean: %s Status: %d\n", 3298 clabel->clean ? "Yes" : "No", clabel->status); 3299 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3300 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3301 printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n", 3302 (char) clabel->parityConfig, clabel->blockSize, numBlocks); 3303 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No"); 3304 printf(" Contains root partition: %s\n", 3305 clabel->root_partition ? 
	    "Yes" : "No");
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the list of auto-detected components (ac_list) into
 * configuration sets.  Each component is linked into the first existing
 * set whose leading member's component label it matches (per
 * rf_does_it_fit()); if none matches, a new set is created for it.
 *
 * Note: this consumes ac_list -- the ac->next links are rewritten as
 * components are moved onto the per-set lists.  Returns the list of sets.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;

	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets. */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}

	return(config_sets);
}

/*
 * Return 1 if component 'ac' belongs in config set 'cset', i.e. its
 * component label agrees with the label of the set's first member in
 * every field compared below; return 0 otherwise.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    rf_component_label_numblocks(clabel1) ==
	    rf_component_label_numblocks(clabel2) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

/*
 * Decide whether config set 'cset' has enough live components (those
 * carrying the set's newest mod_counter) to be configured.  Returns 1
 * if so, 0 if too many components are missing for the set's parity type.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;

	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set.
	   (the highest mod_counter found among the members wins) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a component for column 'c' that is both
		   present and carries the winning mod_counter */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just finished the odd member of a mirror
			   pair without bailing.. reset the
			   even_pair_failed flag, and go on to the
			   next pair of components.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for the set whose components start at 'ac',
 * filling in the common geometry fields from the first component's
 * label and the device name of every component in the list.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
			RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	/* no debug variables by default */
	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}

/*
 * Set the autoconfigure flag on the array and in the component label of
 * every optimal component and every in-use spare, flushing each label
 * back to disk.  Returns new_value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->autoconfigure = new_value;

	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->autoconfigure = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Set the root_partition flag on the array and in the component label
 * of every optimal component and every in-use spare, flushing each
 * label back to disk.  Returns new_value.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t *clabel;
	int column;
	int sparecol;

	raidPtr->root_partition = new_value;
	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, column);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			clabel = raidget_component_label(raidPtr, sparecol);
			clabel->root_partition = new_value;
			raidflush_component_label(raidPtr, sparecol);
		}
	}
	return(new_value);
}

/*
 * Close and release the vnode of every component in 'cset' that still
 * has one, clearing the pointer afterwards.
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}

/*
 * Free every RF_AutoConfig_t on the set's list (including its component
 * label), then free the config set itself.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}

/*
 * Initialize 'clabel' from the current state of 'raidPtr': geometry,
 * serial/mod counters, and configuration flags.  The label is marked
 * dirty and the component status optimal.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}

/*
 * Autoconfigure the set 'cset': pick a unit number (preferring the one
 * it was last configured as), build an RF_Config_t from the labels, and
 * run rf_Configure() on it.  On success the softc is returned and the
 * set may be flagged rootable; on failure the softc is released and
 * NULL is returned.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	/* walk forward from last_unit until we find an unused unit */
	for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtr->root_partition = 1;
		}
	} else {
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}

/*
 * Report completion of the I/O described by 'desc' to the disk(9)
 * statistics layer for the owning raid softc.
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;
	struct raid_softc *rs;

	bp = (struct buf *)desc->bp;
	rs = desc->raidPtr->softc;
	disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
		    (bp->b_flags & B_READ));
}

/*
 * Initialize pool 'p' with the given item size and wait channel, prime
 * it with 'xmin' items and cap it at 'xmax' (high/low watermarks).
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
	     size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}

/*
 * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
 * if there is IO pending and if that IO could possibly be done for a
 * given RAID set.  Returns 0 if IO is waiting and can be done, 1
 * otherwise.
 *
 */

int
rf_buf_queue_check(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs = raidPtr->softc;
	/* work is doable only if something is queued AND the array
	   still has openings for new requests */
	if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
		/* there is work to do */
		return 0;
	}
	/* default is nothing to do */
	return 1;
}

/*
 * Fill in the size fields of 'diskPtr' from the underlying device of
 * 'vp'.  numBlocks excludes the RAIDframe-reserved sectors at the front
 * of the component.  Returns 0 on success or the getdisksize() error.
 */
int
rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
{
	uint64_t numsecs;
	unsigned secsize;
	int error;

	error = getdisksize(vp, &numsecs, &secsize);
	if (error == 0) {
		diskPtr->blockSize = secsize;
		diskPtr->numBlocks = numsecs - rf_protectedSectors;
		diskPtr->partitionSize = numsecs;
		return 0;
	}
	return error;
}

/* autoconf match: raid pseudo-devices always match */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}

/* autoconf attach: nothing to do; real setup happens at configure time */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}

/*
 * autoconf detach: look up the softc for this unit and detach it while
 * holding the raid lock.  Returns ENXIO if the unit does not exist,
 * otherwise the result of raid_detach_unlocked().
 */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = raidget(device_unit(self));

	if (rs == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	/* XXXkd: raidput(rs) ??? */

	return error;
}

/*
 * Publish a synthetic geometry for the array with the disk(9) layer,
 * derived from the RAID layout (sectors per stripe as "sectors",
 * 4 * numCol as "tracks").
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct disk_geom *dg = &rs->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(rs->sc_dev, &rs->sc_dkdev, NULL);
}

/*
 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
 * We end up returning whatever error was returned by the first cache flush
 * that fails.
3880 */ 3881 3882 int 3883 rf_sync_component_caches(RF_Raid_t *raidPtr) 3884 { 3885 int c, sparecol; 3886 int e,error; 3887 int force = 1; 3888 3889 error = 0; 3890 for (c = 0; c < raidPtr->numCol; c++) { 3891 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3892 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3893 &force, FWRITE, NOCRED); 3894 if (e) { 3895 if (e != ENODEV) 3896 printf("raid%d: cache flush to component %s failed.\n", 3897 raidPtr->raidid, raidPtr->Disks[c].devname); 3898 if (error == 0) { 3899 error = e; 3900 } 3901 } 3902 } 3903 } 3904 3905 for( c = 0; c < raidPtr->numSpare ; c++) { 3906 sparecol = raidPtr->numCol + c; 3907 /* Need to ensure that the reconstruct actually completed! */ 3908 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3909 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3910 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3911 if (e) { 3912 if (e != ENODEV) 3913 printf("raid%d: cache flush to component %s failed.\n", 3914 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3915 if (error == 0) { 3916 error = e; 3917 } 3918 } 3919 } 3920 } 3921 return error; 3922 } 3923