1 /* $NetBSD: rf_netbsdkintf.c,v 1.267 2009/10/13 22:46:28 pooka Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1990, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * This code is derived from software contributed to Berkeley by 36 * the Systems Programming Group of the University of Utah Computer 37 * Science Department. 
38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 64 * 65 * @(#)cd.c 8.2 (Berkeley) 11/16/93 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * 71 * This code is derived from software contributed to Berkeley by 72 * the Systems Programming Group of the University of Utah Computer 73 * Science Department. 
74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 3. All advertising materials mentioning features or use of this software 84 * must display the following acknowledgement: 85 * This product includes software developed by the University of 86 * California, Berkeley and its contributors. 87 * 4. Neither the name of the University nor the names of its contributors 88 * may be used to endorse or promote products derived from this software 89 * without specific prior written permission. 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 101 * SUCH DAMAGE. 102 * 103 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 104 * 105 * @(#)cd.c 8.2 (Berkeley) 11/16/93 106 */ 107 108 /* 109 * Copyright (c) 1995 Carnegie-Mellon University. 110 * All rights reserved. 
111 * 112 * Authors: Mark Holland, Jim Zelenka 113 * 114 * Permission to use, copy, modify and distribute this software and 115 * its documentation is hereby granted, provided that both the copyright 116 * notice and this permission notice appear in all copies of the 117 * software, derivative works or modified versions, and any portions 118 * thereof, and that both notices appear in supporting documentation. 119 * 120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 123 * 124 * Carnegie Mellon requests users of this software to return to 125 * 126 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 127 * School of Computer Science 128 * Carnegie Mellon University 129 * Pittsburgh PA 15213-3890 130 * 131 * any improvements or extensions that they make and grant Carnegie the 132 * rights to redistribute these changes. 
133 */ 134 135 /*********************************************************** 136 * 137 * rf_kintf.c -- the kernel interface routines for RAIDframe 138 * 139 ***********************************************************/ 140 141 #include <sys/cdefs.h> 142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.267 2009/10/13 22:46:28 pooka Exp $"); 143 144 #ifdef _KERNEL_OPT 145 #include "opt_compat_netbsd.h" 146 #include "opt_raid_autoconfig.h" 147 #include "raid.h" 148 #endif 149 150 #include <sys/param.h> 151 #include <sys/errno.h> 152 #include <sys/pool.h> 153 #include <sys/proc.h> 154 #include <sys/queue.h> 155 #include <sys/disk.h> 156 #include <sys/device.h> 157 #include <sys/stat.h> 158 #include <sys/ioctl.h> 159 #include <sys/fcntl.h> 160 #include <sys/systm.h> 161 #include <sys/vnode.h> 162 #include <sys/disklabel.h> 163 #include <sys/conf.h> 164 #include <sys/buf.h> 165 #include <sys/bufq.h> 166 #include <sys/reboot.h> 167 #include <sys/kauth.h> 168 169 #include <prop/proplib.h> 170 171 #include <dev/raidframe/raidframevar.h> 172 #include <dev/raidframe/raidframeio.h> 173 174 #include "rf_raid.h" 175 #include "rf_copyback.h" 176 #include "rf_dag.h" 177 #include "rf_dagflags.h" 178 #include "rf_desc.h" 179 #include "rf_diskqueue.h" 180 #include "rf_etimer.h" 181 #include "rf_general.h" 182 #include "rf_kintf.h" 183 #include "rf_options.h" 184 #include "rf_driver.h" 185 #include "rf_parityscan.h" 186 #include "rf_threadstuff.h" 187 188 #ifdef COMPAT_50 189 #include "rf_compat50.h" 190 #endif 191 192 #ifdef DEBUG 193 int rf_kdebug_level = 0; 194 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 195 #else /* DEBUG */ 196 #define db1_printf(a) { } 197 #endif /* DEBUG */ 198 199 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 200 201 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 202 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 203 204 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 205 * spare table */ 206 static 
RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 207 * installation process */ 208 #endif 209 210 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 211 212 /* prototypes */ 213 static void KernelWakeupFunc(struct buf *); 214 static void InitBP(struct buf *, struct vnode *, unsigned, 215 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 216 void *, int, struct proc *); 217 static void raidinit(RF_Raid_t *); 218 219 void raidattach(int); 220 static int raid_match(device_t, cfdata_t, void *); 221 static void raid_attach(device_t, device_t, void *); 222 static int raid_detach(device_t, int); 223 224 dev_type_open(raidopen); 225 dev_type_close(raidclose); 226 dev_type_read(raidread); 227 dev_type_write(raidwrite); 228 dev_type_ioctl(raidioctl); 229 dev_type_strategy(raidstrategy); 230 dev_type_dump(raiddump); 231 dev_type_size(raidsize); 232 233 const struct bdevsw raid_bdevsw = { 234 raidopen, raidclose, raidstrategy, raidioctl, 235 raiddump, raidsize, D_DISK 236 }; 237 238 const struct cdevsw raid_cdevsw = { 239 raidopen, raidclose, raidread, raidwrite, raidioctl, 240 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 241 }; 242 243 static struct dkdriver rf_dkdriver = { raidstrategy, minphys }; 244 245 /* XXX Not sure if the following should be replacing the raidPtrs above, 246 or if it should be used in conjunction with that... 
*/ 247 248 /* Per-unit software state for a raid(4) pseudo-device unit. */ 249 struct raid_softc { 250 device_t sc_dev; 251 int sc_flags; /* flags */ 252 int sc_cflags; /* configuration flags */ 253 uint64_t sc_size; /* size of the raid device */ 254 char sc_xname[20]; /* XXX external name */ 255 struct disk sc_dkdev; /* generic disk device info */ 256 struct bufq_state *buf_queue; /* used for the device queue */ 257 }; 258 /* sc_flags */ 259 #define RAIDF_INITED 0x01 /* unit has been initialized */ 260 #define RAIDF_WLABEL 0x02 /* label area is writable */ 261 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 262 #define RAIDF_SHUTDOWN 0x08 /* unit is being shutdown */ 263 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 264 #define RAIDF_LOCKED 0x80 /* unit is locked */ 265 266 #define raidunit(x) DISKUNIT(x) 267 int numraid = 0; /* number of units allocated; set in raidattach() */ 268 269 extern struct cfdriver raid_cd; /* autoconf(9) attachment glue */ 270 CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc), 271 raid_match, raid_attach, raid_detach, NULL, NULL, NULL, 272 DVF_DETACH_SHUTDOWN); 273 274 /* 275 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 276 * Be aware that large numbers can allow the driver to consume a lot of 277 * kernel memory, especially on writes, and in degraded mode reads. 278 * 279 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 280 * a single 64K write will typically require 64K for the old data, 281 * 64K for the old parity, and 64K for the new parity, for a total 282 * of 192K (if the parity buffer is not re-used immediately). 283 * Even if it is used immediately, that's still 128K, which when multiplied 284 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 285 * 286 * Now in degraded mode, for example, a 64K read on the above setup may 287 * require data reconstruction, which will require *all* of the 4 remaining 288 * disks to participate -- 4 * 32K/disk == 128K again.
*/ 289 290 291 #ifndef RAIDOUTSTANDING 292 #define RAIDOUTSTANDING 6 293 #endif 294 /* build the dev_t of this unit's raw partition */ 295 #define RAIDLABELDEV(dev) \ 296 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 297 298 /* declared here, and made public, for the benefit of KVM stuff.. */ 299 struct raid_softc *raid_softc; 300 301 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 302 struct disklabel *); 303 static void raidgetdisklabel(dev_t); 304 static void raidmakedisklabel(struct raid_softc *); 305 306 static int raidlock(struct raid_softc *); 307 static void raidunlock(struct raid_softc *); 308 309 static int raid_detach_unlocked(struct raid_softc *); 310 311 static void rf_markalldirty(RF_Raid_t *); 312 static void rf_set_properties(struct raid_softc *, RF_Raid_t *); 313 /* worker-thread entry points */ 314 void rf_ReconThread(struct rf_recon_req *); 315 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 316 void rf_CopybackThread(RF_Raid_t *raidPtr); 317 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 318 int rf_autoconfig(device_t); 319 void rf_buildroothack(RF_ConfigSet_t *); 320 321 RF_AutoConfig_t *rf_find_raid_components(void); 322 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 323 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 324 static int rf_reasonable_label(RF_ComponentLabel_t *); 325 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 326 int rf_set_autoconfig(RF_Raid_t *, int); 327 int rf_set_rootpartition(RF_Raid_t *, int); 328 void rf_release_all_vps(RF_ConfigSet_t *); 329 void rf_cleanup_config_set(RF_ConfigSet_t *); 330 int rf_have_enough_components(RF_ConfigSet_t *); 331 int rf_auto_config_set(RF_ConfigSet_t *, int *); 332 static int rf_sync_component_caches(RF_Raid_t *raidPtr); 333 334 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not 335 allow autoconfig to take place. 336 Note that this is overridden by having 337 RAID_AUTOCONFIG as an option in the 338 kernel config file
*/ 339 340 struct RF_Pools_s rf_pools; 341 342 void 343 raidattach(int num) 344 { 345 int raidID; 346 int i, rc; 347 348 aprint_debug("raidattach: Asked for %d units\n", num); 349 350 if (num <= 0) { 351 #ifdef DIAGNOSTIC 352 panic("raidattach: count <= 0"); 353 #endif 354 return; 355 } 356 /* This is where all the initialization stuff gets done. */ 357 358 numraid = num; 359 360 /* Make some space for requested number of units... */ 361 362 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **)); 363 if (raidPtrs == NULL) { 364 panic("raidPtrs is NULL!!"); 365 } 366 367 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 368 rf_mutex_init(&rf_sparet_wait_mutex); 369 370 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 371 #endif 372 373 for (i = 0; i < num; i++) 374 raidPtrs[i] = NULL; 375 rc = rf_BootRaidframe(); 376 if (rc == 0) 377 aprint_normal("Kernelized RAIDframe activated\n"); 378 else 379 panic("Serious error booting RAID!!"); 380 381 /* put together some datastructures like the CCD device does.. This 382 * lets us lock the device and what-not when it gets opened. 
*/ 383 384 raid_softc = (struct raid_softc *) 385 malloc(num * sizeof(struct raid_softc), 386 M_RAIDFRAME, M_NOWAIT); 387 if (raid_softc == NULL) { 388 aprint_error("WARNING: no memory for RAIDframe driver\n"); 389 return; 390 } 391 392 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 393 394 for (raidID = 0; raidID < num; raidID++) { 395 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0); 396 397 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t), 398 (RF_Raid_t *)); 399 if (raidPtrs[raidID] == NULL) { 400 aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID); 401 numraid = raidID; 402 return; 403 } 404 } 405 406 if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) { 407 aprint_error("raidattach: config_cfattach_attach failed?\n"); 408 } 409 410 #ifdef RAID_AUTOCONFIG 411 raidautoconfig = 1; 412 #endif 413 414 /* 415 * Register a finalizer which will be used to auto-config RAID 416 * sets once all real hardware devices have been found. 417 */ 418 if (config_finalize_register(NULL, rf_autoconfig) != 0) 419 aprint_error("WARNING: unable to register RAIDframe finalizer\n"); 420 } 421 422 int 423 rf_autoconfig(device_t self) 424 { 425 RF_AutoConfig_t *ac_list; 426 RF_ConfigSet_t *config_sets; 427 428 if (raidautoconfig == 0) 429 return (0); 430 431 /* XXX This code can only be run once. */ 432 raidautoconfig = 0; 433 434 /* 1. locate all RAID components on the system */ 435 aprint_debug("Searching for RAID components...\n"); 436 ac_list = rf_find_raid_components(); 437 438 /* 2. Sort them into their respective sets. */ 439 config_sets = rf_create_auto_sets(ac_list); 440 441 /* 442 * 3. Evaluate each set andconfigure the valid ones. 443 * This gets done in rf_buildroothack(). 
444 */ 445 rf_buildroothack(config_sets); 446 447 return 1; 448 } 449 450 void 451 rf_buildroothack(RF_ConfigSet_t *config_sets) 452 { 453 RF_ConfigSet_t *cset; 454 RF_ConfigSet_t *next_cset; 455 int retcode; 456 int raidID; 457 int rootID; 458 int col; 459 int num_root; 460 char *devname; 461 462 rootID = 0; 463 num_root = 0; 464 cset = config_sets; 465 while(cset != NULL ) { 466 next_cset = cset->next; 467 if (rf_have_enough_components(cset) && 468 cset->ac->clabel->autoconfigure==1) { 469 retcode = rf_auto_config_set(cset,&raidID); 470 if (!retcode) { 471 aprint_debug("raid%d: configured ok\n", raidID); 472 if (cset->rootable) { 473 rootID = raidID; 474 num_root++; 475 } 476 } else { 477 /* The autoconfig didn't work :( */ 478 aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 479 rf_release_all_vps(cset); 480 } 481 } else { 482 /* we're not autoconfiguring this set... 483 release the associated resources */ 484 rf_release_all_vps(cset); 485 } 486 /* cleanup */ 487 rf_cleanup_config_set(cset); 488 cset = next_cset; 489 } 490 491 /* if the user has specified what the root device should be 492 then we don't touch booted_device or boothowto... */ 493 494 if (rootspec != NULL) 495 return; 496 497 /* we found something bootable... */ 498 499 if (num_root == 1) { 500 booted_device = raid_softc[rootID].sc_dev; 501 } else if (num_root > 1) { 502 503 /* 504 * Maybe the MD code can help. 
If it cannot, then 505 * setroot() will discover that we have no 506 * booted_device and will ask the user if nothing was 507 * hardwired in the kernel config file 508 */ 509 510 if (booted_device == NULL) 511 cpu_rootconf(); 512 if (booted_device == NULL) 513 return; 514 515 num_root = 0; 516 for (raidID = 0; raidID < numraid; raidID++) { 517 if (raidPtrs[raidID]->valid == 0) 518 continue; 519 520 if (raidPtrs[raidID]->root_partition == 0) 521 continue; 522 523 for (col = 0; col < raidPtrs[raidID]->numCol; col++) { 524 devname = raidPtrs[raidID]->Disks[col].devname; 525 devname += sizeof("/dev/") - 1; 526 if (strncmp(devname, device_xname(booted_device), 527 strlen(device_xname(booted_device))) != 0) 528 continue; 529 aprint_debug("raid%d includes boot device %s\n", 530 raidID, devname); 531 num_root++; 532 rootID = raidID; 533 } 534 } 535 536 if (num_root == 1) { 537 booted_device = raid_softc[rootID].sc_dev; 538 } else { 539 /* we can't guess.. require the user to answer... */ 540 boothowto |= RB_ASKNAME; 541 } 542 } 543 } 544 545 546 int 547 raidsize(dev_t dev) 548 { 549 struct raid_softc *rs; 550 struct disklabel *lp; 551 int part, unit, omask, size; 552 553 unit = raidunit(dev); 554 if (unit >= numraid) 555 return (-1); 556 rs = &raid_softc[unit]; 557 558 if ((rs->sc_flags & RAIDF_INITED) == 0) 559 return (-1); 560 561 part = DISKPART(dev); 562 omask = rs->sc_dkdev.dk_openmask & (1 << part); 563 lp = rs->sc_dkdev.dk_label; 564 565 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp)) 566 return (-1); 567 568 if (lp->d_partitions[part].p_fstype != FS_SWAP) 569 size = -1; 570 else 571 size = lp->d_partitions[part].p_size * 572 (lp->d_secsize / DEV_BSIZE); 573 574 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp)) 575 return (-1); 576 577 return (size); 578 579 } 580 581 int 582 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size) 583 { 584 int unit = raidunit(dev); 585 struct raid_softc *rs; 586 const struct bdevsw *bdev; 587 struct disklabel *lp; 588 
RF_Raid_t *raidPtr; 589 daddr_t offset; 590 int part, c, sparecol, j, scol, dumpto; 591 int error = 0; 592 593 if (unit >= numraid) 594 return (ENXIO); 595 596 rs = &raid_softc[unit]; 597 raidPtr = raidPtrs[unit]; 598 599 if ((rs->sc_flags & RAIDF_INITED) == 0) 600 return ENXIO; 601 602 /* we only support dumping to RAID 1 sets */ 603 if (raidPtr->Layout.numDataCol != 1 || 604 raidPtr->Layout.numParityCol != 1) 605 return EINVAL; 606 607 608 if ((error = raidlock(rs)) != 0) 609 return error; 610 611 if (size % DEV_BSIZE != 0) { 612 error = EINVAL; 613 goto out; 614 } 615 616 if (blkno + size / DEV_BSIZE > rs->sc_size) { 617 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > " 618 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno, 619 size / DEV_BSIZE, rs->sc_size); 620 error = EINVAL; 621 goto out; 622 } 623 624 part = DISKPART(dev); 625 lp = rs->sc_dkdev.dk_label; 626 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS; 627 628 /* figure out what device is alive.. */ 629 630 /* 631 Look for a component to dump to. The preference for the 632 component to dump to is as follows: 633 1) the master 634 2) a used_spare of the master 635 3) the slave 636 4) a used_spare of the slave 637 */ 638 639 dumpto = -1; 640 for (c = 0; c < raidPtr->numCol; c++) { 641 if (raidPtr->Disks[c].status == rf_ds_optimal) { 642 /* this might be the one */ 643 dumpto = c; 644 break; 645 } 646 } 647 648 /* 649 At this point we have possibly selected a live master or a 650 live slave. We now check to see if there is a spared 651 master (or a spared slave), if we didn't find a live master 652 or a live slave. 653 */ 654 655 for (c = 0; c < raidPtr->numSpare; c++) { 656 sparecol = raidPtr->numCol + c; 657 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 658 /* How about this one? 
*/ 659 scol = -1; 660 for(j=0;j<raidPtr->numCol;j++) { 661 if (raidPtr->Disks[j].spareCol == sparecol) { 662 scol = j; 663 break; 664 } 665 } 666 if (scol == 0) { 667 /* 668 We must have found a spared master! 669 We'll take that over anything else 670 found so far. (We couldn't have 671 found a real master before, since 672 this is a used spare, and it's 673 saying that it's replacing the 674 master.) On reboot (with 675 autoconfiguration turned on) 676 sparecol will become the 1st 677 component (component0) of this set. 678 */ 679 dumpto = sparecol; 680 break; 681 } else if (scol != -1) { 682 /* 683 Must be a spared slave. We'll dump 684 to that if we havn't found anything 685 else so far. 686 */ 687 if (dumpto == -1) 688 dumpto = sparecol; 689 } 690 } 691 } 692 693 if (dumpto == -1) { 694 /* we couldn't find any live components to dump to!?!? 695 */ 696 error = EINVAL; 697 goto out; 698 } 699 700 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev); 701 702 /* 703 Note that blkno is relative to this particular partition. 704 By adding the offset of this partition in the RAID 705 set, and also adding RF_PROTECTED_SECTORS, we get a 706 value that is relative to the partition used for the 707 underlying component. 708 */ 709 710 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev, 711 blkno + offset, va, size); 712 713 out: 714 raidunlock(rs); 715 716 return error; 717 } 718 /* ARGSUSED */ 719 int 720 raidopen(dev_t dev, int flags, int fmt, 721 struct lwp *l) 722 { 723 int unit = raidunit(dev); 724 struct raid_softc *rs; 725 struct disklabel *lp; 726 int part, pmask; 727 int error = 0; 728 729 if (unit >= numraid) 730 return (ENXIO); 731 rs = &raid_softc[unit]; 732 733 if ((error = raidlock(rs)) != 0) 734 return (error); 735 736 if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) { 737 error = EBUSY; 738 goto bad; 739 } 740 741 lp = rs->sc_dkdev.dk_label; 742 743 part = DISKPART(dev); 744 745 /* 746 * If there are wedges, and this is not RAW_PART, then we 747 * need to fail. 
748 */ 749 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) { 750 error = EBUSY; 751 goto bad; 752 } 753 pmask = (1 << part); 754 755 if ((rs->sc_flags & RAIDF_INITED) && 756 (rs->sc_dkdev.dk_openmask == 0)) 757 raidgetdisklabel(dev); 758 759 /* make sure that this partition exists */ 760 761 if (part != RAW_PART) { 762 if (((rs->sc_flags & RAIDF_INITED) == 0) || 763 ((part >= lp->d_npartitions) || 764 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 765 error = ENXIO; 766 goto bad; 767 } 768 } 769 /* Prevent this unit from being unconfigured while open. */ 770 switch (fmt) { 771 case S_IFCHR: 772 rs->sc_dkdev.dk_copenmask |= pmask; 773 break; 774 775 case S_IFBLK: 776 rs->sc_dkdev.dk_bopenmask |= pmask; 777 break; 778 } 779 780 if ((rs->sc_dkdev.dk_openmask == 0) && 781 ((rs->sc_flags & RAIDF_INITED) != 0)) { 782 /* First one... mark things as dirty... Note that we *MUST* 783 have done a configure before this. I DO NOT WANT TO BE 784 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 785 THAT THEY BELONG TOGETHER!!!!! */ 786 /* XXX should check to see if we're only open for reading 787 here... If so, we needn't do this, but then need some 788 other way of keeping track of what's happened.. */ 789 790 rf_markalldirty( raidPtrs[unit] ); 791 } 792 793 794 rs->sc_dkdev.dk_openmask = 795 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 796 797 bad: 798 raidunlock(rs); 799 800 return (error); 801 802 803 } 804 /* ARGSUSED */ 805 int 806 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 807 { 808 int unit = raidunit(dev); 809 struct raid_softc *rs; 810 int error = 0; 811 int part; 812 813 if (unit >= numraid) 814 return (ENXIO); 815 rs = &raid_softc[unit]; 816 817 if ((error = raidlock(rs)) != 0) 818 return (error); 819 820 part = DISKPART(dev); 821 822 /* ...that much closer to allowing unconfiguration... 
*/ 823 switch (fmt) { 824 case S_IFCHR: 825 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 826 break; 827 828 case S_IFBLK: 829 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 830 break; 831 } 832 rs->sc_dkdev.dk_openmask = 833 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 834 835 if ((rs->sc_dkdev.dk_openmask == 0) && 836 ((rs->sc_flags & RAIDF_INITED) != 0)) { 837 /* Last one... device is not unconfigured yet. 838 Device shutdown has taken care of setting the 839 clean bits if RAIDF_INITED is not set 840 mark things as clean... */ 841 842 rf_update_component_labels(raidPtrs[unit], 843 RF_FINAL_COMPONENT_UPDATE); 844 845 /* If the kernel is shutting down, it will detach 846 * this RAID set soon enough. 847 */ 848 } 849 850 raidunlock(rs); 851 return (0); 852 853 } 854 855 void 856 raidstrategy(struct buf *bp) 857 { 858 int s; 859 860 unsigned int raidID = raidunit(bp->b_dev); 861 RF_Raid_t *raidPtr; 862 struct raid_softc *rs = &raid_softc[raidID]; 863 int wlabel; 864 865 if ((rs->sc_flags & RAIDF_INITED) ==0) { 866 bp->b_error = ENXIO; 867 goto done; 868 } 869 if (raidID >= numraid || !raidPtrs[raidID]) { 870 bp->b_error = ENODEV; 871 goto done; 872 } 873 raidPtr = raidPtrs[raidID]; 874 if (!raidPtr->valid) { 875 bp->b_error = ENODEV; 876 goto done; 877 } 878 if (bp->b_bcount == 0) { 879 db1_printf(("b_bcount is zero..\n")); 880 goto done; 881 } 882 883 /* 884 * Do bounds checking and adjust transfer. If there's an 885 * error, the bounds check will flag that for us. 
886 */ 887 888 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 889 if (DISKPART(bp->b_dev) == RAW_PART) { 890 uint64_t size; /* device size in DEV_BSIZE unit */ 891 892 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 893 size = raidPtr->totalSectors << 894 (raidPtr->logBytesPerSector - DEV_BSHIFT); 895 } else { 896 size = raidPtr->totalSectors >> 897 (DEV_BSHIFT - raidPtr->logBytesPerSector); 898 } 899 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 900 goto done; 901 } 902 } else { 903 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 904 db1_printf(("Bounds check failed!!:%d %d\n", 905 (int) bp->b_blkno, (int) wlabel)); 906 goto done; 907 } 908 } 909 s = splbio(); 910 911 bp->b_resid = 0; 912 913 /* stuff it onto our queue */ 914 bufq_put(rs->buf_queue, bp); 915 916 /* scheduled the IO to happen at the next convenient time */ 917 wakeup(&(raidPtrs[raidID]->iodone)); 918 919 splx(s); 920 return; 921 922 done: 923 bp->b_resid = bp->b_bcount; 924 biodone(bp); 925 } 926 /* ARGSUSED */ 927 int 928 raidread(dev_t dev, struct uio *uio, int flags) 929 { 930 int unit = raidunit(dev); 931 struct raid_softc *rs; 932 933 if (unit >= numraid) 934 return (ENXIO); 935 rs = &raid_softc[unit]; 936 937 if ((rs->sc_flags & RAIDF_INITED) == 0) 938 return (ENXIO); 939 940 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 941 942 } 943 /* ARGSUSED */ 944 int 945 raidwrite(dev_t dev, struct uio *uio, int flags) 946 { 947 int unit = raidunit(dev); 948 struct raid_softc *rs; 949 950 if (unit >= numraid) 951 return (ENXIO); 952 rs = &raid_softc[unit]; 953 954 if ((rs->sc_flags & RAIDF_INITED) == 0) 955 return (ENXIO); 956 957 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 958 959 } 960 961 static int 962 raid_detach_unlocked(struct raid_softc *rs) 963 { 964 int error; 965 RF_Raid_t *raidPtr; 966 967 raidPtr = raidPtrs[device_unit(rs->sc_dev)]; 968 969 /* 970 * If somebody has a partition mounted, we shouldn't 971 * 
shutdown. 972 */ 973 if (rs->sc_dkdev.dk_openmask != 0) 974 return EBUSY; 975 976 if ((rs->sc_flags & RAIDF_INITED) == 0) 977 ; /* not initialized: nothing to do */ 978 else if ((error = rf_Shutdown(raidPtr)) != 0) 979 return error; 980 else 981 rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN); 982 983 /* Detach the disk. */ 984 disk_detach(&rs->sc_dkdev); 985 disk_destroy(&rs->sc_dkdev); 986 987 return 0; 988 } 989 990 int 991 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 992 { 993 int unit = raidunit(dev); 994 int error = 0; 995 int part, pmask; 996 cfdata_t cf; 997 struct raid_softc *rs; 998 RF_Config_t *k_cfg, *u_cfg; 999 RF_Raid_t *raidPtr; 1000 RF_RaidDisk_t *diskPtr; 1001 RF_AccTotals_t *totals; 1002 RF_DeviceConfig_t *d_cfg, **ucfgp; 1003 u_char *specific_buf; 1004 int retcode = 0; 1005 int column; 1006 int raidid; 1007 struct rf_recon_req *rrcopy, *rr; 1008 RF_ComponentLabel_t *clabel; 1009 RF_ComponentLabel_t *ci_label; 1010 RF_ComponentLabel_t **clabel_ptr; 1011 RF_SingleComponent_t *sparePtr,*componentPtr; 1012 RF_SingleComponent_t component; 1013 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 1014 int i, j, d; 1015 #ifdef __HAVE_OLD_DISKLABEL 1016 struct disklabel newlabel; 1017 #endif 1018 struct dkwedge_info *dkw; 1019 1020 if (unit >= numraid) 1021 return (ENXIO); 1022 rs = &raid_softc[unit]; 1023 raidPtr = raidPtrs[unit]; 1024 1025 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 1026 (int) DISKPART(dev), (int) unit, (int) cmd)); 1027 1028 /* Must be open for writes for these commands... 
*/ 1029 switch (cmd) { 1030 #ifdef DIOCGSECTORSIZE 1031 case DIOCGSECTORSIZE: 1032 *(u_int *)data = raidPtr->bytesPerSector; 1033 return 0; 1034 case DIOCGMEDIASIZE: 1035 *(off_t *)data = 1036 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector; 1037 return 0; 1038 #endif 1039 case DIOCSDINFO: 1040 case DIOCWDINFO: 1041 #ifdef __HAVE_OLD_DISKLABEL 1042 case ODIOCWDINFO: 1043 case ODIOCSDINFO: 1044 #endif 1045 case DIOCWLABEL: 1046 case DIOCAWEDGE: 1047 case DIOCDWEDGE: 1048 if ((flag & FWRITE) == 0) 1049 return (EBADF); 1050 } 1051 1052 /* Must be initialized for these... */ 1053 switch (cmd) { 1054 case DIOCGDINFO: 1055 case DIOCSDINFO: 1056 case DIOCWDINFO: 1057 #ifdef __HAVE_OLD_DISKLABEL 1058 case ODIOCGDINFO: 1059 case ODIOCWDINFO: 1060 case ODIOCSDINFO: 1061 case ODIOCGDEFLABEL: 1062 #endif 1063 case DIOCGPART: 1064 case DIOCWLABEL: 1065 case DIOCGDEFLABEL: 1066 case DIOCAWEDGE: 1067 case DIOCDWEDGE: 1068 case DIOCLWEDGES: 1069 case DIOCCACHESYNC: 1070 case RAIDFRAME_SHUTDOWN: 1071 case RAIDFRAME_REWRITEPARITY: 1072 case RAIDFRAME_GET_INFO: 1073 case RAIDFRAME_RESET_ACCTOTALS: 1074 case RAIDFRAME_GET_ACCTOTALS: 1075 case RAIDFRAME_KEEP_ACCTOTALS: 1076 case RAIDFRAME_GET_SIZE: 1077 case RAIDFRAME_FAIL_DISK: 1078 case RAIDFRAME_COPYBACK: 1079 case RAIDFRAME_CHECK_RECON_STATUS: 1080 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1081 case RAIDFRAME_GET_COMPONENT_LABEL: 1082 case RAIDFRAME_SET_COMPONENT_LABEL: 1083 case RAIDFRAME_ADD_HOT_SPARE: 1084 case RAIDFRAME_REMOVE_HOT_SPARE: 1085 case RAIDFRAME_INIT_LABELS: 1086 case RAIDFRAME_REBUILD_IN_PLACE: 1087 case RAIDFRAME_CHECK_PARITY: 1088 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1089 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1090 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1091 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1092 case RAIDFRAME_SET_AUTOCONFIG: 1093 case RAIDFRAME_SET_ROOT: 1094 case RAIDFRAME_DELETE_COMPONENT: 1095 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1096 if ((rs->sc_flags & RAIDF_INITED) == 0) 1097 
return (ENXIO); 1098 } 1099 1100 switch (cmd) { 1101 #ifdef COMPAT_50 1102 case RAIDFRAME_GET_INFO50: 1103 return rf_get_info50(raidPtr, data); 1104 1105 case RAIDFRAME_CONFIGURE50: 1106 if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0) 1107 return retcode; 1108 goto config; 1109 #endif 1110 /* configure the system */ 1111 case RAIDFRAME_CONFIGURE: 1112 1113 if (raidPtr->valid) { 1114 /* There is a valid RAID set running on this unit! */ 1115 printf("raid%d: Device already configured!\n",unit); 1116 return(EINVAL); 1117 } 1118 1119 /* copy-in the configuration information */ 1120 /* data points to a pointer to the configuration structure */ 1121 1122 u_cfg = *((RF_Config_t **) data); 1123 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1124 if (k_cfg == NULL) { 1125 return (ENOMEM); 1126 } 1127 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1128 if (retcode) { 1129 RF_Free(k_cfg, sizeof(RF_Config_t)); 1130 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1131 retcode)); 1132 return (retcode); 1133 } 1134 goto config; 1135 config: 1136 /* allocate a buffer for the layout-specific data, and copy it 1137 * in */ 1138 if (k_cfg->layoutSpecificSize) { 1139 if (k_cfg->layoutSpecificSize > 10000) { 1140 /* sanity check */ 1141 RF_Free(k_cfg, sizeof(RF_Config_t)); 1142 return (EINVAL); 1143 } 1144 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1145 (u_char *)); 1146 if (specific_buf == NULL) { 1147 RF_Free(k_cfg, sizeof(RF_Config_t)); 1148 return (ENOMEM); 1149 } 1150 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1151 k_cfg->layoutSpecificSize); 1152 if (retcode) { 1153 RF_Free(k_cfg, sizeof(RF_Config_t)); 1154 RF_Free(specific_buf, 1155 k_cfg->layoutSpecificSize); 1156 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1157 retcode)); 1158 return (retcode); 1159 } 1160 } else 1161 specific_buf = NULL; 1162 k_cfg->layoutSpecific = specific_buf; 1163 1164 /* should do some kind of sanity check on the configuration. 
1165 * Store the sum of all the bytes in the last byte? */ 1166 1167 /* configure the system */ 1168 1169 /* 1170 * Clear the entire RAID descriptor, just to make sure 1171 * there is no stale data left in the case of a 1172 * reconfiguration 1173 */ 1174 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 1175 raidPtr->raidid = unit; 1176 1177 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1178 1179 if (retcode == 0) { 1180 1181 /* allow this many simultaneous IO's to 1182 this RAID device */ 1183 raidPtr->openings = RAIDOUTSTANDING; 1184 1185 raidinit(raidPtr); 1186 rf_markalldirty(raidPtr); 1187 } 1188 /* free the buffers. No return code here. */ 1189 if (k_cfg->layoutSpecificSize) { 1190 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1191 } 1192 RF_Free(k_cfg, sizeof(RF_Config_t)); 1193 1194 return (retcode); 1195 1196 /* shutdown the system */ 1197 case RAIDFRAME_SHUTDOWN: 1198 1199 part = DISKPART(dev); 1200 pmask = (1 << part); 1201 1202 if ((error = raidlock(rs)) != 0) 1203 return (error); 1204 1205 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1206 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1207 (rs->sc_dkdev.dk_copenmask & pmask))) 1208 retcode = EBUSY; 1209 else { 1210 rs->sc_flags |= RAIDF_SHUTDOWN; 1211 rs->sc_dkdev.dk_copenmask &= ~pmask; 1212 rs->sc_dkdev.dk_bopenmask &= ~pmask; 1213 rs->sc_dkdev.dk_openmask &= ~pmask; 1214 retcode = 0; 1215 } 1216 1217 raidunlock(rs); 1218 1219 if (retcode != 0) 1220 return retcode; 1221 1222 /* free the pseudo device attach bits */ 1223 1224 cf = device_cfdata(rs->sc_dev); 1225 if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0) 1226 free(cf, M_RAIDFRAME); 1227 1228 return (retcode); 1229 case RAIDFRAME_GET_COMPONENT_LABEL: 1230 clabel_ptr = (RF_ComponentLabel_t **) data; 1231 /* need to read the component label for the disk indicated 1232 by row,column in clabel */ 1233 1234 /* For practice, let's get it directly fromdisk, rather 1235 than from the in-core copy */ 1236 RF_Malloc( clabel, sizeof( 
RF_ComponentLabel_t ), 1237 (RF_ComponentLabel_t *)); 1238 if (clabel == NULL) 1239 return (ENOMEM); 1240 1241 retcode = copyin( *clabel_ptr, clabel, 1242 sizeof(RF_ComponentLabel_t)); 1243 1244 if (retcode) { 1245 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1246 return(retcode); 1247 } 1248 1249 clabel->row = 0; /* Don't allow looking at anything else.*/ 1250 1251 column = clabel->column; 1252 1253 if ((column < 0) || (column >= raidPtr->numCol + 1254 raidPtr->numSpare)) { 1255 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1256 return(EINVAL); 1257 } 1258 1259 retcode = raidread_component_label(raidPtr->Disks[column].dev, 1260 raidPtr->raid_cinfo[column].ci_vp, 1261 clabel ); 1262 1263 if (retcode == 0) { 1264 retcode = copyout(clabel, *clabel_ptr, 1265 sizeof(RF_ComponentLabel_t)); 1266 } 1267 RF_Free(clabel, sizeof(RF_ComponentLabel_t)); 1268 return (retcode); 1269 1270 case RAIDFRAME_SET_COMPONENT_LABEL: 1271 clabel = (RF_ComponentLabel_t *) data; 1272 1273 /* XXX check the label for valid stuff... */ 1274 /* Note that some things *should not* get modified -- 1275 the user should be re-initing the labels instead of 1276 trying to patch things. 
1277 */ 1278 1279 raidid = raidPtr->raidid; 1280 #ifdef DEBUG 1281 printf("raid%d: Got component label:\n", raidid); 1282 printf("raid%d: Version: %d\n", raidid, clabel->version); 1283 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1284 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1285 printf("raid%d: Column: %d\n", raidid, clabel->column); 1286 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1287 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1288 printf("raid%d: Status: %d\n", raidid, clabel->status); 1289 #endif 1290 clabel->row = 0; 1291 column = clabel->column; 1292 1293 if ((column < 0) || (column >= raidPtr->numCol)) { 1294 return(EINVAL); 1295 } 1296 1297 /* XXX this isn't allowed to do anything for now :-) */ 1298 1299 /* XXX and before it is, we need to fill in the rest 1300 of the fields!?!?!?! */ 1301 #if 0 1302 raidwrite_component_label( 1303 raidPtr->Disks[column].dev, 1304 raidPtr->raid_cinfo[column].ci_vp, 1305 clabel ); 1306 #endif 1307 return (0); 1308 1309 case RAIDFRAME_INIT_LABELS: 1310 clabel = (RF_ComponentLabel_t *) data; 1311 /* 1312 we only want the serial number from 1313 the above. We get all the rest of the information 1314 from the config that was used to create this RAID 1315 set. 
1316 */ 1317 1318 raidPtr->serial_number = clabel->serial_number; 1319 1320 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t), 1321 (RF_ComponentLabel_t *)); 1322 if (ci_label == NULL) 1323 return (ENOMEM); 1324 1325 raid_init_component_label(raidPtr, ci_label); 1326 ci_label->serial_number = clabel->serial_number; 1327 ci_label->row = 0; /* we dont' pretend to support more */ 1328 1329 for(column=0;column<raidPtr->numCol;column++) { 1330 diskPtr = &raidPtr->Disks[column]; 1331 if (!RF_DEAD_DISK(diskPtr->status)) { 1332 ci_label->partitionSize = diskPtr->partitionSize; 1333 ci_label->column = column; 1334 raidwrite_component_label( 1335 raidPtr->Disks[column].dev, 1336 raidPtr->raid_cinfo[column].ci_vp, 1337 ci_label ); 1338 } 1339 } 1340 RF_Free(ci_label, sizeof(RF_ComponentLabel_t)); 1341 1342 return (retcode); 1343 case RAIDFRAME_SET_AUTOCONFIG: 1344 d = rf_set_autoconfig(raidPtr, *(int *) data); 1345 printf("raid%d: New autoconfig value is: %d\n", 1346 raidPtr->raidid, d); 1347 *(int *) data = d; 1348 return (retcode); 1349 1350 case RAIDFRAME_SET_ROOT: 1351 d = rf_set_rootpartition(raidPtr, *(int *) data); 1352 printf("raid%d: New rootpartition value is: %d\n", 1353 raidPtr->raidid, d); 1354 *(int *) data = d; 1355 return (retcode); 1356 1357 /* initialize all parity */ 1358 case RAIDFRAME_REWRITEPARITY: 1359 1360 if (raidPtr->Layout.map->faultsTolerated == 0) { 1361 /* Parity for RAID 0 is trivially correct */ 1362 raidPtr->parity_good = RF_RAID_CLEAN; 1363 return(0); 1364 } 1365 1366 if (raidPtr->parity_rewrite_in_progress == 1) { 1367 /* Re-write is already in progress! 
*/ 1368 return(EINVAL); 1369 } 1370 1371 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1372 rf_RewriteParityThread, 1373 raidPtr,"raid_parity"); 1374 return (retcode); 1375 1376 1377 case RAIDFRAME_ADD_HOT_SPARE: 1378 sparePtr = (RF_SingleComponent_t *) data; 1379 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1380 retcode = rf_add_hot_spare(raidPtr, &component); 1381 return(retcode); 1382 1383 case RAIDFRAME_REMOVE_HOT_SPARE: 1384 return(retcode); 1385 1386 case RAIDFRAME_DELETE_COMPONENT: 1387 componentPtr = (RF_SingleComponent_t *)data; 1388 memcpy( &component, componentPtr, 1389 sizeof(RF_SingleComponent_t)); 1390 retcode = rf_delete_component(raidPtr, &component); 1391 return(retcode); 1392 1393 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1394 componentPtr = (RF_SingleComponent_t *)data; 1395 memcpy( &component, componentPtr, 1396 sizeof(RF_SingleComponent_t)); 1397 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1398 return(retcode); 1399 1400 case RAIDFRAME_REBUILD_IN_PLACE: 1401 1402 if (raidPtr->Layout.map->faultsTolerated == 0) { 1403 /* Can't do this on a RAID 0!! */ 1404 return(EINVAL); 1405 } 1406 1407 if (raidPtr->recon_in_progress == 1) { 1408 /* a reconstruct is already in progress! */ 1409 return(EINVAL); 1410 } 1411 1412 componentPtr = (RF_SingleComponent_t *) data; 1413 memcpy( &component, componentPtr, 1414 sizeof(RF_SingleComponent_t)); 1415 component.row = 0; /* we don't support any more */ 1416 column = component.column; 1417 1418 if ((column < 0) || (column >= raidPtr->numCol)) { 1419 return(EINVAL); 1420 } 1421 1422 RF_LOCK_MUTEX(raidPtr->mutex); 1423 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1424 (raidPtr->numFailures > 0)) { 1425 /* XXX 0 above shouldn't be constant!!! */ 1426 /* some component other than this has failed. 1427 Let's not make things worse than they already 1428 are... 
*/ 1429 printf("raid%d: Unable to reconstruct to disk at:\n", 1430 raidPtr->raidid); 1431 printf("raid%d: Col: %d Too many failures.\n", 1432 raidPtr->raidid, column); 1433 RF_UNLOCK_MUTEX(raidPtr->mutex); 1434 return (EINVAL); 1435 } 1436 if (raidPtr->Disks[column].status == 1437 rf_ds_reconstructing) { 1438 printf("raid%d: Unable to reconstruct to disk at:\n", 1439 raidPtr->raidid); 1440 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1441 1442 RF_UNLOCK_MUTEX(raidPtr->mutex); 1443 return (EINVAL); 1444 } 1445 if (raidPtr->Disks[column].status == rf_ds_spared) { 1446 RF_UNLOCK_MUTEX(raidPtr->mutex); 1447 return (EINVAL); 1448 } 1449 RF_UNLOCK_MUTEX(raidPtr->mutex); 1450 1451 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1452 if (rrcopy == NULL) 1453 return(ENOMEM); 1454 1455 rrcopy->raidPtr = (void *) raidPtr; 1456 rrcopy->col = column; 1457 1458 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1459 rf_ReconstructInPlaceThread, 1460 rrcopy,"raid_reconip"); 1461 return(retcode); 1462 1463 case RAIDFRAME_GET_INFO: 1464 if (!raidPtr->valid) 1465 return (ENODEV); 1466 ucfgp = (RF_DeviceConfig_t **) data; 1467 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1468 (RF_DeviceConfig_t *)); 1469 if (d_cfg == NULL) 1470 return (ENOMEM); 1471 d_cfg->rows = 1; /* there is only 1 row now */ 1472 d_cfg->cols = raidPtr->numCol; 1473 d_cfg->ndevs = raidPtr->numCol; 1474 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1475 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1476 return (ENOMEM); 1477 } 1478 d_cfg->nspares = raidPtr->numSpare; 1479 if (d_cfg->nspares >= RF_MAX_DISKS) { 1480 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1481 return (ENOMEM); 1482 } 1483 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1484 d = 0; 1485 for (j = 0; j < d_cfg->cols; j++) { 1486 d_cfg->devs[d] = raidPtr->Disks[j]; 1487 d++; 1488 } 1489 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1490 d_cfg->spares[i] = raidPtr->Disks[j]; 1491 } 1492 retcode = 
copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1493 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1494 1495 return (retcode); 1496 1497 case RAIDFRAME_CHECK_PARITY: 1498 *(int *) data = raidPtr->parity_good; 1499 return (0); 1500 1501 case RAIDFRAME_RESET_ACCTOTALS: 1502 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1503 return (0); 1504 1505 case RAIDFRAME_GET_ACCTOTALS: 1506 totals = (RF_AccTotals_t *) data; 1507 *totals = raidPtr->acc_totals; 1508 return (0); 1509 1510 case RAIDFRAME_KEEP_ACCTOTALS: 1511 raidPtr->keep_acc_totals = *(int *)data; 1512 return (0); 1513 1514 case RAIDFRAME_GET_SIZE: 1515 *(int *) data = raidPtr->totalSectors; 1516 return (0); 1517 1518 /* fail a disk & optionally start reconstruction */ 1519 case RAIDFRAME_FAIL_DISK: 1520 1521 if (raidPtr->Layout.map->faultsTolerated == 0) { 1522 /* Can't do this on a RAID 0!! */ 1523 return(EINVAL); 1524 } 1525 1526 rr = (struct rf_recon_req *) data; 1527 rr->row = 0; 1528 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1529 return (EINVAL); 1530 1531 1532 RF_LOCK_MUTEX(raidPtr->mutex); 1533 if (raidPtr->status == rf_rs_reconstructing) { 1534 /* you can't fail a disk while we're reconstructing! */ 1535 /* XXX wrong for RAID6 */ 1536 RF_UNLOCK_MUTEX(raidPtr->mutex); 1537 return (EINVAL); 1538 } 1539 if ((raidPtr->Disks[rr->col].status == 1540 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1541 /* some other component has failed. Let's not make 1542 things worse. XXX wrong for RAID6 */ 1543 RF_UNLOCK_MUTEX(raidPtr->mutex); 1544 return (EINVAL); 1545 } 1546 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1547 /* Can't fail a spared disk! 
*/ 1548 RF_UNLOCK_MUTEX(raidPtr->mutex); 1549 return (EINVAL); 1550 } 1551 RF_UNLOCK_MUTEX(raidPtr->mutex); 1552 1553 /* make a copy of the recon request so that we don't rely on 1554 * the user's buffer */ 1555 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1556 if (rrcopy == NULL) 1557 return(ENOMEM); 1558 memcpy(rrcopy, rr, sizeof(*rr)); 1559 rrcopy->raidPtr = (void *) raidPtr; 1560 1561 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1562 rf_ReconThread, 1563 rrcopy,"raid_recon"); 1564 return (0); 1565 1566 /* invoke a copyback operation after recon on whatever disk 1567 * needs it, if any */ 1568 case RAIDFRAME_COPYBACK: 1569 1570 if (raidPtr->Layout.map->faultsTolerated == 0) { 1571 /* This makes no sense on a RAID 0!! */ 1572 return(EINVAL); 1573 } 1574 1575 if (raidPtr->copyback_in_progress == 1) { 1576 /* Copyback is already in progress! */ 1577 return(EINVAL); 1578 } 1579 1580 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1581 rf_CopybackThread, 1582 raidPtr,"raid_copyback"); 1583 return (retcode); 1584 1585 /* return the percentage completion of reconstruction */ 1586 case RAIDFRAME_CHECK_RECON_STATUS: 1587 if (raidPtr->Layout.map->faultsTolerated == 0) { 1588 /* This makes no sense on a RAID 0, so tell the 1589 user it's done. 
*/ 1590 *(int *) data = 100; 1591 return(0); 1592 } 1593 if (raidPtr->status != rf_rs_reconstructing) 1594 *(int *) data = 100; 1595 else { 1596 if (raidPtr->reconControl->numRUsTotal > 0) { 1597 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1598 } else { 1599 *(int *) data = 0; 1600 } 1601 } 1602 return (0); 1603 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1604 progressInfoPtr = (RF_ProgressInfo_t **) data; 1605 if (raidPtr->status != rf_rs_reconstructing) { 1606 progressInfo.remaining = 0; 1607 progressInfo.completed = 100; 1608 progressInfo.total = 100; 1609 } else { 1610 progressInfo.total = 1611 raidPtr->reconControl->numRUsTotal; 1612 progressInfo.completed = 1613 raidPtr->reconControl->numRUsComplete; 1614 progressInfo.remaining = progressInfo.total - 1615 progressInfo.completed; 1616 } 1617 retcode = copyout(&progressInfo, *progressInfoPtr, 1618 sizeof(RF_ProgressInfo_t)); 1619 return (retcode); 1620 1621 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1622 if (raidPtr->Layout.map->faultsTolerated == 0) { 1623 /* This makes no sense on a RAID 0, so tell the 1624 user it's done. 
*/ 1625 *(int *) data = 100; 1626 return(0); 1627 } 1628 if (raidPtr->parity_rewrite_in_progress == 1) { 1629 *(int *) data = 100 * 1630 raidPtr->parity_rewrite_stripes_done / 1631 raidPtr->Layout.numStripe; 1632 } else { 1633 *(int *) data = 100; 1634 } 1635 return (0); 1636 1637 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1638 progressInfoPtr = (RF_ProgressInfo_t **) data; 1639 if (raidPtr->parity_rewrite_in_progress == 1) { 1640 progressInfo.total = raidPtr->Layout.numStripe; 1641 progressInfo.completed = 1642 raidPtr->parity_rewrite_stripes_done; 1643 progressInfo.remaining = progressInfo.total - 1644 progressInfo.completed; 1645 } else { 1646 progressInfo.remaining = 0; 1647 progressInfo.completed = 100; 1648 progressInfo.total = 100; 1649 } 1650 retcode = copyout(&progressInfo, *progressInfoPtr, 1651 sizeof(RF_ProgressInfo_t)); 1652 return (retcode); 1653 1654 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1655 if (raidPtr->Layout.map->faultsTolerated == 0) { 1656 /* This makes no sense on a RAID 0 */ 1657 *(int *) data = 100; 1658 return(0); 1659 } 1660 if (raidPtr->copyback_in_progress == 1) { 1661 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1662 raidPtr->Layout.numStripe; 1663 } else { 1664 *(int *) data = 100; 1665 } 1666 return (0); 1667 1668 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1669 progressInfoPtr = (RF_ProgressInfo_t **) data; 1670 if (raidPtr->copyback_in_progress == 1) { 1671 progressInfo.total = raidPtr->Layout.numStripe; 1672 progressInfo.completed = 1673 raidPtr->copyback_stripes_done; 1674 progressInfo.remaining = progressInfo.total - 1675 progressInfo.completed; 1676 } else { 1677 progressInfo.remaining = 0; 1678 progressInfo.completed = 100; 1679 progressInfo.total = 100; 1680 } 1681 retcode = copyout(&progressInfo, *progressInfoPtr, 1682 sizeof(RF_ProgressInfo_t)); 1683 return (retcode); 1684 1685 /* the sparetable daemon calls this to wait for the kernel to 1686 * need a spare table. 
this ioctl does not return until a 1687 * spare table is needed. XXX -- calling mpsleep here in the 1688 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1689 * -- I should either compute the spare table in the kernel, 1690 * or have a different -- XXX XXX -- interface (a different 1691 * character device) for delivering the table -- XXX */ 1692 #if 0 1693 case RAIDFRAME_SPARET_WAIT: 1694 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1695 while (!rf_sparet_wait_queue) 1696 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1697 waitreq = rf_sparet_wait_queue; 1698 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1699 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1700 1701 /* structure assignment */ 1702 *((RF_SparetWait_t *) data) = *waitreq; 1703 1704 RF_Free(waitreq, sizeof(*waitreq)); 1705 return (0); 1706 1707 /* wakes up a process waiting on SPARET_WAIT and puts an error 1708 * code in it that will cause the dameon to exit */ 1709 case RAIDFRAME_ABORT_SPARET_WAIT: 1710 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1711 waitreq->fcol = -1; 1712 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1713 waitreq->next = rf_sparet_wait_queue; 1714 rf_sparet_wait_queue = waitreq; 1715 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1716 wakeup(&rf_sparet_wait_queue); 1717 return (0); 1718 1719 /* used by the spare table daemon to deliver a spare table 1720 * into the kernel */ 1721 case RAIDFRAME_SEND_SPARET: 1722 1723 /* install the spare table */ 1724 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1725 1726 /* respond to the requestor. 
the return status of the spare 1727 * table installation is passed in the "fcol" field */ 1728 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1729 waitreq->fcol = retcode; 1730 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1731 waitreq->next = rf_sparet_resp_queue; 1732 rf_sparet_resp_queue = waitreq; 1733 wakeup(&rf_sparet_resp_queue); 1734 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1735 1736 return (retcode); 1737 #endif 1738 1739 default: 1740 break; /* fall through to the os-specific code below */ 1741 1742 } 1743 1744 if (!raidPtr->valid) 1745 return (EINVAL); 1746 1747 /* 1748 * Add support for "regular" device ioctls here. 1749 */ 1750 1751 error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l); 1752 if (error != EPASSTHROUGH) 1753 return (error); 1754 1755 switch (cmd) { 1756 case DIOCGDINFO: 1757 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1758 break; 1759 #ifdef __HAVE_OLD_DISKLABEL 1760 case ODIOCGDINFO: 1761 newlabel = *(rs->sc_dkdev.dk_label); 1762 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1763 return ENOTTY; 1764 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1765 break; 1766 #endif 1767 1768 case DIOCGPART: 1769 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1770 ((struct partinfo *) data)->part = 1771 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1772 break; 1773 1774 case DIOCWDINFO: 1775 case DIOCSDINFO: 1776 #ifdef __HAVE_OLD_DISKLABEL 1777 case ODIOCWDINFO: 1778 case ODIOCSDINFO: 1779 #endif 1780 { 1781 struct disklabel *lp; 1782 #ifdef __HAVE_OLD_DISKLABEL 1783 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1784 memset(&newlabel, 0, sizeof newlabel); 1785 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1786 lp = &newlabel; 1787 } else 1788 #endif 1789 lp = (struct disklabel *)data; 1790 1791 if ((error = raidlock(rs)) != 0) 1792 return (error); 1793 1794 rs->sc_flags |= RAIDF_LABELLING; 1795 1796 error = setdisklabel(rs->sc_dkdev.dk_label, 1797 lp, 0, rs->sc_dkdev.dk_cpulabel); 1798 if (error 
== 0) { 1799 if (cmd == DIOCWDINFO 1800 #ifdef __HAVE_OLD_DISKLABEL 1801 || cmd == ODIOCWDINFO 1802 #endif 1803 ) 1804 error = writedisklabel(RAIDLABELDEV(dev), 1805 raidstrategy, rs->sc_dkdev.dk_label, 1806 rs->sc_dkdev.dk_cpulabel); 1807 } 1808 rs->sc_flags &= ~RAIDF_LABELLING; 1809 1810 raidunlock(rs); 1811 1812 if (error) 1813 return (error); 1814 break; 1815 } 1816 1817 case DIOCWLABEL: 1818 if (*(int *) data != 0) 1819 rs->sc_flags |= RAIDF_WLABEL; 1820 else 1821 rs->sc_flags &= ~RAIDF_WLABEL; 1822 break; 1823 1824 case DIOCGDEFLABEL: 1825 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1826 break; 1827 1828 #ifdef __HAVE_OLD_DISKLABEL 1829 case ODIOCGDEFLABEL: 1830 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1831 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1832 return ENOTTY; 1833 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1834 break; 1835 #endif 1836 1837 case DIOCAWEDGE: 1838 case DIOCDWEDGE: 1839 dkw = (void *)data; 1840 1841 /* If the ioctl happens here, the parent is us. */ 1842 (void)strcpy(dkw->dkw_parent, rs->sc_xname); 1843 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw); 1844 1845 case DIOCLWEDGES: 1846 return dkwedge_list(&rs->sc_dkdev, 1847 (struct dkwedge_list *)data, l); 1848 case DIOCCACHESYNC: 1849 return rf_sync_component_caches(raidPtr); 1850 default: 1851 retcode = ENOTTY; 1852 } 1853 return (retcode); 1854 1855 } 1856 1857 1858 /* raidinit -- complete the rest of the initialization for the 1859 RAIDframe device. */ 1860 1861 1862 static void 1863 raidinit(RF_Raid_t *raidPtr) 1864 { 1865 cfdata_t cf; 1866 struct raid_softc *rs; 1867 int unit; 1868 1869 unit = raidPtr->raidid; 1870 1871 rs = &raid_softc[unit]; 1872 1873 /* XXX should check return code first... */ 1874 rs->sc_flags |= RAIDF_INITED; 1875 1876 /* XXX doesn't check bounds. 
*/ 1877 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1878 1879 /* attach the pseudo device */ 1880 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); 1881 cf->cf_name = raid_cd.cd_name; 1882 cf->cf_atname = raid_cd.cd_name; 1883 cf->cf_unit = unit; 1884 cf->cf_fstate = FSTATE_STAR; 1885 1886 rs->sc_dev = config_attach_pseudo(cf); 1887 1888 if (rs->sc_dev==NULL) { 1889 printf("raid%d: config_attach_pseudo failed\n", 1890 raidPtr->raidid); 1891 rs->sc_flags &= ~RAIDF_INITED; 1892 free(cf, M_RAIDFRAME); 1893 return; 1894 } 1895 1896 /* disk_attach actually creates space for the CPU disklabel, among 1897 * other things, so it's critical to call this *BEFORE* we try putzing 1898 * with disklabels. */ 1899 1900 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver); 1901 disk_attach(&rs->sc_dkdev); 1902 1903 /* XXX There may be a weird interaction here between this, and 1904 * protectedSectors, as used in RAIDframe. */ 1905 1906 rs->sc_size = raidPtr->totalSectors; 1907 1908 dkwedge_discover(&rs->sc_dkdev); 1909 1910 rf_set_properties(rs, raidPtr); 1911 1912 } 1913 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1914 /* wake up the daemon & tell it to get us a spare table 1915 * XXX 1916 * the entries in the queues should be tagged with the raidPtr 1917 * so that in the extremely rare case that two recons happen at once, 1918 * we know for which device were requesting a spare table 1919 * XXX 1920 * 1921 * XXX This code is not currently used. 
GO 1922 */ 1923 int 1924 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1925 { 1926 int retcode; 1927 1928 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1929 req->next = rf_sparet_wait_queue; 1930 rf_sparet_wait_queue = req; 1931 wakeup(&rf_sparet_wait_queue); 1932 1933 /* mpsleep unlocks the mutex */ 1934 while (!rf_sparet_resp_queue) { 1935 tsleep(&rf_sparet_resp_queue, PRIBIO, 1936 "raidframe getsparetable", 0); 1937 } 1938 req = rf_sparet_resp_queue; 1939 rf_sparet_resp_queue = req->next; 1940 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1941 1942 retcode = req->fcol; 1943 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1944 * alloc'd */ 1945 return (retcode); 1946 } 1947 #endif 1948 1949 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1950 * bp & passes it down. 1951 * any calls originating in the kernel must use non-blocking I/O 1952 * do some extra sanity checking to return "appropriate" error values for 1953 * certain conditions (to make some standard utilities work) 1954 * 1955 * Formerly known as: rf_DoAccessKernel 1956 */ 1957 void 1958 raidstart(RF_Raid_t *raidPtr) 1959 { 1960 RF_SectorCount_t num_blocks, pb, sum; 1961 RF_RaidAddr_t raid_addr; 1962 struct partition *pp; 1963 daddr_t blocknum; 1964 int unit; 1965 struct raid_softc *rs; 1966 int do_async; 1967 struct buf *bp; 1968 int rc; 1969 1970 unit = raidPtr->raidid; 1971 rs = &raid_softc[unit]; 1972 1973 /* quick check to see if anything has died recently */ 1974 RF_LOCK_MUTEX(raidPtr->mutex); 1975 if (raidPtr->numNewFailures > 0) { 1976 RF_UNLOCK_MUTEX(raidPtr->mutex); 1977 rf_update_component_labels(raidPtr, 1978 RF_NORMAL_COMPONENT_UPDATE); 1979 RF_LOCK_MUTEX(raidPtr->mutex); 1980 raidPtr->numNewFailures--; 1981 } 1982 1983 /* Check to see if we're at the limit... 
*/ 1984 while (raidPtr->openings > 0) { 1985 RF_UNLOCK_MUTEX(raidPtr->mutex); 1986 1987 /* get the next item, if any, from the queue */ 1988 if ((bp = bufq_get(rs->buf_queue)) == NULL) { 1989 /* nothing more to do */ 1990 return; 1991 } 1992 1993 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1994 * partition.. Need to make it absolute to the underlying 1995 * device.. */ 1996 1997 blocknum = bp->b_blkno; 1998 if (DISKPART(bp->b_dev) != RAW_PART) { 1999 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 2000 blocknum += pp->p_offset; 2001 } 2002 2003 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 2004 (int) blocknum)); 2005 2006 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 2007 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 2008 2009 /* *THIS* is where we adjust what block we're going to... 2010 * but DO NOT TOUCH bp->b_blkno!!! */ 2011 raid_addr = blocknum; 2012 2013 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 2014 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 2015 sum = raid_addr + num_blocks + pb; 2016 if (1 || rf_debugKernelAccess) { 2017 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 2018 (int) raid_addr, (int) sum, (int) num_blocks, 2019 (int) pb, (int) bp->b_resid)); 2020 } 2021 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 2022 || (sum < num_blocks) || (sum < pb)) { 2023 bp->b_error = ENOSPC; 2024 bp->b_resid = bp->b_bcount; 2025 biodone(bp); 2026 RF_LOCK_MUTEX(raidPtr->mutex); 2027 continue; 2028 } 2029 /* 2030 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 2031 */ 2032 2033 if (bp->b_bcount & raidPtr->sectorMask) { 2034 bp->b_error = EINVAL; 2035 bp->b_resid = bp->b_bcount; 2036 biodone(bp); 2037 RF_LOCK_MUTEX(raidPtr->mutex); 2038 continue; 2039 2040 } 2041 db1_printf(("Calling DoAccess..\n")); 2042 2043 2044 RF_LOCK_MUTEX(raidPtr->mutex); 2045 raidPtr->openings--; 2046 RF_UNLOCK_MUTEX(raidPtr->mutex); 2047 2048 /* 2049 * Everything is async. 
2050 */ 2051 do_async = 1; 2052 2053 disk_busy(&rs->sc_dkdev); 2054 2055 /* XXX we're still at splbio() here... do we *really* 2056 need to be? */ 2057 2058 /* don't ever condition on bp->b_flags & B_WRITE. 2059 * always condition on B_READ instead */ 2060 2061 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 2062 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 2063 do_async, raid_addr, num_blocks, 2064 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 2065 2066 if (rc) { 2067 bp->b_error = rc; 2068 bp->b_resid = bp->b_bcount; 2069 biodone(bp); 2070 /* continue loop */ 2071 } 2072 2073 RF_LOCK_MUTEX(raidPtr->mutex); 2074 } 2075 RF_UNLOCK_MUTEX(raidPtr->mutex); 2076 } 2077 2078 2079 2080 2081 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 2082 2083 int 2084 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 2085 { 2086 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 2087 struct buf *bp; 2088 2089 req->queue = queue; 2090 bp = req->bp; 2091 2092 switch (req->type) { 2093 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 2094 /* XXX need to do something extra here.. */ 2095 /* I'm leaving this in, as I've never actually seen it used, 2096 * and I'd like folks to report it... 
GO */ 2097 printf(("WAKEUP CALLED\n")); 2098 queue->numOutstanding++; 2099 2100 bp->b_flags = 0; 2101 bp->b_private = req; 2102 2103 KernelWakeupFunc(bp); 2104 break; 2105 2106 case RF_IO_TYPE_READ: 2107 case RF_IO_TYPE_WRITE: 2108 #if RF_ACC_TRACE > 0 2109 if (req->tracerec) { 2110 RF_ETIMER_START(req->tracerec->timer); 2111 } 2112 #endif 2113 InitBP(bp, queue->rf_cinfo->ci_vp, 2114 op, queue->rf_cinfo->ci_dev, 2115 req->sectorOffset, req->numSector, 2116 req->buf, KernelWakeupFunc, (void *) req, 2117 queue->raidPtr->logBytesPerSector, req->b_proc); 2118 2119 if (rf_debugKernelAccess) { 2120 db1_printf(("dispatch: bp->b_blkno = %ld\n", 2121 (long) bp->b_blkno)); 2122 } 2123 queue->numOutstanding++; 2124 queue->last_deq_sector = req->sectorOffset; 2125 /* acc wouldn't have been let in if there were any pending 2126 * reqs at any other priority */ 2127 queue->curPriority = req->priority; 2128 2129 db1_printf(("Going for %c to unit %d col %d\n", 2130 req->type, queue->raidPtr->raidid, 2131 queue->col)); 2132 db1_printf(("sector %d count %d (%d bytes) %d\n", 2133 (int) req->sectorOffset, (int) req->numSector, 2134 (int) (req->numSector << 2135 queue->raidPtr->logBytesPerSector), 2136 (int) queue->raidPtr->logBytesPerSector)); 2137 2138 /* 2139 * XXX: drop lock here since this can block at 2140 * least with backing SCSI devices. Retake it 2141 * to minimize fuss with calling interfaces. 2142 */ 2143 2144 RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam"); 2145 bdev_strategy(bp); 2146 RF_LOCK_QUEUE_MUTEX(queue, "unusedparam"); 2147 break; 2148 2149 default: 2150 panic("bad req->type in rf_DispatchKernelIO"); 2151 } 2152 db1_printf(("Exiting from DispatchKernelIO\n")); 2153 2154 return (0); 2155 } 2156 /* this is the callback function associated with a I/O invoked from 2157 kernel code. 
 */
/*
 * KernelWakeupFunc: biodone callback for component I/O issued by
 * rf_DispatchKernelIO (installed as bp->b_iodone via InitBP below).
 * Raises to splbio(), and on I/O error marks the component failed --
 * but only once, and only if the set can still tolerate another
 * failure.  The completed request is placed on raidPtr->iodone and
 * the raidio thread is woken to process it.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	/* the request was stashed in b_private by InitBP */
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	/* account the physical I/O time against this access's trace record */
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = bp->b_error;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 * The caller supplies the completion callback (cbFunc) and its
 * argument (cbArg), which are stored in b_iodone/b_private.
 * Panics if the computed byte count is zero.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	/* convert the sector count to bytes */
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

/*
 * Fill in a default disklabel describing this RAID set (geometry is
 * synthesized from the array parameters; it has no physical meaning).
 */
static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label...
 */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	/* fabricated geometry: 4 "tracks" per column */
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
	    (lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	/* NOTE(review): checksums rs->sc_dkdev.dk_label rather than lp;
	   in the raidgetdisklabel() caller below these are the same
	   object -- confirm no other caller passes a different lp */
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* start from a synthesized label; readdisklabel() overwrites it
	   if a real one is found on the device */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and old disklabel may used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%" PRIu32 ") != "
			    "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
			    lp->d_secperunit, rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%" PRIu64 ")\n",
				    unit, rs->sc_xname, 'a' + i, rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :->  GO )
 */
static int
raidlock(struct raid_softc *rs)
{
	int error;

	/* sleep until the holder clears RAIDF_LOCKED; PCATCH makes the
	   wait interruptible, in which case the tsleep error is returned */
	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
/*
 * Unlock and wake up any waiters.
2391 */ 2392 static void 2393 raidunlock(struct raid_softc *rs) 2394 { 2395 2396 rs->sc_flags &= ~RAIDF_LOCKED; 2397 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2398 rs->sc_flags &= ~RAIDF_WANTED; 2399 wakeup(rs); 2400 } 2401 } 2402 2403 2404 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2405 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2406 2407 int 2408 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2409 { 2410 RF_ComponentLabel_t clabel; 2411 raidread_component_label(dev, b_vp, &clabel); 2412 clabel.mod_counter = mod_counter; 2413 clabel.clean = RF_RAID_CLEAN; 2414 raidwrite_component_label(dev, b_vp, &clabel); 2415 return(0); 2416 } 2417 2418 2419 int 2420 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2421 { 2422 RF_ComponentLabel_t clabel; 2423 raidread_component_label(dev, b_vp, &clabel); 2424 clabel.mod_counter = mod_counter; 2425 clabel.clean = RF_RAID_DIRTY; 2426 raidwrite_component_label(dev, b_vp, &clabel); 2427 return(0); 2428 } 2429 2430 /* ARGSUSED */ 2431 int 2432 raidread_component_label(dev_t dev, struct vnode *b_vp, 2433 RF_ComponentLabel_t *clabel) 2434 { 2435 struct buf *bp; 2436 const struct bdevsw *bdev; 2437 int error; 2438 2439 /* XXX should probably ensure that we don't try to do this if 2440 someone has changed rf_protected_sectors. */ 2441 2442 if (b_vp == NULL) { 2443 /* For whatever reason, this component is not valid. 2444 Don't try to read a component label from it. */ 2445 return(EINVAL); 2446 } 2447 2448 /* get a block of the appropriate size... 
*/ 2449 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2450 bp->b_dev = dev; 2451 2452 /* get our ducks in a row for the read */ 2453 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2454 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2455 bp->b_flags |= B_READ; 2456 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2457 2458 bdev = bdevsw_lookup(bp->b_dev); 2459 if (bdev == NULL) 2460 return (ENXIO); 2461 (*bdev->d_strategy)(bp); 2462 2463 error = biowait(bp); 2464 2465 if (!error) { 2466 memcpy(clabel, bp->b_data, 2467 sizeof(RF_ComponentLabel_t)); 2468 } 2469 2470 brelse(bp, 0); 2471 return(error); 2472 } 2473 /* ARGSUSED */ 2474 int 2475 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2476 RF_ComponentLabel_t *clabel) 2477 { 2478 struct buf *bp; 2479 const struct bdevsw *bdev; 2480 int error; 2481 2482 /* get a block of the appropriate size... */ 2483 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2484 bp->b_dev = dev; 2485 2486 /* get our ducks in a row for the write */ 2487 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2488 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2489 bp->b_flags |= B_WRITE; 2490 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2491 2492 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2493 2494 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2495 2496 bdev = bdevsw_lookup(bp->b_dev); 2497 if (bdev == NULL) 2498 return (ENXIO); 2499 (*bdev->d_strategy)(bp); 2500 error = biowait(bp); 2501 brelse(bp, 0); 2502 if (error) { 2503 #if 1 2504 printf("Failed to write RAID component info!\n"); 2505 #endif 2506 } 2507 2508 return(error); 2509 } 2510 2511 void 2512 rf_markalldirty(RF_Raid_t *raidPtr) 2513 { 2514 RF_ComponentLabel_t clabel; 2515 int sparecol; 2516 int c; 2517 int j; 2518 int scol = -1; 2519 2520 raidPtr->mod_counter++; 2521 for (c = 0; c < raidPtr->numCol; c++) { 2522 /* we don't want to touch (at all) a disk that has 2523 failed */ 2524 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2525 raidread_component_label( 2526 
			    raidPtr->Disks[c].dev,
			    raidPtr->raid_cinfo[c].ci_vp,
			    &clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
				    raidPtr->Disks[c].dev,
				    raidPtr->raid_cinfo[c].ci_vp,
				    raidPtr->mod_counter);
			}
		}
	}

	/* now do the same for any spares that are in service */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
			    raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    &clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    raidPtr->mod_counter);
		}
	}
}


/*
 * Rewrite the component labels of every optimal component (and every
 * in-use spare) with the current mod_counter and unit number.  When
 * final == RF_FINAL_COMPONENT_UPDATE and the parity is known good,
 * also mark the labels clean.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
			    raidPtr->Disks[c].dev,
			    raidPtr->raid_cinfo[c].ci_vp,
			    &clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;

			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			/* note what unit we are configured as */
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
			    raidPtr->Disks[c].dev,
			    raidPtr->raid_cinfo[c].ci_vp,
			    &clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					    raidPtr->Disks[c].dev,
					    raidPtr->raid_cinfo[c].ci_vp,
					    raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			raidread_component_label(
			    raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    &clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;
			clabel.last_unit = raidPtr->raidid;

			raidwrite_component_label(
			    raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    &clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
					    raidPtr->raid_cinfo[sparecol].ci_vp,
					    raidPtr->mod_counter);
				}
			}
		}
	}
}

/*
 * Close a component's vnode.  Auto-configured components were opened
 * with VOP_OPEN directly and are closed the same way; manually
 * configured ones came via vn_close().  A NULL vp is a no-op.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}


/*
 * Close and forget the vnodes of all components and spares of this set.
 */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should..
 */

	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	/* same again for the spares, which live past numCol */
	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}


/*
 * Kernel thread body: fail the requested component (optionally kicking
 * off reconstruction), then exit.  Frees the request structure.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Kernel thread body: rewrite all parity for the set.  On success the
 * in-core parity_good flag is set so the clean bit can be written at
 * shutdown.  Wakes anyone in waitShutdown, then exits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/*
 * Kernel thread body: copy reconstructed data back from the spare to
 * a replaced component, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}


/*
 * Kernel thread body: reconstruct a component in place (onto itself),
 * then free the request and exit.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Try to read a component label from (dev, vp); if it looks reasonable
 * and fits within the partition, prepend a new RF_AutoConfig_t for it
 * to ac_list.  On rejection the vnode is closed and released.  Returns
 * the (possibly updated) list head, or NULL if we run out of memory
 * (in which case the entire list is torn down).
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: free the whole list and give up */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel) &&
		    (clabel->partitionSize <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it.
 */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			/* prepend to the list */
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}

/*
 * Scan every disk-class device in the system looking for RAIDframe
 * components: wedges whose partition type is RAIDFRAME, and disklabel
 * partitions marked FS_RAID.  Returns a list of candidates for
 * autoconfiguration (possibly NULL).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	dev_t dev;
	int bmajor, bminor, wedge;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;


	/* initialize the AutoConfig list */
	ac_list = NULL;

	/* we begin by trolling through *all* the devices on the system */

	for (dv = alldevs.tqh_first; dv != NULL;
	     dv = dv->dv_list.tqe_next) {

		/* we are only interested in disks... */
		if (device_class(dv) != DV_DISK)
			continue;

		/* we don't care about floppies... */
		if (device_is_a(dv, "fd")) {
			continue;
		}

		/* we don't care about CD's... */
		if (device_is_a(dv, "cd")) {
			continue;
		}

		/* we don't care about md's... */
		if (device_is_a(dv, "md")) {
			continue;
		}

		/* hdfd is the Atari/Hades floppy driver */
		if (device_is_a(dv, "hdfd")) {
			continue;
		}

		/* fdisa is the Atari/Milan floppy driver */
		if (device_is_a(dv, "fdisa")) {
			continue;
		}

		/* need to find the device_name_to_block_device_major stuff */
		bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

		/* get a vnode for the raw partition of this disk */

		/* wedges (dk) are addressed by plain minor; real disks get
		   the raw partition of their disk device */
		wedge = device_is_a(dv, "dk");
		bminor = minor(device_unit(dv));
		dev = wedge ? makedev(bmajor, bminor) :
		    MAKEDISKDEV(bmajor, bminor, RAW_PART);
		if (bdevvp(dev, &vp))
			panic("RAID can't alloc vnode");

		error = VOP_OPEN(vp, FREAD, NOCRED);

		if (error) {
			/* "Who cares."  Continue looking
			   for something that exists*/
			vput(vp);
			continue;
		}

		if (wedge) {
			struct dkwedge_info dkw;
			error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
			    NOCRED);
			if (error) {
				printf("RAIDframe: can't get wedge info for "
				    "dev %s (%d)\n", device_xname(dv), error);
				/* NOTE(review): opened FREAD but closed
				   FREAD|FWRITE -- verify this mismatch is
				   intentional (same pattern below) */
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}

			/* rf_get_component takes ownership of vp */
			ac_list = rf_get_component(ac_list, dev, vp,
			    device_xname(dv), dkw.dkw_size);
			continue;
		}

		/* Ok, the disk exists.  Go get the disklabel. */
		error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
		if (error) {
			/*
			 * XXX can't happen - open() would
			 * have errored out (or faked up one)
			 */
			if (error != ENOTTY)
				printf("RAIDframe: can't get label for dev "
				    "%s (%d)\n", device_xname(dv), error);
		}

		/* don't need this any more.  We'll allocate it again
		   a little later if we really do... */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);

		if (error)
			continue;

		for (i = 0; i < label.d_npartitions; i++) {
			char cname[sizeof(ac_list->devname)];

			/* We only support partitions marked as RAID */
			if (label.d_partitions[i].p_fstype != FS_RAID)
				continue;

			dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD, NOCRED);
			if (error) {
				/* Whatever... */
				vput(vp);
				continue;
			}
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dv), 'a' + i);
			ac_list = rf_get_component(ac_list, dev, vp, cname,
			    label.d_partitions[i].p_size);
		}
	}
	return ac_list;
}


/*
 * Sanity-check a component label: known version, clean flag one of
 * the two legal values, row/column within the declared geometry, and
 * positive block size/count.  Returns 1 if plausible, 0 otherwise.
 */
static int
rf_reasonable_label(RF_ComponentLabel_t *clabel)
{

	if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
	     (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
	    ((clabel->clean == RF_RAID_CLEAN) ||
	     (clabel->clean == RF_RAID_DIRTY)) &&
	    clabel->row >=0 &&
	    clabel->column >= 0 &&
	    clabel->num_rows > 0 &&
	    clabel->num_columns > 0 &&
	    clabel->row < clabel->num_rows &&
	    clabel->column < clabel->num_columns &&
	    clabel->blockSize > 0 &&
	    clabel->numBlocks > 0) {
		/* label looks reasonable enough... */
		return(1);
	}
	return(0);
}


#ifdef DEBUG
/*
 * Dump a component label to the console (debug builds only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	    clabel->clean ?
	    "Yes" : "No", clabel->status );
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf("   Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No" );
	printf("   Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the flat list of discovered components into config sets:
 * components whose labels agree (per rf_does_it_fit()) are gathered
 * into the same RF_ConfigSet_t.  The ac->next links are rethreaded
 * onto the per-set lists.  Returns the list of sets.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets.  */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one... */
			config_sets = (RF_ConfigSet_t *)
				malloc(sizeof(RF_ConfigSet_t),
				       M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
					malloc(sizeof(RF_ConfigSet_t),
					       M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/*
 * Decide whether component ac belongs to config set cset by comparing
 * its label against the set's first member.  Returns 1 on match,
 * 0 otherwise.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	/* every geometry/identity field must agree (mod_counter and
	   partitionSize deliberately excluded -- see comment above) */
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    (clabel1->numBlocks == clabel2->numBlocks) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it get's here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

/*
 * Decide whether a config set has enough live components (those whose
 * mod_counter matches the set's maximum) to be configured.  Returns 1
 * if configurable, 0 if too many components are missing for the set's
 * parity type.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */
	/* (the highest mod_counter present is taken as authoritative;
	   stale components have lower values) */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		/* look for a current (matching mod_counter) component
		   for column c */
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t from the component labels of an autoconfigured
 * set: copy the geometry/queueing parameters from the first label and
 * record each component's device name in its column slot.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ??
 */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}

/*
 * Set the autoconfigure flag in-core and in the component label of
 * every optimal component and in-use spare.  Returns new_value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t clabel;
	struct vnode *vp;
	dev_t dev;
	int column;
	int sparecol;

	raidPtr->autoconfigure = new_value;

	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			dev = raidPtr->Disks[column].dev;
			vp = raidPtr->raid_cinfo[column].ci_vp;
			/* read-modify-write the on-disk label */
			raidread_component_label(dev, vp, &clabel);
			clabel.autoconfigure = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			dev = raidPtr->Disks[sparecol].dev;
			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.autoconfigure = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	return(new_value);
}

/*
 * Set the root_partition flag in-core and in the component label of
 * every optimal component and in-use spare.  Returns new_value.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t clabel;
	struct vnode *vp;
	dev_t dev;
	int column;
	int sparecol;

	raidPtr->root_partition = new_value;
	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			dev = raidPtr->Disks[column].dev;
			vp = raidPtr->raid_cinfo[column].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.root_partition = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			dev = raidPtr->Disks[sparecol].dev;
			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.root_partition = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	return(new_value);
}

/*
 * Close and release the vnode of every component in the config set.
 * (Components were opened FREAD-only during the autoconfig scan.)
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free all memory associated with a config set: each component's
 * label, each RF_AutoConfig_t, and finally the set itself.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Populate *clabel from the current in-core state of the array.
 * Note: column and row are NOT set here; callers fill those in.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}

/*
 * Configure a discovered set as a raid unit.  Prefers the unit number
 * it was last configured at (last_unit); otherwise searches from the
 * top down for a free unit.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt...
*/ 3514 printf("Unable to auto configure this set!\n"); 3515 printf("(Out of RAID devs!)\n"); 3516 free(config, M_RAIDFRAME); 3517 return(1); 3518 } 3519 3520 #ifdef DEBUG 3521 printf("Configuring raid%d:\n",raidID); 3522 #endif 3523 3524 raidPtr = raidPtrs[raidID]; 3525 3526 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3527 raidPtr->raidid = raidID; 3528 raidPtr->openings = RAIDOUTSTANDING; 3529 3530 /* 3. Build the configuration structure */ 3531 rf_create_configuration(cset->ac, config, raidPtr); 3532 3533 /* 4. Do the configuration */ 3534 retcode = rf_Configure(raidPtr, config, cset->ac); 3535 3536 if (retcode == 0) { 3537 3538 raidinit(raidPtrs[raidID]); 3539 3540 rf_markalldirty(raidPtrs[raidID]); 3541 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3542 if (cset->ac->clabel->root_partition==1) { 3543 /* everything configured just fine. Make a note 3544 that this set is eligible to be root. */ 3545 cset->rootable = 1; 3546 /* XXX do this here? */ 3547 raidPtrs[raidID]->root_partition = 1; 3548 } 3549 } 3550 3551 /* 5. Cleanup */ 3552 free(config, M_RAIDFRAME); 3553 3554 *unit = raidID; 3555 return(retcode); 3556 } 3557 3558 void 3559 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3560 { 3561 struct buf *bp; 3562 3563 bp = (struct buf *)desc->bp; 3564 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3565 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3566 } 3567 3568 void 3569 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3570 size_t xmin, size_t xmax) 3571 { 3572 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); 3573 pool_sethiwat(p, xmax); 3574 pool_prime(p, xmin); 3575 pool_setlowat(p, xmin); 3576 } 3577 3578 /* 3579 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see 3580 * if there is IO pending and if that IO could possibly be done for a 3581 * given RAID set. Returns 0 if IO is waiting and can be done, 1 3582 * otherwise. 
3583 * 3584 */ 3585 3586 int 3587 rf_buf_queue_check(int raidid) 3588 { 3589 if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) && 3590 raidPtrs[raidid]->openings > 0) { 3591 /* there is work to do */ 3592 return 0; 3593 } 3594 /* default is nothing to do */ 3595 return 1; 3596 } 3597 3598 int 3599 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr) 3600 { 3601 struct partinfo dpart; 3602 struct dkwedge_info dkw; 3603 int error; 3604 3605 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred); 3606 if (error == 0) { 3607 diskPtr->blockSize = dpart.disklab->d_secsize; 3608 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 3609 diskPtr->partitionSize = dpart.part->p_size; 3610 return 0; 3611 } 3612 3613 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred); 3614 if (error == 0) { 3615 diskPtr->blockSize = 512; /* XXX */ 3616 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors; 3617 diskPtr->partitionSize = dkw.dkw_size; 3618 return 0; 3619 } 3620 return error; 3621 } 3622 3623 static int 3624 raid_match(device_t self, cfdata_t cfdata, void *aux) 3625 { 3626 return 1; 3627 } 3628 3629 static void 3630 raid_attach(device_t parent, device_t self, void *aux) 3631 { 3632 3633 } 3634 3635 3636 static int 3637 raid_detach(device_t self, int flags) 3638 { 3639 int error; 3640 struct raid_softc *rs = &raid_softc[device_unit(self)]; 3641 3642 if ((error = raidlock(rs)) != 0) 3643 return (error); 3644 3645 error = raid_detach_unlocked(rs); 3646 3647 raidunlock(rs); 3648 3649 return error; 3650 } 3651 3652 static void 3653 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr) 3654 { 3655 prop_dictionary_t disk_info, odisk_info, geom; 3656 disk_info = prop_dictionary_create(); 3657 geom = prop_dictionary_create(); 3658 prop_dictionary_set_uint64(geom, "sectors-per-unit", 3659 raidPtr->totalSectors); 3660 prop_dictionary_set_uint32(geom, "sector-size", 3661 raidPtr->bytesPerSector); 3662 3663 
prop_dictionary_set_uint16(geom, "sectors-per-track", 3664 raidPtr->Layout.dataSectorsPerStripe); 3665 prop_dictionary_set_uint16(geom, "tracks-per-cylinder", 3666 4 * raidPtr->numCol); 3667 3668 prop_dictionary_set_uint64(geom, "cylinders-per-unit", 3669 raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe * 3670 (4 * raidPtr->numCol))); 3671 3672 prop_dictionary_set(disk_info, "geometry", geom); 3673 prop_object_release(geom); 3674 prop_dictionary_set(device_properties(rs->sc_dev), 3675 "disk-info", disk_info); 3676 odisk_info = rs->sc_dkdev.dk_info; 3677 rs->sc_dkdev.dk_info = disk_info; 3678 if (odisk_info) 3679 prop_object_release(odisk_info); 3680 } 3681 3682 /* 3683 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components. 3684 * We end up returning whatever error was returned by the first cache flush 3685 * that fails. 3686 */ 3687 3688 static int 3689 rf_sync_component_caches(RF_Raid_t *raidPtr) 3690 { 3691 int c, sparecol; 3692 int e,error; 3693 int force = 1; 3694 3695 error = 0; 3696 for (c = 0; c < raidPtr->numCol; c++) { 3697 if (raidPtr->Disks[c].status == rf_ds_optimal) { 3698 e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC, 3699 &force, FWRITE, NOCRED); 3700 if (e) { 3701 if (e != ENODEV) 3702 printf("raid%d: cache flush to component %s failed.\n", 3703 raidPtr->raidid, raidPtr->Disks[c].devname); 3704 if (error == 0) { 3705 error = e; 3706 } 3707 } 3708 } 3709 } 3710 3711 for( c = 0; c < raidPtr->numSpare ; c++) { 3712 sparecol = raidPtr->numCol + c; 3713 /* Need to ensure that the reconstruct actually completed! 
*/ 3714 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3715 e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp, 3716 DIOCCACHESYNC, &force, FWRITE, NOCRED); 3717 if (e) { 3718 if (e != ENODEV) 3719 printf("raid%d: cache flush to component %s failed.\n", 3720 raidPtr->raidid, raidPtr->Disks[sparecol].devname); 3721 if (error == 0) { 3722 error = e; 3723 } 3724 } 3725 } 3726 } 3727 return error; 3728 } 3729