1 /* $NetBSD: rf_netbsdkintf.c,v 1.246 2008/04/28 20:23:56 martin Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1990, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * This code is derived from software contributed to Berkeley by 36 * the Systems Programming Group of the University of Utah Computer 37 * Science Department. 
38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 64 * 65 * @(#)cd.c 8.2 (Berkeley) 11/16/93 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * 71 * This code is derived from software contributed to Berkeley by 72 * the Systems Programming Group of the University of Utah Computer 73 * Science Department. 
74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 3. All advertising materials mentioning features or use of this software 84 * must display the following acknowledgement: 85 * This product includes software developed by the University of 86 * California, Berkeley and its contributors. 87 * 4. Neither the name of the University nor the names of its contributors 88 * may be used to endorse or promote products derived from this software 89 * without specific prior written permission. 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 101 * SUCH DAMAGE. 102 * 103 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 104 * 105 * @(#)cd.c 8.2 (Berkeley) 11/16/93 106 */ 107 108 /* 109 * Copyright (c) 1995 Carnegie-Mellon University. 110 * All rights reserved. 
111 * 112 * Authors: Mark Holland, Jim Zelenka 113 * 114 * Permission to use, copy, modify and distribute this software and 115 * its documentation is hereby granted, provided that both the copyright 116 * notice and this permission notice appear in all copies of the 117 * software, derivative works or modified versions, and any portions 118 * thereof, and that both notices appear in supporting documentation. 119 * 120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 123 * 124 * Carnegie Mellon requests users of this software to return to 125 * 126 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 127 * School of Computer Science 128 * Carnegie Mellon University 129 * Pittsburgh PA 15213-3890 130 * 131 * any improvements or extensions that they make and grant Carnegie the 132 * rights to redistribute these changes. 
 */

/***********************************************************
 *
 * rf_kintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.246 2008/04/28 20:23:56 martin Exp $");

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/user.h>
#include <sys/reboot.h>
#include <sys/kauth.h>

#include <prop/proplib.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include "raid.h"
#include "opt_raid_autoconfig.h"
#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_threadstuff.h"

#ifdef DEBUG
int     rf_kdebug_level = 0;
/* Debug printf, enabled at runtime by raising rf_kdebug_level. */
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else				/* DEBUG */
#define db1_printf(a) { }
#endif				/* DEBUG */

static RF_Raid_t **raidPtrs;	/* global raid device descriptors,
				 * one slot per configured unit;
				 * allocated in raidattach() */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf *);
static void InitBP(struct buf *, struct vnode *, unsigned,
    dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
    void *, int, struct proc *);
static void raidinit(RF_Raid_t *);

void raidattach(int);
static int raid_match(struct device *, struct cfdata *, void *);
static void raid_attach(struct device *, struct device *, void *);
static int raid_detach(struct device *, int);

dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

/* Block-device switch entries for /dev/raid* block nodes. */
const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

/* Character-device (raw) switch entries. */
const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

static struct dkdriver rf_dkdriver = { raidstrategy, minphys };

/* XXX Not sure if the following should be replacing the raidPtrs above,
	or if it should be used in conjunction with that...
*/

/* Per-unit software state; one entry per RAID unit, allocated in
 * raidattach() and indexed by the device minor's unit number. */
struct raid_softc {
	struct device *sc_dev;	/* autoconf device handle */
	int     sc_flags;	/* flags (RAIDF_*, below) */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device, in DEV_BSIZE
				 * blocks */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;		/* number of units set up by raidattach() */

extern struct cfdriver raid_cd;
CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

/* Map a raid dev_t to the raw partition of the same unit. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				     struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_properties(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(struct device *self);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file.
 */

struct RF_Pools_s rf_pools;

/*
 * raidattach -- pseudo-device attach routine.  Called once at boot with
 * the number of units to support.  Allocates the global raidPtrs[] and
 * raid_softc[] arrays, boots the RAIDframe core, attaches the autoconf
 * glue, and registers a config finalizer so that auto-configuration of
 * RAID sets runs after all real hardware has been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_normal("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* Shrink numraid so later code never touches the
			 * unallocated tail of raidPtrs[]. */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * rf_autoconfig -- config finalizer.  Scans all disks for RAIDframe
 * component labels, sorts the components into sets, and configures the
 * complete sets.  Runs at most once (raidautoconfig is cleared on entry).
 * Returns non-zero if it did any work, as required of finalizers.
 */
int
rf_autoconfig(struct device *self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (raidautoconfig == 0)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfig = 0;

	/* 1. locate all RAID components on the system */
#ifdef DEBUG
	printf("Searching for RAID components...\n");
#endif
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 *    This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

/*
 * rf_buildroothack -- configure each eligible config set, then try to
 * work out which (if any) configured RAID set should become the root
 * device.  If exactly one rootable set is found it becomes
 * booted_device; if several are candidates and the boot device cannot
 * disambiguate them, fall back to asking the user (RB_ASKNAME).
 * All config sets are released/cleaned up before returning.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			retcode = rf_auto_config_set(cset, &raidID);
			if (!retcode) {
#ifdef DEBUG
				printf("raid%d: configured ok\n", raidID);
#endif
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#ifdef DEBUG
				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		/* Re-count, keeping only rootable sets that actually
		 * contain the device we booted from. */
		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				/* skip the leading "/dev/" of the
				 * component name before comparing */
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
#ifdef DEBUG
				printf("raid%d includes boot device %s\n",
				       raidID, devname);
#endif
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}

/*
 * raidsize -- return the size (in DEV_BSIZE blocks) of the swap
 * partition on the given device, or -1 if the unit is not configured
 * or the partition is not FS_SWAP.  Temporarily opens the block device
 * if no partition in its unit is currently open.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	/* undo the temporary open above, if we did one */
	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

/*
 * raiddump -- crash-dump entry point.  Only RAID 1 sets are supported.
 * Picks a live component (preferring the master, then a spare of the
 * master, then the slave, then a spare of the slave) and forwards the
 * dump to that component's block driver.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int part, c, sparecol, j, scol, dumpto;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		/* NOTE(review): blkno is daddr_t (signed); printing it
		 * with PRIu64 relies on it being non-negative here. */
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for (j = 0; j < raidPtr->numCol; j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!? */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}

/*
 * raidopen -- open entry point.  Takes the unit lock, reads the
 * disklabel on first open of a configured unit, validates the
 * partition, records the open in the appropriate openmask, and marks
 * all components dirty on the first open.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
	 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* read the label on the very first open of this unit */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}

/*
 * raidclose -- close entry point.  Clears the open bit for this
 * partition; on last close of a configured unit, writes final
 * component labels, and if the system is shutting down also shuts the
 * RAID set down and detaches the pseudo-device.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct cfdata *cf;
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* detach the device */

			cf = device_cfdata(rs->sc_dev);
			error = config_detach(rs->sc_dev, DETACH_QUIET);
			free(cf, M_RAIDFRAME);

			/* Detach the disk.
*/ 847 disk_detach(&rs->sc_dkdev); 848 disk_destroy(&rs->sc_dkdev); 849 } 850 } 851 852 raidunlock(rs); 853 return (0); 854 855 } 856 857 void 858 raidstrategy(struct buf *bp) 859 { 860 int s; 861 862 unsigned int raidID = raidunit(bp->b_dev); 863 RF_Raid_t *raidPtr; 864 struct raid_softc *rs = &raid_softc[raidID]; 865 int wlabel; 866 867 if ((rs->sc_flags & RAIDF_INITED) ==0) { 868 bp->b_error = ENXIO; 869 goto done; 870 } 871 if (raidID >= numraid || !raidPtrs[raidID]) { 872 bp->b_error = ENODEV; 873 goto done; 874 } 875 raidPtr = raidPtrs[raidID]; 876 if (!raidPtr->valid) { 877 bp->b_error = ENODEV; 878 goto done; 879 } 880 if (bp->b_bcount == 0) { 881 db1_printf(("b_bcount is zero..\n")); 882 goto done; 883 } 884 885 /* 886 * Do bounds checking and adjust transfer. If there's an 887 * error, the bounds check will flag that for us. 888 */ 889 890 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 891 if (DISKPART(bp->b_dev) == RAW_PART) { 892 uint64_t size; /* device size in DEV_BSIZE unit */ 893 894 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 895 size = raidPtr->totalSectors << 896 (raidPtr->logBytesPerSector - DEV_BSHIFT); 897 } else { 898 size = raidPtr->totalSectors >> 899 (DEV_BSHIFT - raidPtr->logBytesPerSector); 900 } 901 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 902 goto done; 903 } 904 } else { 905 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 906 db1_printf(("Bounds check failed!!:%d %d\n", 907 (int) bp->b_blkno, (int) wlabel)); 908 goto done; 909 } 910 } 911 s = splbio(); 912 913 bp->b_resid = 0; 914 915 /* stuff it onto our queue */ 916 BUFQ_PUT(rs->buf_queue, bp); 917 918 /* scheduled the IO to happen at the next convenient time */ 919 wakeup(&(raidPtrs[raidID]->iodone)); 920 921 splx(s); 922 return; 923 924 done: 925 bp->b_resid = bp->b_bcount; 926 biodone(bp); 927 } 928 /* ARGSUSED */ 929 int 930 raidread(dev_t dev, struct uio *uio, int flags) 931 { 932 int unit = raidunit(dev); 933 struct raid_softc 
*rs; 934 935 if (unit >= numraid) 936 return (ENXIO); 937 rs = &raid_softc[unit]; 938 939 if ((rs->sc_flags & RAIDF_INITED) == 0) 940 return (ENXIO); 941 942 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 943 944 } 945 /* ARGSUSED */ 946 int 947 raidwrite(dev_t dev, struct uio *uio, int flags) 948 { 949 int unit = raidunit(dev); 950 struct raid_softc *rs; 951 952 if (unit >= numraid) 953 return (ENXIO); 954 rs = &raid_softc[unit]; 955 956 if ((rs->sc_flags & RAIDF_INITED) == 0) 957 return (ENXIO); 958 959 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 960 961 } 962 963 int 964 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 965 { 966 int unit = raidunit(dev); 967 int error = 0; 968 int part, pmask; 969 struct cfdata *cf; 970 struct raid_softc *rs; 971 RF_Config_t *k_cfg, *u_cfg; 972 RF_Raid_t *raidPtr; 973 RF_RaidDisk_t *diskPtr; 974 RF_AccTotals_t *totals; 975 RF_DeviceConfig_t *d_cfg, **ucfgp; 976 u_char *specific_buf; 977 int retcode = 0; 978 int column; 979 int raidid; 980 struct rf_recon_req *rrcopy, *rr; 981 RF_ComponentLabel_t *clabel; 982 RF_ComponentLabel_t *ci_label; 983 RF_ComponentLabel_t **clabel_ptr; 984 RF_SingleComponent_t *sparePtr,*componentPtr; 985 RF_SingleComponent_t component; 986 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 987 int i, j, d; 988 #ifdef __HAVE_OLD_DISKLABEL 989 struct disklabel newlabel; 990 #endif 991 struct dkwedge_info *dkw; 992 993 if (unit >= numraid) 994 return (ENXIO); 995 rs = &raid_softc[unit]; 996 raidPtr = raidPtrs[unit]; 997 998 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 999 (int) DISKPART(dev), (int) unit, (int) cmd)); 1000 1001 /* Must be open for writes for these commands... 
*/ 1002 switch (cmd) { 1003 #ifdef DIOCGSECTORSIZE 1004 case DIOCGSECTORSIZE: 1005 *(u_int *)data = raidPtr->bytesPerSector; 1006 return 0; 1007 case DIOCGMEDIASIZE: 1008 *(off_t *)data = 1009 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector; 1010 return 0; 1011 #endif 1012 case DIOCSDINFO: 1013 case DIOCWDINFO: 1014 #ifdef __HAVE_OLD_DISKLABEL 1015 case ODIOCWDINFO: 1016 case ODIOCSDINFO: 1017 #endif 1018 case DIOCWLABEL: 1019 case DIOCAWEDGE: 1020 case DIOCDWEDGE: 1021 if ((flag & FWRITE) == 0) 1022 return (EBADF); 1023 } 1024 1025 /* Must be initialized for these... */ 1026 switch (cmd) { 1027 case DIOCGDINFO: 1028 case DIOCSDINFO: 1029 case DIOCWDINFO: 1030 #ifdef __HAVE_OLD_DISKLABEL 1031 case ODIOCGDINFO: 1032 case ODIOCWDINFO: 1033 case ODIOCSDINFO: 1034 case ODIOCGDEFLABEL: 1035 #endif 1036 case DIOCGPART: 1037 case DIOCWLABEL: 1038 case DIOCGDEFLABEL: 1039 case DIOCAWEDGE: 1040 case DIOCDWEDGE: 1041 case DIOCLWEDGES: 1042 case RAIDFRAME_SHUTDOWN: 1043 case RAIDFRAME_REWRITEPARITY: 1044 case RAIDFRAME_GET_INFO: 1045 case RAIDFRAME_RESET_ACCTOTALS: 1046 case RAIDFRAME_GET_ACCTOTALS: 1047 case RAIDFRAME_KEEP_ACCTOTALS: 1048 case RAIDFRAME_GET_SIZE: 1049 case RAIDFRAME_FAIL_DISK: 1050 case RAIDFRAME_COPYBACK: 1051 case RAIDFRAME_CHECK_RECON_STATUS: 1052 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1053 case RAIDFRAME_GET_COMPONENT_LABEL: 1054 case RAIDFRAME_SET_COMPONENT_LABEL: 1055 case RAIDFRAME_ADD_HOT_SPARE: 1056 case RAIDFRAME_REMOVE_HOT_SPARE: 1057 case RAIDFRAME_INIT_LABELS: 1058 case RAIDFRAME_REBUILD_IN_PLACE: 1059 case RAIDFRAME_CHECK_PARITY: 1060 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1061 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1062 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1063 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1064 case RAIDFRAME_SET_AUTOCONFIG: 1065 case RAIDFRAME_SET_ROOT: 1066 case RAIDFRAME_DELETE_COMPONENT: 1067 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1068 if ((rs->sc_flags & RAIDF_INITED) == 0) 1069 return (ENXIO); 1070 } 1071 
1072 switch (cmd) { 1073 1074 /* configure the system */ 1075 case RAIDFRAME_CONFIGURE: 1076 1077 if (raidPtr->valid) { 1078 /* There is a valid RAID set running on this unit! */ 1079 printf("raid%d: Device already configured!\n",unit); 1080 return(EINVAL); 1081 } 1082 1083 /* copy-in the configuration information */ 1084 /* data points to a pointer to the configuration structure */ 1085 1086 u_cfg = *((RF_Config_t **) data); 1087 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1088 if (k_cfg == NULL) { 1089 return (ENOMEM); 1090 } 1091 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1092 if (retcode) { 1093 RF_Free(k_cfg, sizeof(RF_Config_t)); 1094 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1095 retcode)); 1096 return (retcode); 1097 } 1098 /* allocate a buffer for the layout-specific data, and copy it 1099 * in */ 1100 if (k_cfg->layoutSpecificSize) { 1101 if (k_cfg->layoutSpecificSize > 10000) { 1102 /* sanity check */ 1103 RF_Free(k_cfg, sizeof(RF_Config_t)); 1104 return (EINVAL); 1105 } 1106 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1107 (u_char *)); 1108 if (specific_buf == NULL) { 1109 RF_Free(k_cfg, sizeof(RF_Config_t)); 1110 return (ENOMEM); 1111 } 1112 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1113 k_cfg->layoutSpecificSize); 1114 if (retcode) { 1115 RF_Free(k_cfg, sizeof(RF_Config_t)); 1116 RF_Free(specific_buf, 1117 k_cfg->layoutSpecificSize); 1118 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1119 retcode)); 1120 return (retcode); 1121 } 1122 } else 1123 specific_buf = NULL; 1124 k_cfg->layoutSpecific = specific_buf; 1125 1126 /* should do some kind of sanity check on the configuration. 1127 * Store the sum of all the bytes in the last byte? 
*/ 1128 1129 /* configure the system */ 1130 1131 /* 1132 * Clear the entire RAID descriptor, just to make sure 1133 * there is no stale data left in the case of a 1134 * reconfiguration 1135 */ 1136 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 1137 raidPtr->raidid = unit; 1138 1139 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1140 1141 if (retcode == 0) { 1142 1143 /* allow this many simultaneous IO's to 1144 this RAID device */ 1145 raidPtr->openings = RAIDOUTSTANDING; 1146 1147 raidinit(raidPtr); 1148 rf_markalldirty(raidPtr); 1149 } 1150 /* free the buffers. No return code here. */ 1151 if (k_cfg->layoutSpecificSize) { 1152 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1153 } 1154 RF_Free(k_cfg, sizeof(RF_Config_t)); 1155 1156 return (retcode); 1157 1158 /* shutdown the system */ 1159 case RAIDFRAME_SHUTDOWN: 1160 1161 if ((error = raidlock(rs)) != 0) 1162 return (error); 1163 1164 /* 1165 * If somebody has a partition mounted, we shouldn't 1166 * shutdown. 1167 */ 1168 1169 part = DISKPART(dev); 1170 pmask = (1 << part); 1171 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1172 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1173 (rs->sc_dkdev.dk_copenmask & pmask))) { 1174 raidunlock(rs); 1175 return (EBUSY); 1176 } 1177 1178 retcode = rf_Shutdown(raidPtr); 1179 1180 /* It's no longer initialized... */ 1181 rs->sc_flags &= ~RAIDF_INITED; 1182 1183 /* free the pseudo device attach bits */ 1184 1185 cf = device_cfdata(rs->sc_dev); 1186 /* XXX this causes us to not return any errors 1187 from the above call to rf_Shutdown() */ 1188 retcode = config_detach(rs->sc_dev, DETACH_QUIET); 1189 free(cf, M_RAIDFRAME); 1190 1191 /* Detach the disk. 
*/ 1192 disk_detach(&rs->sc_dkdev); 1193 disk_destroy(&rs->sc_dkdev); 1194 1195 raidunlock(rs); 1196 1197 return (retcode); 1198 case RAIDFRAME_GET_COMPONENT_LABEL: 1199 clabel_ptr = (RF_ComponentLabel_t **) data; 1200 /* need to read the component label for the disk indicated 1201 by row,column in clabel */ 1202 1203 /* For practice, let's get it directly fromdisk, rather 1204 than from the in-core copy */ 1205 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 1206 (RF_ComponentLabel_t *)); 1207 if (clabel == NULL) 1208 return (ENOMEM); 1209 1210 retcode = copyin( *clabel_ptr, clabel, 1211 sizeof(RF_ComponentLabel_t)); 1212 1213 if (retcode) { 1214 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1215 return(retcode); 1216 } 1217 1218 clabel->row = 0; /* Don't allow looking at anything else.*/ 1219 1220 column = clabel->column; 1221 1222 if ((column < 0) || (column >= raidPtr->numCol + 1223 raidPtr->numSpare)) { 1224 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1225 return(EINVAL); 1226 } 1227 1228 retcode = raidread_component_label(raidPtr->Disks[column].dev, 1229 raidPtr->raid_cinfo[column].ci_vp, 1230 clabel ); 1231 1232 if (retcode == 0) { 1233 retcode = copyout(clabel, *clabel_ptr, 1234 sizeof(RF_ComponentLabel_t)); 1235 } 1236 RF_Free(clabel, sizeof(RF_ComponentLabel_t)); 1237 return (retcode); 1238 1239 case RAIDFRAME_SET_COMPONENT_LABEL: 1240 clabel = (RF_ComponentLabel_t *) data; 1241 1242 /* XXX check the label for valid stuff... */ 1243 /* Note that some things *should not* get modified -- 1244 the user should be re-initing the labels instead of 1245 trying to patch things. 
1246 */ 1247 1248 raidid = raidPtr->raidid; 1249 #ifdef DEBUG 1250 printf("raid%d: Got component label:\n", raidid); 1251 printf("raid%d: Version: %d\n", raidid, clabel->version); 1252 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1253 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1254 printf("raid%d: Column: %d\n", raidid, clabel->column); 1255 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1256 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1257 printf("raid%d: Status: %d\n", raidid, clabel->status); 1258 #endif 1259 clabel->row = 0; 1260 column = clabel->column; 1261 1262 if ((column < 0) || (column >= raidPtr->numCol)) { 1263 return(EINVAL); 1264 } 1265 1266 /* XXX this isn't allowed to do anything for now :-) */ 1267 1268 /* XXX and before it is, we need to fill in the rest 1269 of the fields!?!?!?! */ 1270 #if 0 1271 raidwrite_component_label( 1272 raidPtr->Disks[column].dev, 1273 raidPtr->raid_cinfo[column].ci_vp, 1274 clabel ); 1275 #endif 1276 return (0); 1277 1278 case RAIDFRAME_INIT_LABELS: 1279 clabel = (RF_ComponentLabel_t *) data; 1280 /* 1281 we only want the serial number from 1282 the above. We get all the rest of the information 1283 from the config that was used to create this RAID 1284 set. 
1285 */ 1286 1287 raidPtr->serial_number = clabel->serial_number; 1288 1289 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t), 1290 (RF_ComponentLabel_t *)); 1291 if (ci_label == NULL) 1292 return (ENOMEM); 1293 1294 raid_init_component_label(raidPtr, ci_label); 1295 ci_label->serial_number = clabel->serial_number; 1296 ci_label->row = 0; /* we dont' pretend to support more */ 1297 1298 for(column=0;column<raidPtr->numCol;column++) { 1299 diskPtr = &raidPtr->Disks[column]; 1300 if (!RF_DEAD_DISK(diskPtr->status)) { 1301 ci_label->partitionSize = diskPtr->partitionSize; 1302 ci_label->column = column; 1303 raidwrite_component_label( 1304 raidPtr->Disks[column].dev, 1305 raidPtr->raid_cinfo[column].ci_vp, 1306 ci_label ); 1307 } 1308 } 1309 RF_Free(ci_label, sizeof(RF_ComponentLabel_t)); 1310 1311 return (retcode); 1312 case RAIDFRAME_SET_AUTOCONFIG: 1313 d = rf_set_autoconfig(raidPtr, *(int *) data); 1314 printf("raid%d: New autoconfig value is: %d\n", 1315 raidPtr->raidid, d); 1316 *(int *) data = d; 1317 return (retcode); 1318 1319 case RAIDFRAME_SET_ROOT: 1320 d = rf_set_rootpartition(raidPtr, *(int *) data); 1321 printf("raid%d: New rootpartition value is: %d\n", 1322 raidPtr->raidid, d); 1323 *(int *) data = d; 1324 return (retcode); 1325 1326 /* initialize all parity */ 1327 case RAIDFRAME_REWRITEPARITY: 1328 1329 if (raidPtr->Layout.map->faultsTolerated == 0) { 1330 /* Parity for RAID 0 is trivially correct */ 1331 raidPtr->parity_good = RF_RAID_CLEAN; 1332 return(0); 1333 } 1334 1335 if (raidPtr->parity_rewrite_in_progress == 1) { 1336 /* Re-write is already in progress! 
*/ 1337 return(EINVAL); 1338 } 1339 1340 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1341 rf_RewriteParityThread, 1342 raidPtr,"raid_parity"); 1343 return (retcode); 1344 1345 1346 case RAIDFRAME_ADD_HOT_SPARE: 1347 sparePtr = (RF_SingleComponent_t *) data; 1348 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1349 retcode = rf_add_hot_spare(raidPtr, &component); 1350 return(retcode); 1351 1352 case RAIDFRAME_REMOVE_HOT_SPARE: 1353 return(retcode); 1354 1355 case RAIDFRAME_DELETE_COMPONENT: 1356 componentPtr = (RF_SingleComponent_t *)data; 1357 memcpy( &component, componentPtr, 1358 sizeof(RF_SingleComponent_t)); 1359 retcode = rf_delete_component(raidPtr, &component); 1360 return(retcode); 1361 1362 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1363 componentPtr = (RF_SingleComponent_t *)data; 1364 memcpy( &component, componentPtr, 1365 sizeof(RF_SingleComponent_t)); 1366 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1367 return(retcode); 1368 1369 case RAIDFRAME_REBUILD_IN_PLACE: 1370 1371 if (raidPtr->Layout.map->faultsTolerated == 0) { 1372 /* Can't do this on a RAID 0!! */ 1373 return(EINVAL); 1374 } 1375 1376 if (raidPtr->recon_in_progress == 1) { 1377 /* a reconstruct is already in progress! */ 1378 return(EINVAL); 1379 } 1380 1381 componentPtr = (RF_SingleComponent_t *) data; 1382 memcpy( &component, componentPtr, 1383 sizeof(RF_SingleComponent_t)); 1384 component.row = 0; /* we don't support any more */ 1385 column = component.column; 1386 1387 if ((column < 0) || (column >= raidPtr->numCol)) { 1388 return(EINVAL); 1389 } 1390 1391 RF_LOCK_MUTEX(raidPtr->mutex); 1392 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1393 (raidPtr->numFailures > 0)) { 1394 /* XXX 0 above shouldn't be constant!!! */ 1395 /* some component other than this has failed. 1396 Let's not make things worse than they already 1397 are... 
*/ 1398 printf("raid%d: Unable to reconstruct to disk at:\n", 1399 raidPtr->raidid); 1400 printf("raid%d: Col: %d Too many failures.\n", 1401 raidPtr->raidid, column); 1402 RF_UNLOCK_MUTEX(raidPtr->mutex); 1403 return (EINVAL); 1404 } 1405 if (raidPtr->Disks[column].status == 1406 rf_ds_reconstructing) { 1407 printf("raid%d: Unable to reconstruct to disk at:\n", 1408 raidPtr->raidid); 1409 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1410 1411 RF_UNLOCK_MUTEX(raidPtr->mutex); 1412 return (EINVAL); 1413 } 1414 if (raidPtr->Disks[column].status == rf_ds_spared) { 1415 RF_UNLOCK_MUTEX(raidPtr->mutex); 1416 return (EINVAL); 1417 } 1418 RF_UNLOCK_MUTEX(raidPtr->mutex); 1419 1420 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1421 if (rrcopy == NULL) 1422 return(ENOMEM); 1423 1424 rrcopy->raidPtr = (void *) raidPtr; 1425 rrcopy->col = column; 1426 1427 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1428 rf_ReconstructInPlaceThread, 1429 rrcopy,"raid_reconip"); 1430 return(retcode); 1431 1432 case RAIDFRAME_GET_INFO: 1433 if (!raidPtr->valid) 1434 return (ENODEV); 1435 ucfgp = (RF_DeviceConfig_t **) data; 1436 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1437 (RF_DeviceConfig_t *)); 1438 if (d_cfg == NULL) 1439 return (ENOMEM); 1440 d_cfg->rows = 1; /* there is only 1 row now */ 1441 d_cfg->cols = raidPtr->numCol; 1442 d_cfg->ndevs = raidPtr->numCol; 1443 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1444 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1445 return (ENOMEM); 1446 } 1447 d_cfg->nspares = raidPtr->numSpare; 1448 if (d_cfg->nspares >= RF_MAX_DISKS) { 1449 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1450 return (ENOMEM); 1451 } 1452 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1453 d = 0; 1454 for (j = 0; j < d_cfg->cols; j++) { 1455 d_cfg->devs[d] = raidPtr->Disks[j]; 1456 d++; 1457 } 1458 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1459 d_cfg->spares[i] = raidPtr->Disks[j]; 1460 } 1461 retcode = 
copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1462 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1463 1464 return (retcode); 1465 1466 case RAIDFRAME_CHECK_PARITY: 1467 *(int *) data = raidPtr->parity_good; 1468 return (0); 1469 1470 case RAIDFRAME_RESET_ACCTOTALS: 1471 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1472 return (0); 1473 1474 case RAIDFRAME_GET_ACCTOTALS: 1475 totals = (RF_AccTotals_t *) data; 1476 *totals = raidPtr->acc_totals; 1477 return (0); 1478 1479 case RAIDFRAME_KEEP_ACCTOTALS: 1480 raidPtr->keep_acc_totals = *(int *)data; 1481 return (0); 1482 1483 case RAIDFRAME_GET_SIZE: 1484 *(int *) data = raidPtr->totalSectors; 1485 return (0); 1486 1487 /* fail a disk & optionally start reconstruction */ 1488 case RAIDFRAME_FAIL_DISK: 1489 1490 if (raidPtr->Layout.map->faultsTolerated == 0) { 1491 /* Can't do this on a RAID 0!! */ 1492 return(EINVAL); 1493 } 1494 1495 rr = (struct rf_recon_req *) data; 1496 rr->row = 0; 1497 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1498 return (EINVAL); 1499 1500 1501 RF_LOCK_MUTEX(raidPtr->mutex); 1502 if (raidPtr->status == rf_rs_reconstructing) { 1503 /* you can't fail a disk while we're reconstructing! */ 1504 /* XXX wrong for RAID6 */ 1505 RF_UNLOCK_MUTEX(raidPtr->mutex); 1506 return (EINVAL); 1507 } 1508 if ((raidPtr->Disks[rr->col].status == 1509 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1510 /* some other component has failed. Let's not make 1511 things worse. XXX wrong for RAID6 */ 1512 RF_UNLOCK_MUTEX(raidPtr->mutex); 1513 return (EINVAL); 1514 } 1515 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1516 /* Can't fail a spared disk! 
*/ 1517 RF_UNLOCK_MUTEX(raidPtr->mutex); 1518 return (EINVAL); 1519 } 1520 RF_UNLOCK_MUTEX(raidPtr->mutex); 1521 1522 /* make a copy of the recon request so that we don't rely on 1523 * the user's buffer */ 1524 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1525 if (rrcopy == NULL) 1526 return(ENOMEM); 1527 memcpy(rrcopy, rr, sizeof(*rr)); 1528 rrcopy->raidPtr = (void *) raidPtr; 1529 1530 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1531 rf_ReconThread, 1532 rrcopy,"raid_recon"); 1533 return (0); 1534 1535 /* invoke a copyback operation after recon on whatever disk 1536 * needs it, if any */ 1537 case RAIDFRAME_COPYBACK: 1538 1539 if (raidPtr->Layout.map->faultsTolerated == 0) { 1540 /* This makes no sense on a RAID 0!! */ 1541 return(EINVAL); 1542 } 1543 1544 if (raidPtr->copyback_in_progress == 1) { 1545 /* Copyback is already in progress! */ 1546 return(EINVAL); 1547 } 1548 1549 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1550 rf_CopybackThread, 1551 raidPtr,"raid_copyback"); 1552 return (retcode); 1553 1554 /* return the percentage completion of reconstruction */ 1555 case RAIDFRAME_CHECK_RECON_STATUS: 1556 if (raidPtr->Layout.map->faultsTolerated == 0) { 1557 /* This makes no sense on a RAID 0, so tell the 1558 user it's done. 
*/ 1559 *(int *) data = 100; 1560 return(0); 1561 } 1562 if (raidPtr->status != rf_rs_reconstructing) 1563 *(int *) data = 100; 1564 else { 1565 if (raidPtr->reconControl->numRUsTotal > 0) { 1566 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1567 } else { 1568 *(int *) data = 0; 1569 } 1570 } 1571 return (0); 1572 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1573 progressInfoPtr = (RF_ProgressInfo_t **) data; 1574 if (raidPtr->status != rf_rs_reconstructing) { 1575 progressInfo.remaining = 0; 1576 progressInfo.completed = 100; 1577 progressInfo.total = 100; 1578 } else { 1579 progressInfo.total = 1580 raidPtr->reconControl->numRUsTotal; 1581 progressInfo.completed = 1582 raidPtr->reconControl->numRUsComplete; 1583 progressInfo.remaining = progressInfo.total - 1584 progressInfo.completed; 1585 } 1586 retcode = copyout(&progressInfo, *progressInfoPtr, 1587 sizeof(RF_ProgressInfo_t)); 1588 return (retcode); 1589 1590 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1591 if (raidPtr->Layout.map->faultsTolerated == 0) { 1592 /* This makes no sense on a RAID 0, so tell the 1593 user it's done. 
*/ 1594 *(int *) data = 100; 1595 return(0); 1596 } 1597 if (raidPtr->parity_rewrite_in_progress == 1) { 1598 *(int *) data = 100 * 1599 raidPtr->parity_rewrite_stripes_done / 1600 raidPtr->Layout.numStripe; 1601 } else { 1602 *(int *) data = 100; 1603 } 1604 return (0); 1605 1606 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1607 progressInfoPtr = (RF_ProgressInfo_t **) data; 1608 if (raidPtr->parity_rewrite_in_progress == 1) { 1609 progressInfo.total = raidPtr->Layout.numStripe; 1610 progressInfo.completed = 1611 raidPtr->parity_rewrite_stripes_done; 1612 progressInfo.remaining = progressInfo.total - 1613 progressInfo.completed; 1614 } else { 1615 progressInfo.remaining = 0; 1616 progressInfo.completed = 100; 1617 progressInfo.total = 100; 1618 } 1619 retcode = copyout(&progressInfo, *progressInfoPtr, 1620 sizeof(RF_ProgressInfo_t)); 1621 return (retcode); 1622 1623 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1624 if (raidPtr->Layout.map->faultsTolerated == 0) { 1625 /* This makes no sense on a RAID 0 */ 1626 *(int *) data = 100; 1627 return(0); 1628 } 1629 if (raidPtr->copyback_in_progress == 1) { 1630 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1631 raidPtr->Layout.numStripe; 1632 } else { 1633 *(int *) data = 100; 1634 } 1635 return (0); 1636 1637 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1638 progressInfoPtr = (RF_ProgressInfo_t **) data; 1639 if (raidPtr->copyback_in_progress == 1) { 1640 progressInfo.total = raidPtr->Layout.numStripe; 1641 progressInfo.completed = 1642 raidPtr->copyback_stripes_done; 1643 progressInfo.remaining = progressInfo.total - 1644 progressInfo.completed; 1645 } else { 1646 progressInfo.remaining = 0; 1647 progressInfo.completed = 100; 1648 progressInfo.total = 100; 1649 } 1650 retcode = copyout(&progressInfo, *progressInfoPtr, 1651 sizeof(RF_ProgressInfo_t)); 1652 return (retcode); 1653 1654 /* the sparetable daemon calls this to wait for the kernel to 1655 * need a spare table. 
this ioctl does not return until a 1656 * spare table is needed. XXX -- calling mpsleep here in the 1657 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1658 * -- I should either compute the spare table in the kernel, 1659 * or have a different -- XXX XXX -- interface (a different 1660 * character device) for delivering the table -- XXX */ 1661 #if 0 1662 case RAIDFRAME_SPARET_WAIT: 1663 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1664 while (!rf_sparet_wait_queue) 1665 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1666 waitreq = rf_sparet_wait_queue; 1667 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1668 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1669 1670 /* structure assignment */ 1671 *((RF_SparetWait_t *) data) = *waitreq; 1672 1673 RF_Free(waitreq, sizeof(*waitreq)); 1674 return (0); 1675 1676 /* wakes up a process waiting on SPARET_WAIT and puts an error 1677 * code in it that will cause the dameon to exit */ 1678 case RAIDFRAME_ABORT_SPARET_WAIT: 1679 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1680 waitreq->fcol = -1; 1681 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1682 waitreq->next = rf_sparet_wait_queue; 1683 rf_sparet_wait_queue = waitreq; 1684 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1685 wakeup(&rf_sparet_wait_queue); 1686 return (0); 1687 1688 /* used by the spare table daemon to deliver a spare table 1689 * into the kernel */ 1690 case RAIDFRAME_SEND_SPARET: 1691 1692 /* install the spare table */ 1693 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1694 1695 /* respond to the requestor. 
the return status of the spare 1696 * table installation is passed in the "fcol" field */ 1697 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1698 waitreq->fcol = retcode; 1699 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1700 waitreq->next = rf_sparet_resp_queue; 1701 rf_sparet_resp_queue = waitreq; 1702 wakeup(&rf_sparet_resp_queue); 1703 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1704 1705 return (retcode); 1706 #endif 1707 1708 default: 1709 break; /* fall through to the os-specific code below */ 1710 1711 } 1712 1713 if (!raidPtr->valid) 1714 return (EINVAL); 1715 1716 /* 1717 * Add support for "regular" device ioctls here. 1718 */ 1719 1720 switch (cmd) { 1721 case DIOCGDINFO: 1722 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1723 break; 1724 #ifdef __HAVE_OLD_DISKLABEL 1725 case ODIOCGDINFO: 1726 newlabel = *(rs->sc_dkdev.dk_label); 1727 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1728 return ENOTTY; 1729 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1730 break; 1731 #endif 1732 1733 case DIOCGPART: 1734 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1735 ((struct partinfo *) data)->part = 1736 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1737 break; 1738 1739 case DIOCWDINFO: 1740 case DIOCSDINFO: 1741 #ifdef __HAVE_OLD_DISKLABEL 1742 case ODIOCWDINFO: 1743 case ODIOCSDINFO: 1744 #endif 1745 { 1746 struct disklabel *lp; 1747 #ifdef __HAVE_OLD_DISKLABEL 1748 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1749 memset(&newlabel, 0, sizeof newlabel); 1750 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1751 lp = &newlabel; 1752 } else 1753 #endif 1754 lp = (struct disklabel *)data; 1755 1756 if ((error = raidlock(rs)) != 0) 1757 return (error); 1758 1759 rs->sc_flags |= RAIDF_LABELLING; 1760 1761 error = setdisklabel(rs->sc_dkdev.dk_label, 1762 lp, 0, rs->sc_dkdev.dk_cpulabel); 1763 if (error == 0) { 1764 if (cmd == DIOCWDINFO 1765 #ifdef __HAVE_OLD_DISKLABEL 1766 || cmd == ODIOCWDINFO 1767 #endif 1768 ) 1769 
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		/* Enable or disable writes that overlap the label sector. */
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		/* Fabricate a default label from the RAID geometry. */
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
		dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		/* NOTE(review): strcpy assumes dkw_parent can hold
		 * sc_xname ("raid%d") -- confirm the field sizes. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device: mark the unit initialized, name it, attach the
   pseudo-device, and register the disk with the kernel. */


static void
raidinit(RF_Raid_t *raidPtr)
{
	struct cfdata *cf;
	struct raid_softc *rs;
	int unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds.
 */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	/* A failed attach is only logged; rs->sc_dev stays NULL here. */
	if (rs->sc_dev==NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;

	/* Scan the new disk for wedges. */
	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used.
 GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* Queue our request and wake the daemon. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	/* Sleep until the daemon posts a response; loop re-checks the
	 * queue on every wakeup. */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* fcol carries the daemon's status code back to us. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Drop the mutex around the label update, then reacquire. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit...
 */
	/* Loop invariant: raidPtr->mutex is held at each test of openings
	 * and released while the buffer is examined and dispatched. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a partial trailing sector. */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		/* NOTE(review): "1 ||" makes this condition always true. */
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the array; the extra
		 * "sum <" comparisons catch arithmetic wrap-around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Reject transfers that are not a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Claim one opening before dispatching. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		/* On a dispatch error the buffer is completed here with
		 * the error; otherwise completion happens asynchronously. */
		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		    numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it...
 GO */
		/* NOTE(review): the extra parens make this print a plain
		 * string; was this intended to be db1_printf? */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* Complete immediately -- no real I/O for a NOP. */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the buf for the component device and send it down
		 * via the vnode strategy routine; KernelWakeupFunc runs on
		 * completion. */
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			req->type, queue->raidPtr->raidid,
			queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		VOP_STRATEGY(bp->b_vp, bp);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	/* Always reports success; real I/O errors surface via the
	 * completion callback. */
	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
 */
/* I/O completion callback (installed as b_iodone by InitBP): records
 * timing stats, fails the component on I/O error when the set can
 * tolerate it, and hands the request to the raidio thread. */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	/* Block disk interrupts while we touch shared queue state. */
	s = splbio();
	db1_printf(("recovering the request queue:\n"));
	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_optimal) ||
		    (queue->raidPtr->Disks[queue->col].status ==
		    rf_ds_used_spare)) &&
		    (queue->raidPtr->numFailures <
		    queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error. Marking %s as failed.\n",
			    queue->raidPtr->raidid,
			    queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = bp->b_error;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue...
	 */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
    RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
    void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
    struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	/* A zero-length transfer is a driver bug, not a user error. */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
	bp->b_vp = b_vp;
	bp->b_objlock = &b_vp->v_interlock;
	/* Writes must bump the vnode's output counter under its lock. */
	if ((bp->b_flags & B_READ) == 0) {
		mutex_enter(&b_vp->v_interlock);
		b_vp->v_numoutput++;
		mutex_exit(&b_vp->v_interlock);
	}

}

/* Build a synthetic disklabel from the RAID set's geometry. */
static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label...
*/
    /* Geometry below is invented: "tracks" and "cylinders" have no
       physical meaning for a RAID volume. */
    lp->d_secperunit = raidPtr->totalSectors;
    lp->d_secsize = raidPtr->bytesPerSector;
    lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
    lp->d_ntracks = 4 * raidPtr->numCol;
    lp->d_ncylinders = raidPtr->totalSectors /
        (lp->d_nsectors * lp->d_ntracks);
    lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

    strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
    lp->d_type = DTYPE_RAID;
    strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
    lp->d_rpm = 3600;
    lp->d_interleave = 1;
    lp->d_flags = 0;

    /* Only the raw partition exists; it spans the whole set. */
    lp->d_partitions[RAW_PART].p_offset = 0;
    lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
    lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
    lp->d_npartitions = RAW_PART + 1;

    lp->d_magic = DISKMAGIC;
    lp->d_magic2 = DISKMAGIC;
    lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device. If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
    int unit = raidunit(dev);
    struct raid_softc *rs = &raid_softc[unit];
    const char *errstring;
    struct disklabel *lp = rs->sc_dkdev.dk_label;
    struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
    RF_Raid_t *raidPtr;

    db1_printf(("Getting the disklabel...\n"));

    memset(clp, 0, sizeof(*clp));

    raidPtr = raidPtrs[unit];

    /* Start from the fabricated label; readdisklabel() overwrites it
       if a real one is found on the volume. */
    raidgetdefaultlabel(raidPtr, rs, lp);

    /*
     * Call the generic disklabel extraction routine.
     */
    errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
        rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
    if (errstring)
        raidmakedisklabel(rs);
    else {
        int i;
        struct partition *pp;

        /*
         * Sanity check whether the found disklabel is valid.
         *
         * This is necessary since total size of the raid device
         * may vary when an interleave is changed even though exactly
         * same components are used, and old disklabel may be used
         * if that is found.
         */
        if (lp->d_secperunit != rs->sc_size)
            printf("raid%d: WARNING: %s: "
                "total sector size in disklabel (%d) != "
                "the size of raid (%ld)\n", unit, rs->sc_xname,
                lp->d_secperunit, (long) rs->sc_size);
        for (i = 0; i < lp->d_npartitions; i++) {
            pp = &lp->d_partitions[i];
            if (pp->p_offset + pp->p_size > rs->sc_size)
                printf("raid%d: WARNING: %s: end of partition `%c' "
                    "exceeds the size of raid (%ld)\n",
                    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
        }
    }

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
    struct disklabel *lp = rs->sc_dkdev.dk_label;
    db1_printf(("Making a label..\n"));

    /*
     * For historical reasons, if there's no disklabel present
     * the raw partition must be marked FS_BSDFFS.
     */

    lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

    strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

    lp->d_checksum = dkcksum(lp);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :-> GO )
 */
static int
raidlock(struct raid_softc *rs)
{
    int error;

    /* Sleep until the holder drops RAIDF_LOCKED; PCATCH lets a signal
       interrupt the wait, in which case the tsleep error is returned. */
    while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
        rs->sc_flags |= RAIDF_WANTED;
        if ((error =
             tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
            return (error);
    }
    rs->sc_flags |= RAIDF_LOCKED;
    return (0);
}
/*
 * Unlock and wake up any waiters.
2361 */ 2362 static void 2363 raidunlock(struct raid_softc *rs) 2364 { 2365 2366 rs->sc_flags &= ~RAIDF_LOCKED; 2367 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2368 rs->sc_flags &= ~RAIDF_WANTED; 2369 wakeup(rs); 2370 } 2371 } 2372 2373 2374 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2375 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2376 2377 int 2378 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2379 { 2380 RF_ComponentLabel_t clabel; 2381 raidread_component_label(dev, b_vp, &clabel); 2382 clabel.mod_counter = mod_counter; 2383 clabel.clean = RF_RAID_CLEAN; 2384 raidwrite_component_label(dev, b_vp, &clabel); 2385 return(0); 2386 } 2387 2388 2389 int 2390 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2391 { 2392 RF_ComponentLabel_t clabel; 2393 raidread_component_label(dev, b_vp, &clabel); 2394 clabel.mod_counter = mod_counter; 2395 clabel.clean = RF_RAID_DIRTY; 2396 raidwrite_component_label(dev, b_vp, &clabel); 2397 return(0); 2398 } 2399 2400 /* ARGSUSED */ 2401 int 2402 raidread_component_label(dev_t dev, struct vnode *b_vp, 2403 RF_ComponentLabel_t *clabel) 2404 { 2405 struct buf *bp; 2406 const struct bdevsw *bdev; 2407 int error; 2408 2409 /* XXX should probably ensure that we don't try to do this if 2410 someone has changed rf_protected_sectors. */ 2411 2412 if (b_vp == NULL) { 2413 /* For whatever reason, this component is not valid. 2414 Don't try to read a component label from it. */ 2415 return(EINVAL); 2416 } 2417 2418 /* get a block of the appropriate size... 
*/ 2419 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2420 bp->b_dev = dev; 2421 2422 /* get our ducks in a row for the read */ 2423 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2424 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2425 bp->b_flags |= B_READ; 2426 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2427 2428 bdev = bdevsw_lookup(bp->b_dev); 2429 if (bdev == NULL) 2430 return (ENXIO); 2431 (*bdev->d_strategy)(bp); 2432 2433 error = biowait(bp); 2434 2435 if (!error) { 2436 memcpy(clabel, bp->b_data, 2437 sizeof(RF_ComponentLabel_t)); 2438 } 2439 2440 brelse(bp, 0); 2441 return(error); 2442 } 2443 /* ARGSUSED */ 2444 int 2445 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2446 RF_ComponentLabel_t *clabel) 2447 { 2448 struct buf *bp; 2449 const struct bdevsw *bdev; 2450 int error; 2451 2452 /* get a block of the appropriate size... */ 2453 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2454 bp->b_dev = dev; 2455 2456 /* get our ducks in a row for the write */ 2457 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2458 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2459 bp->b_flags |= B_WRITE; 2460 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2461 2462 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2463 2464 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2465 2466 bdev = bdevsw_lookup(bp->b_dev); 2467 if (bdev == NULL) 2468 return (ENXIO); 2469 (*bdev->d_strategy)(bp); 2470 error = biowait(bp); 2471 brelse(bp, 0); 2472 if (error) { 2473 #if 1 2474 printf("Failed to write RAID component info!\n"); 2475 #endif 2476 } 2477 2478 return(error); 2479 } 2480 2481 void 2482 rf_markalldirty(RF_Raid_t *raidPtr) 2483 { 2484 RF_ComponentLabel_t clabel; 2485 int sparecol; 2486 int c; 2487 int j; 2488 int scol = -1; 2489 2490 raidPtr->mod_counter++; 2491 for (c = 0; c < raidPtr->numCol; c++) { 2492 /* we don't want to touch (at all) a disk that has 2493 failed */ 2494 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2495 raidread_component_label( 2496 
raidPtr->Disks[c].dev,
                raidPtr->raid_cinfo[c].ci_vp,
                &clabel);
            if (clabel.status == rf_ds_spared) {
                /* XXX do something special...
                   but whatever you do, don't
                   try to access it!! */
            } else {
                raidmarkdirty(
                    raidPtr->Disks[c].dev,
                    raidPtr->raid_cinfo[c].ci_vp,
                    raidPtr->mod_counter);
            }
        }
    }

    /* Now do the same for any spares that are in active use. */
    for( c = 0; c < raidPtr->numSpare ; c++) {
        sparecol = raidPtr->numCol + c;
        if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
            /*

               we claim this disk is "optimal" if it's
               rf_ds_used_spare, as that means it should be
               directly substitutable for the disk it replaced.
               We note that too...

             */

            /* Find the column this spare is standing in for. */
            for(j=0;j<raidPtr->numCol;j++) {
                if (raidPtr->Disks[j].spareCol == sparecol) {
                    scol = j;
                    break;
                }
            }

            raidread_component_label(
                raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                &clabel);
            /* make sure status is noted */

            raid_init_component_label(raidPtr, &clabel);

            clabel.row = 0;
            clabel.column = scol;
            /* Note: we *don't* change status from rf_ds_used_spare
               to rf_ds_optimal */
            /* clabel.status = rf_ds_optimal; */

            raidmarkdirty(raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                raidPtr->mod_counter);
        }
    }
}


/*
 * Rewrite the component labels on all optimal components (and in-use
 * spares) with a bumped mod_counter and current status/unit number.
 * When final == RF_FINAL_COMPONENT_UPDATE and parity is known clean,
 * also set the clean bit (normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
    RF_ComponentLabel_t clabel;
    int sparecol;
    int c;
    int j;
    int scol;

    scol = -1;

    /* XXX should do extra checks to make sure things really are clean,
       rather than blindly setting the clean bit... */

    raidPtr->mod_counter++;

    for (c = 0; c < raidPtr->numCol; c++) {
        if (raidPtr->Disks[c].status == rf_ds_optimal) {
            raidread_component_label(
                raidPtr->Disks[c].dev,
                raidPtr->raid_cinfo[c].ci_vp,
                &clabel);
            /* make sure status is noted */
            clabel.status = rf_ds_optimal;

            /* bump the counter */
            clabel.mod_counter = raidPtr->mod_counter;

            /* note what unit we are configured as */
            clabel.last_unit = raidPtr->raidid;

            raidwrite_component_label(
                raidPtr->Disks[c].dev,
                raidPtr->raid_cinfo[c].ci_vp,
                &clabel);
            if (final == RF_FINAL_COMPONENT_UPDATE) {
                if (raidPtr->parity_good == RF_RAID_CLEAN) {
                    raidmarkclean(
                        raidPtr->Disks[c].dev,
                        raidPtr->raid_cinfo[c].ci_vp,
                        raidPtr->mod_counter);
                }
            }
        }
        /* else we don't touch it.. */
    }

    for( c = 0; c < raidPtr->numSpare ; c++) {
        sparecol = raidPtr->numCol + c;
        /* Need to ensure that the reconstruct actually completed! */
        if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
            /*

               we claim this disk is "optimal" if it's
               rf_ds_used_spare, as that means it should be
               directly substitutable for the disk it replaced.
               We note that too...

             */

            /* Locate the column this spare replaced. */
            for(j=0;j<raidPtr->numCol;j++) {
                if (raidPtr->Disks[j].spareCol == sparecol) {
                    scol = j;
                    break;
                }
            }

            /* XXX shouldn't *really* need this...
*/
            raidread_component_label(
                raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                &clabel);
            /* make sure status is noted */

            raid_init_component_label(raidPtr, &clabel);

            clabel.mod_counter = raidPtr->mod_counter;
            clabel.column = scol;
            clabel.status = rf_ds_optimal;
            clabel.last_unit = raidPtr->raidid;

            raidwrite_component_label(
                raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                &clabel);
            if (final == RF_FINAL_COMPONENT_UPDATE) {
                if (raidPtr->parity_good == RF_RAID_CLEAN) {
                    raidmarkclean( raidPtr->Disks[sparecol].dev,
                        raidPtr->raid_cinfo[sparecol].ci_vp,
                        raidPtr->mod_counter);
                }
            }
        }
    }
}

/*
 * Close one component vnode.  Auto-configured components were opened
 * via VOP_OPEN (so close with VOP_CLOSE/vput); manually configured ones
 * came from vn_close()-style opens with the caller's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

    if (vp != NULL) {
        if (auto_configured == 1) {
            vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
            VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
            vput(vp);

        } else {
            (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
        }
    }
}

/*
 * Close and forget the vnodes of all components and spares; clear the
 * auto_configured flags so a later reconfiguration starts clean.
 */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
    int r,c;
    struct vnode *vp;
    int acd;


    /* We take this opportunity to close the vnodes like we should.. */

    for (c = 0; c < raidPtr->numCol; c++) {
        vp = raidPtr->raid_cinfo[c].ci_vp;
        acd = raidPtr->Disks[c].auto_configured;
        rf_close_component(raidPtr, vp, acd);
        raidPtr->raid_cinfo[c].ci_vp = NULL;
        raidPtr->Disks[c].auto_configured = 0;
    }

    for (r = 0; r < raidPtr->numSpare; r++) {
        vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
        acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
        rf_close_component(raidPtr, vp, acd);
        raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
        raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
    }
}


/*
 * Kernel thread body: fail a disk (optionally kicking off
 * reconstruction), then exit.  Owns and frees *req.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
    int s;
    RF_Raid_t *raidPtr;

    s = splbio();
    raidPtr = (RF_Raid_t *) req->raidPtr;
    raidPtr->recon_in_progress = 1;

    rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
        ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

    RF_Free(req, sizeof(*req));

    raidPtr->recon_in_progress = 0;
    splx(s);

    /* That's all... */
    kthread_exit(0);    /* does not return */
}

/*
 * Kernel thread body: rewrite all parity, set parity_good on success,
 * and notify anyone blocked on shutdown waiting for us to finish.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
    int retcode;
    int s;

    raidPtr->parity_rewrite_stripes_done = 0;
    raidPtr->parity_rewrite_in_progress = 1;
    s = splbio();
    retcode = rf_RewriteParity(raidPtr);
    splx(s);
    if (retcode) {
        printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
    } else {
        /* set the clean bit! If we shutdown correctly,
           the clean bit on each component label will get
           set */
        raidPtr->parity_good = RF_RAID_CLEAN;
    }
    raidPtr->parity_rewrite_in_progress = 0;

    /* Anyone waiting for us to stop? If so, inform them... */
    if (raidPtr->waitShutdown) {
        wakeup(&raidPtr->parity_rewrite_in_progress);
    }

    /* That's all...
*/
    kthread_exit(0);    /* does not return */
}


/*
 * Kernel thread body: copy reconstructed data back onto a replaced
 * component, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
    int s;

    raidPtr->copyback_in_progress = 1;
    s = splbio();
    rf_CopybackReconstructedData(raidPtr);
    splx(s);
    raidPtr->copyback_in_progress = 0;

    /* That's all... */
    kthread_exit(0);    /* does not return */
}


/*
 * Kernel thread body: reconstruct a component in place (same device),
 * then exit.  Owns and frees *req.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
    int s;
    RF_Raid_t *raidPtr;

    s = splbio();
    raidPtr = req->raidPtr;
    raidPtr->recon_in_progress = 1;
    rf_ReconstructInPlace(raidPtr, req->col);
    RF_Free(req, sizeof(*req));
    raidPtr->recon_in_progress = 0;
    splx(s);

    /* That's all... */
    kthread_exit(0);    /* does not return */
}

/*
 * Read and validate the component label on (dev, vp).  If it looks
 * reasonable, prepend a new RF_AutoConfig_t to ac_list (taking ownership
 * of vp); otherwise close/release the vnode.  On out-of-memory, the
 * entire ac_list is torn down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size)
{
    int good_one = 0;
    RF_ComponentLabel_t *clabel;
    RF_AutoConfig_t *ac;

    clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
    if (clabel == NULL) {
oomem:
        /* Free every pending autoconfig entry; we can't continue. */
        while(ac_list) {
            ac = ac_list;
            if (ac->clabel)
                free(ac->clabel, M_RAIDFRAME);
            ac_list = ac_list->next;
            free(ac, M_RAIDFRAME);
        }
        printf("RAID auto config: out of memory!\n");
        return NULL; /* XXX probably should panic? */
    }

    if (!raidread_component_label(dev, vp, clabel)) {
        /* Got the label. Does it look reasonable? */
        if (rf_reasonable_label(clabel) &&
            (clabel->partitionSize <= size)) {
#ifdef DEBUG
            printf("Component on: %s: %llu\n",
                cname, (unsigned long long)size);
            rf_print_component_label(clabel);
#endif
            /* if it's reasonable, add it, else ignore it. */
            ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
                M_NOWAIT);
            if (ac == NULL) {
                free(clabel, M_RAIDFRAME);
                goto oomem;
            }
            strlcpy(ac->devname, cname, sizeof(ac->devname));
            ac->dev = dev;
            ac->vp = vp;
            ac->clabel = clabel;
            ac->next = ac_list;
            ac_list = ac;
            good_one = 1;
        }
    }
    if (!good_one) {
        /* cleanup */
        free(clabel, M_RAIDFRAME);
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
        vput(vp);
    }
    return ac_list;
}

/*
 * Scan every disk-class device in the system (skipping floppies and
 * CDs), open its raw partition (or wedge), and collect any partitions
 * marked FS_RAID (or wedges of type RAIDframe) into an autoconfig list.
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
    struct vnode *vp;
    struct disklabel label;
    struct device *dv;
    dev_t dev;
    int bmajor, bminor, wedge;
    int error;
    int i;
    RF_AutoConfig_t *ac_list;


    /* initialize the AutoConfig list */
    ac_list = NULL;

    /* we begin by trolling through *all* the devices on the system */

    for (dv = alldevs.tqh_first; dv != NULL;
         dv = dv->dv_list.tqe_next) {

        /* we are only interested in disks... */
        if (device_class(dv) != DV_DISK)
            continue;

        /* we don't care about floppies... */
        if (device_is_a(dv, "fd")) {
            continue;
        }

        /* we don't care about CD's... */
        if (device_is_a(dv, "cd")) {
            continue;
        }

        /* hdfd is the Atari/Hades floppy driver */
        if (device_is_a(dv, "hdfd")) {
            continue;
        }

        /* fdisa is the Atari/Milan floppy driver */
        if (device_is_a(dv, "fdisa")) {
            continue;
        }

        /* need to find the device_name_to_block_device_major stuff */
        bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

        /* get a vnode for the raw partition of this disk */

        wedge = device_is_a(dv, "dk");
        bminor = minor(device_unit(dv));
        dev = wedge ?
makedev(bmajor, bminor) :
            MAKEDISKDEV(bmajor, bminor, RAW_PART);
        if (bdevvp(dev, &vp))
            panic("RAID can't alloc vnode");

        error = VOP_OPEN(vp, FREAD, NOCRED);

        if (error) {
            /* "Who cares."  Continue looking
               for something that exists*/
            vput(vp);
            continue;
        }

        if (wedge) {
            /* Wedges carry their own partition-type string
               instead of a disklabel entry. */
            struct dkwedge_info dkw;
            error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
                NOCRED);
            if (error) {
                printf("RAIDframe: can't get wedge info for "
                    "dev %s (%d)\n", device_xname(dv), error);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                vput(vp);
                continue;
            }

            if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                vput(vp);
                continue;
            }

            ac_list = rf_get_component(ac_list, dev, vp,
                device_xname(dv), dkw.dkw_size);
            continue;
        }

        /* Ok, the disk exists.  Go get the disklabel. */
        error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
        if (error) {
            /*
             * XXX can't happen - open() would
             * have errored out (or faked up one)
             */
            if (error != ENOTTY)
                printf("RAIDframe: can't get label for dev "
                    "%s (%d)\n", device_xname(dv), error);
        }

        /* don't need this any more.  We'll allocate it again
           a little later if we really do... */
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
        vput(vp);

        if (error)
            continue;

        /* Re-open each FS_RAID partition individually and hand it
           to rf_get_component() for label inspection. */
        for (i = 0; i < label.d_npartitions; i++) {
            char cname[sizeof(ac_list->devname)];

            /* We only support partitions marked as RAID */
            if (label.d_partitions[i].p_fstype != FS_RAID)
                continue;

            dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
            if (bdevvp(dev, &vp))
                panic("RAID can't alloc vnode");

            error = VOP_OPEN(vp, FREAD, NOCRED);
            if (error) {
                /* Whatever... */
                vput(vp);
                continue;
            }
            snprintf(cname, sizeof(cname), "%s%c",
                device_xname(dv), 'a' + i);
            ac_list = rf_get_component(ac_list, dev, vp, cname,
                label.d_partitions[i].p_size);
        }
    }
    return ac_list;
}


/*
 * Sanity-check a component label: known version, sane clean flag, and
 * row/column/geometry values that are internally consistent.  Returns
 * 1 if the label looks usable, 0 otherwise.
 */
static int
rf_reasonable_label(RF_ComponentLabel_t *clabel)
{

    if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
         (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
        ((clabel->clean == RF_RAID_CLEAN) ||
         (clabel->clean == RF_RAID_DIRTY)) &&
        clabel->row >=0 &&
        clabel->column >= 0 &&
        clabel->num_rows > 0 &&
        clabel->num_columns > 0 &&
        clabel->row < clabel->num_rows &&
        clabel->column < clabel->num_columns &&
        clabel->blockSize > 0 &&
        clabel->numBlocks > 0) {
        /* label looks reasonable enough... */
        return(1);
    }
    return(0);
}


#ifdef DEBUG
/*
 * Dump the interesting fields of a component label to the console
 * (debug kernels only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
    printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
        clabel->row, clabel->column,
        clabel->num_rows, clabel->num_columns);
    printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
        clabel->version, clabel->serial_number,
        clabel->mod_counter);
    printf(" Clean: %s Status: %d\n",
        clabel->clean ?
"Yes" : "No", clabel->status );
    printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
        clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
    printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
        (char) clabel->parityConfig, clabel->blockSize,
        clabel->numBlocks);
    printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
    printf(" Contains root partition: %s\n",
        clabel->root_partition ? "Yes" : "No" );
    printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
    printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the flat autoconfig list into config sets: each set groups
 * components whose labels agree (per rf_does_it_fit()) and therefore
 * belong to the same RAID set.  Consumes ac_list by re-linking its
 * entries onto the per-set chains.
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
    RF_AutoConfig_t *ac;
    RF_ConfigSet_t *config_sets;
    RF_ConfigSet_t *cset;
    RF_AutoConfig_t *ac_next;


    config_sets = NULL;

    /* Go through the AutoConfig list, and figure out which components
       belong to what sets. */
    ac = ac_list;
    while(ac!=NULL) {
        /* we're going to putz with ac->next, so save it here
           for use at the end of the loop */
        ac_next = ac->next;

        if (config_sets == NULL) {
            /* will need at least this one... */
            config_sets = (RF_ConfigSet_t *)
                malloc(sizeof(RF_ConfigSet_t),
                    M_RAIDFRAME, M_NOWAIT);
            if (config_sets == NULL) {
                panic("rf_create_auto_sets: No memory!");
            }
            /* this one is easy :) */
            config_sets->ac = ac;
            config_sets->next = NULL;
            config_sets->rootable = 0;
            ac->next = NULL;
        } else {
            /* which set does this component fit into? */
            cset = config_sets;
            while(cset!=NULL) {
                if (rf_does_it_fit(cset, ac)) {
                    /* looks like it matches... */
                    ac->next = cset->ac;
                    cset->ac = ac;
                    break;
                }
                cset = cset->next;
            }
            if (cset==NULL) {
                /* didn't find a match above... new set..*/
                cset = (RF_ConfigSet_t *)
                    malloc(sizeof(RF_ConfigSet_t),
                        M_RAIDFRAME, M_NOWAIT);
                if (cset == NULL) {
                    panic("rf_create_auto_sets: No memory!");
                }
                cset->ac = ac;
                ac->next = NULL;
                cset->next = config_sets;
                cset->rootable = 0;
                config_sets = cset;
            }
        }
        ac = ac_next;
    }


    return(config_sets);
}

/*
 * Decide whether component ac belongs to config set cset by comparing
 * its label against the set's first member.  Returns 1 on match.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
    RF_ComponentLabel_t *clabel1, *clabel2;

    /* If this one matches the *first* one in the set, that's good
       enough, since the other members of the set would have been
       through here too... */
    /* note that we are not checking partitionSize here..

       Note that we are also not checking the mod_counters here.
       If everything else matches except the mod_counter, that's
       good enough for this test.  We will deal with the mod_counters
       a little later in the autoconfiguration process.

        (clabel1->mod_counter == clabel2->mod_counter) &&

       The reason we don't check for this is that failed disks
       will have lower modification counts.  If those disks are
       not added to the set they used to belong to, then they will
       form their own set, which may result in 2 different sets,
       for example, competing to be configured at raid0, and
       perhaps competing to be the root filesystem set.  If the
       wrong ones get configured, or both attempt to become /,
       weird behaviour and or serious lossage will occur.  Thus we
       need to bring them into the fold here, and kick them out at
       a later point.
*/

    clabel1 = cset->ac->clabel;
    clabel2 = ac->clabel;
    if ((clabel1->version == clabel2->version) &&
        (clabel1->serial_number == clabel2->serial_number) &&
        (clabel1->num_rows == clabel2->num_rows) &&
        (clabel1->num_columns == clabel2->num_columns) &&
        (clabel1->sectPerSU == clabel2->sectPerSU) &&
        (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
        (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
        (clabel1->parityConfig == clabel2->parityConfig) &&
        (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
        (clabel1->blockSize == clabel2->blockSize) &&
        (clabel1->numBlocks == clabel2->numBlocks) &&
        (clabel1->autoconfigure == clabel2->autoconfigure) &&
        (clabel1->root_partition == clabel2->root_partition) &&
        (clabel1->last_unit == clabel2->last_unit) &&
        (clabel1->config_order == clabel2->config_order)) {
        /* if it get's here, it almost *has* to be a match */
    } else {
        /* it's not consistent with somebody in the set..
           punt */
        return(0);
    }
    /* all was fine.. it must fit... */
    return(1);
}

/*
 * Determine whether cset has enough live, up-to-date components to be
 * configured.  Components whose mod_counter is stale count as missing;
 * RAID 1 gets special even/odd-pair accounting.  Returns 1 if the set
 * is configurable, 0 if too many components are gone.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
    RF_AutoConfig_t *ac;
    RF_AutoConfig_t *auto_config;
    RF_ComponentLabel_t *clabel;
    int c;
    int num_cols;
    int num_missing;
    int mod_counter;
    int mod_counter_found;
    int even_pair_failed;
    char parity_type;


    /* check to see that we have enough 'live' components
       of this set.  If so, we can configure it if necessary */

    num_cols = cset->ac->clabel->num_columns;
    parity_type = cset->ac->clabel->parityConfig;

    /* XXX Check for duplicate components!?!?!? */

    /* Determine what the mod_counter is supposed to be for this set. */

    /* The highest mod_counter seen wins; stale components carry a
       lower one and are treated as missing below. */
    mod_counter_found = 0;
    mod_counter = 0;
    ac = cset->ac;
    while(ac!=NULL) {
        if (mod_counter_found==0) {
            mod_counter = ac->clabel->mod_counter;
            mod_counter_found = 1;
        } else {
            if (ac->clabel->mod_counter > mod_counter) {
                mod_counter = ac->clabel->mod_counter;
            }
        }
        ac = ac->next;
    }

    num_missing = 0;
    auto_config = cset->ac;

    even_pair_failed = 0;
    for(c=0; c<num_cols; c++) {
        ac = auto_config;
        while(ac!=NULL) {
            if ((ac->clabel->column == c) &&
                (ac->clabel->mod_counter == mod_counter)) {
                /* it's this one... */
#ifdef DEBUG
                printf("Found: %s at %d\n",
                    ac->devname,c);
#endif
                break;
            }
            ac=ac->next;
        }
        if (ac==NULL) {
            /* Didn't find one here! */
            /* special case for RAID 1, especially
               where there are more than 2
               components (where RAIDframe treats
               things a little differently :( ) */
            if (parity_type == '1') {
                if (c%2 == 0) {    /* even component */
                    even_pair_failed = 1;
                } else {    /* odd component.  If
                               we're failed, and
                               so is the even
                               component, it's
                               "Good Night, Charlie" */
                    if (even_pair_failed == 1) {
                        return(0);
                    }
                }
            } else {
                /* normal accounting */
                num_missing++;
            }
        }
        if ((parity_type == '1') && (c%2 == 1)) {
            /* Just did an even component, and we didn't
               bail.. reset the even_pair_failed flag,
               and go on to the next component....
*/
            even_pair_failed = 0;
        }
    }

    clabel = cset->ac->clabel;

    /* Per-level failure tolerance: RAID 0 tolerates none; RAID 4/5
       tolerate a single missing component. */
    if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
        ((clabel->parityConfig == '4') && (num_missing > 1)) ||
        ((clabel->parityConfig == '5') && (num_missing > 1))) {
        /* XXX this needs to be made *much* more general */
        /* Too many failures */
        return(0);
    }
    /* otherwise, all is well, and we've got enough to take a kick
       at autoconfiguring this set */
    return(1);
}

/*
 * Build an RF_Config_t from the labels in an autoconfig set: common
 * layout parameters come from the first label; device names are filled
 * in per column.  NOTE(review): numSpare is left 0 here — spares are
 * presumably attached elsewhere; confirm against the ioctl path.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
    RF_ComponentLabel_t *clabel;
    int i;

    clabel = ac->clabel;

    /* 1. Fill in the common stuff */
    config->numRow = clabel->num_rows = 1;
    config->numCol = clabel->num_columns;
    config->numSpare = 0;    /* XXX should this be set here? */
    config->sectPerSU = clabel->sectPerSU;
    config->SUsPerPU = clabel->SUsPerPU;
    config->SUsPerRU = clabel->SUsPerRU;
    config->parityConfig = clabel->parityConfig;
    /* XXX... */
    strcpy(config->diskQueueType,"fifo");
    config->maxOutstandingDiskReqs = clabel->maxOutstanding;
    config->layoutSpecificSize = 0;    /* XXX ?? */

    while(ac!=NULL) {
        /* row/col values will be in range due to the checks
           in reasonable_label() */
        strcpy(config->devnames[0][ac->clabel->column],
            ac->devname);
        ac = ac->next;
    }

    for(i=0;i<RF_MAXDBGV;i++) {
        config->debugVars[i][0] = 0;
    }
}

/*
 * Set the autoconfigure flag on the set and persist it into every
 * optimal component's label (and every in-use spare's).  Returns the
 * new value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
    RF_ComponentLabel_t clabel;
    struct vnode *vp;
    dev_t dev;
    int column;
    int sparecol;

    raidPtr->autoconfigure = new_value;

    for(column=0; column<raidPtr->numCol; column++) {
        if (raidPtr->Disks[column].status == rf_ds_optimal) {
            dev = raidPtr->Disks[column].dev;
            vp = raidPtr->raid_cinfo[column].ci_vp;
            raidread_component_label(dev, vp, &clabel);
            clabel.autoconfigure = new_value;
            raidwrite_component_label(dev, vp, &clabel);
        }
    }
    for(column = 0; column < raidPtr->numSpare ; column++) {
        sparecol = raidPtr->numCol + column;
        if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
            dev = raidPtr->Disks[sparecol].dev;
            vp = raidPtr->raid_cinfo[sparecol].ci_vp;
            raidread_component_label(dev, vp, &clabel);
            clabel.autoconfigure = new_value;
            raidwrite_component_label(dev, vp, &clabel);
        }
    }
    return(new_value);
}

/*
 * As rf_set_autoconfig(), but for the "contains root partition" flag.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
    RF_ComponentLabel_t clabel;
    struct vnode *vp;
    dev_t dev;
    int column;
    int sparecol;

    raidPtr->root_partition = new_value;
    for(column=0; column<raidPtr->numCol; column++) {
        if (raidPtr->Disks[column].status == rf_ds_optimal) {
            dev = raidPtr->Disks[column].dev;
            vp = raidPtr->raid_cinfo[column].ci_vp;
            raidread_component_label(dev, vp, &clabel);
            clabel.root_partition = new_value;
            raidwrite_component_label(dev, vp, &clabel);
        }
    }
    for(column = 0; column < raidPtr->numSpare ; column++) {
        sparecol =
raidPtr->numCol + column; 3343 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3344 dev = raidPtr->Disks[sparecol].dev; 3345 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3346 raidread_component_label(dev, vp, &clabel); 3347 clabel.root_partition = new_value; 3348 raidwrite_component_label(dev, vp, &clabel); 3349 } 3350 } 3351 return(new_value); 3352 } 3353 3354 void 3355 rf_release_all_vps(RF_ConfigSet_t *cset) 3356 { 3357 RF_AutoConfig_t *ac; 3358 3359 ac = cset->ac; 3360 while(ac!=NULL) { 3361 /* Close the vp, and give it back */ 3362 if (ac->vp) { 3363 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3364 VOP_CLOSE(ac->vp, FREAD, NOCRED); 3365 vput(ac->vp); 3366 ac->vp = NULL; 3367 } 3368 ac = ac->next; 3369 } 3370 } 3371 3372 3373 void 3374 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3375 { 3376 RF_AutoConfig_t *ac; 3377 RF_AutoConfig_t *next_ac; 3378 3379 ac = cset->ac; 3380 while(ac!=NULL) { 3381 next_ac = ac->next; 3382 /* nuke the label */ 3383 free(ac->clabel, M_RAIDFRAME); 3384 /* cleanup the config structure */ 3385 free(ac, M_RAIDFRAME); 3386 /* "next.." */ 3387 ac = next_ac; 3388 } 3389 /* and, finally, nuke the config set */ 3390 free(cset, M_RAIDFRAME); 3391 } 3392 3393 3394 void 3395 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3396 { 3397 /* current version number */ 3398 clabel->version = RF_COMPONENT_LABEL_VERSION; 3399 clabel->serial_number = raidPtr->serial_number; 3400 clabel->mod_counter = raidPtr->mod_counter; 3401 clabel->num_rows = 1; 3402 clabel->num_columns = raidPtr->numCol; 3403 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3404 clabel->status = rf_ds_optimal; /* "It's good!" 
*/ 3405 3406 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3407 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3408 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3409 3410 clabel->blockSize = raidPtr->bytesPerSector; 3411 clabel->numBlocks = raidPtr->sectorsPerDisk; 3412 3413 /* XXX not portable */ 3414 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3415 clabel->maxOutstanding = raidPtr->maxOutstanding; 3416 clabel->autoconfigure = raidPtr->autoconfigure; 3417 clabel->root_partition = raidPtr->root_partition; 3418 clabel->last_unit = raidPtr->raidid; 3419 clabel->config_order = raidPtr->config_order; 3420 } 3421 3422 int 3423 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit) 3424 { 3425 RF_Raid_t *raidPtr; 3426 RF_Config_t *config; 3427 int raidID; 3428 int retcode; 3429 3430 #ifdef DEBUG 3431 printf("RAID autoconfigure\n"); 3432 #endif 3433 3434 retcode = 0; 3435 *unit = -1; 3436 3437 /* 1. Create a config structure */ 3438 3439 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3440 M_RAIDFRAME, 3441 M_NOWAIT); 3442 if (config==NULL) { 3443 printf("Out of mem!?!?\n"); 3444 /* XXX do something more intelligent here. */ 3445 return(1); 3446 } 3447 3448 memset(config, 0, sizeof(RF_Config_t)); 3449 3450 /* 3451 2. Figure out what RAID ID this one is supposed to live at 3452 See if we can get the same RAID dev that it was configured 3453 on last time.. 3454 */ 3455 3456 raidID = cset->ac->clabel->last_unit; 3457 if ((raidID < 0) || (raidID >= numraid)) { 3458 /* let's not wander off into lala land. */ 3459 raidID = numraid - 1; 3460 } 3461 if (raidPtrs[raidID]->valid != 0) { 3462 3463 /* 3464 Nope... Go looking for an alternative... 3465 Start high so we don't immediately use raid0 if that's 3466 not taken. 3467 */ 3468 3469 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3470 if (raidPtrs[raidID]->valid == 0) { 3471 /* can use this one! */ 3472 break; 3473 } 3474 } 3475 } 3476 3477 if (raidID < 0) { 3478 /* punt... 
*/ 3479 printf("Unable to auto configure this set!\n"); 3480 printf("(Out of RAID devs!)\n"); 3481 free(config, M_RAIDFRAME); 3482 return(1); 3483 } 3484 3485 #ifdef DEBUG 3486 printf("Configuring raid%d:\n",raidID); 3487 #endif 3488 3489 raidPtr = raidPtrs[raidID]; 3490 3491 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3492 raidPtr->raidid = raidID; 3493 raidPtr->openings = RAIDOUTSTANDING; 3494 3495 /* 3. Build the configuration structure */ 3496 rf_create_configuration(cset->ac, config, raidPtr); 3497 3498 /* 4. Do the configuration */ 3499 retcode = rf_Configure(raidPtr, config, cset->ac); 3500 3501 if (retcode == 0) { 3502 3503 raidinit(raidPtrs[raidID]); 3504 3505 rf_markalldirty(raidPtrs[raidID]); 3506 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3507 if (cset->ac->clabel->root_partition==1) { 3508 /* everything configured just fine. Make a note 3509 that this set is eligible to be root. */ 3510 cset->rootable = 1; 3511 /* XXX do this here? */ 3512 raidPtrs[raidID]->root_partition = 1; 3513 } 3514 } 3515 3516 /* 5. Cleanup */ 3517 free(config, M_RAIDFRAME); 3518 3519 *unit = raidID; 3520 return(retcode); 3521 } 3522 3523 void 3524 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3525 { 3526 struct buf *bp; 3527 3528 bp = (struct buf *)desc->bp; 3529 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3530 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3531 } 3532 3533 void 3534 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3535 size_t xmin, size_t xmax) 3536 { 3537 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); 3538 pool_sethiwat(p, xmax); 3539 pool_prime(p, xmin); 3540 pool_setlowat(p, xmin); 3541 } 3542 3543 /* 3544 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see 3545 * if there is IO pending and if that IO could possibly be done for a 3546 * given RAID set. Returns 0 if IO is waiting and can be done, 1 3547 * otherwise. 
3548 * 3549 */ 3550 3551 int 3552 rf_buf_queue_check(int raidid) 3553 { 3554 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) && 3555 raidPtrs[raidid]->openings > 0) { 3556 /* there is work to do */ 3557 return 0; 3558 } 3559 /* default is nothing to do */ 3560 return 1; 3561 } 3562 3563 int 3564 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr) 3565 { 3566 struct partinfo dpart; 3567 struct dkwedge_info dkw; 3568 int error; 3569 3570 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred); 3571 if (error == 0) { 3572 diskPtr->blockSize = dpart.disklab->d_secsize; 3573 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 3574 diskPtr->partitionSize = dpart.part->p_size; 3575 return 0; 3576 } 3577 3578 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred); 3579 if (error == 0) { 3580 diskPtr->blockSize = 512; /* XXX */ 3581 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors; 3582 diskPtr->partitionSize = dkw.dkw_size; 3583 return 0; 3584 } 3585 return error; 3586 } 3587 3588 static int 3589 raid_match(struct device *self, struct cfdata *cfdata, 3590 void *aux) 3591 { 3592 return 1; 3593 } 3594 3595 static void 3596 raid_attach(struct device *parent, struct device *self, 3597 void *aux) 3598 { 3599 3600 } 3601 3602 3603 static int 3604 raid_detach(struct device *self, int flags) 3605 { 3606 struct raid_softc *rs = (struct raid_softc *)self; 3607 3608 if (rs->sc_flags & RAIDF_INITED) 3609 return EBUSY; 3610 3611 return 0; 3612 } 3613 3614 static void 3615 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr) 3616 { 3617 prop_dictionary_t disk_info, odisk_info, geom; 3618 disk_info = prop_dictionary_create(); 3619 geom = prop_dictionary_create(); 3620 prop_dictionary_set_uint64(geom, "sectors-per-unit", 3621 raidPtr->totalSectors); 3622 prop_dictionary_set_uint32(geom, "sector-size", 3623 raidPtr->bytesPerSector); 3624 3625 prop_dictionary_set_uint16(geom, "sectors-per-track", 3626 
raidPtr->Layout.dataSectorsPerStripe); 3627 prop_dictionary_set_uint16(geom, "tracks-per-cylinder", 3628 4 * raidPtr->numCol); 3629 3630 prop_dictionary_set_uint64(geom, "cylinders-per-unit", 3631 raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe * 3632 (4 * raidPtr->numCol))); 3633 3634 prop_dictionary_set(disk_info, "geometry", geom); 3635 prop_object_release(geom); 3636 prop_dictionary_set(device_properties(rs->sc_dev), 3637 "disk-info", disk_info); 3638 odisk_info = rs->sc_dkdev.dk_info; 3639 rs->sc_dkdev.dk_info = disk_info; 3640 if (odisk_info) 3641 prop_object_release(odisk_info); 3642 } 3643