1 /* $NetBSD: rf_netbsdkintf.c,v 1.248 2008/06/16 16:58:26 oster Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1990, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * This code is derived from software contributed to Berkeley by 36 * the Systems Programming Group of the University of Utah Computer 37 * Science Department. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 
62 * 63 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 64 * 65 * @(#)cd.c 8.2 (Berkeley) 11/16/93 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * 71 * This code is derived from software contributed to Berkeley by 72 * the Systems Programming Group of the University of Utah Computer 73 * Science Department. 74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 3. All advertising materials mentioning features or use of this software 84 * must display the following acknowledgement: 85 * This product includes software developed by the University of 86 * California, Berkeley and its contributors. 87 * 4. Neither the name of the University nor the names of its contributors 88 * may be used to endorse or promote products derived from this software 89 * without specific prior written permission. 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 101 * SUCH DAMAGE. 102 * 103 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 104 * 105 * @(#)cd.c 8.2 (Berkeley) 11/16/93 106 */ 107 108 /* 109 * Copyright (c) 1995 Carnegie-Mellon University. 110 * All rights reserved. 111 * 112 * Authors: Mark Holland, Jim Zelenka 113 * 114 * Permission to use, copy, modify and distribute this software and 115 * its documentation is hereby granted, provided that both the copyright 116 * notice and this permission notice appear in all copies of the 117 * software, derivative works or modified versions, and any portions 118 * thereof, and that both notices appear in supporting documentation. 119 * 120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 123 * 124 * Carnegie Mellon requests users of this software to return to 125 * 126 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 127 * School of Computer Science 128 * Carnegie Mellon University 129 * Pittsburgh PA 15213-3890 130 * 131 * any improvements or extensions that they make and grant Carnegie the 132 * rights to redistribute these changes. 
133 */ 134 135 /*********************************************************** 136 * 137 * rf_kintf.c -- the kernel interface routines for RAIDframe 138 * 139 ***********************************************************/ 140 141 #include <sys/cdefs.h> 142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.248 2008/06/16 16:58:26 oster Exp $"); 143 144 #include <sys/param.h> 145 #include <sys/errno.h> 146 #include <sys/pool.h> 147 #include <sys/proc.h> 148 #include <sys/queue.h> 149 #include <sys/disk.h> 150 #include <sys/device.h> 151 #include <sys/stat.h> 152 #include <sys/ioctl.h> 153 #include <sys/fcntl.h> 154 #include <sys/systm.h> 155 #include <sys/vnode.h> 156 #include <sys/disklabel.h> 157 #include <sys/conf.h> 158 #include <sys/buf.h> 159 #include <sys/bufq.h> 160 #include <sys/user.h> 161 #include <sys/reboot.h> 162 #include <sys/kauth.h> 163 164 #include <prop/proplib.h> 165 166 #include <dev/raidframe/raidframevar.h> 167 #include <dev/raidframe/raidframeio.h> 168 #include "raid.h" 169 #include "opt_raid_autoconfig.h" 170 #include "rf_raid.h" 171 #include "rf_copyback.h" 172 #include "rf_dag.h" 173 #include "rf_dagflags.h" 174 #include "rf_desc.h" 175 #include "rf_diskqueue.h" 176 #include "rf_etimer.h" 177 #include "rf_general.h" 178 #include "rf_kintf.h" 179 #include "rf_options.h" 180 #include "rf_driver.h" 181 #include "rf_parityscan.h" 182 #include "rf_threadstuff.h" 183 184 #ifdef DEBUG 185 int rf_kdebug_level = 0; 186 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 187 #else /* DEBUG */ 188 #define db1_printf(a) { } 189 #endif /* DEBUG */ 190 191 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 192 193 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 194 195 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 196 * spare table */ 197 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 198 * installation process */ 199 200 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 201 202 /* prototypes */ 203 static void KernelWakeupFunc(struct buf *); 204 static void InitBP(struct buf *, struct vnode *, unsigned, 205 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 206 void *, int, struct proc *); 207 static void raidinit(RF_Raid_t *); 208 209 void raidattach(int); 210 static int raid_match(struct device *, struct cfdata *, void *); 211 static void raid_attach(struct device *, struct device *, void *); 212 static int raid_detach(struct device *, int); 213 214 dev_type_open(raidopen); 215 dev_type_close(raidclose); 216 dev_type_read(raidread); 217 dev_type_write(raidwrite); 218 dev_type_ioctl(raidioctl); 219 dev_type_strategy(raidstrategy); 220 dev_type_dump(raiddump); 221 dev_type_size(raidsize); 222 223 const struct bdevsw raid_bdevsw = { 224 raidopen, raidclose, raidstrategy, raidioctl, 225 raiddump, raidsize, D_DISK 226 }; 227 228 const struct cdevsw raid_cdevsw = { 229 raidopen, raidclose, raidread, raidwrite, raidioctl, 230 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 231 }; 232 233 static struct dkdriver rf_dkdriver = { raidstrategy, minphys }; 234 235 /* XXX Not sure if the following should be replacing the raidPtrs above, 236 or if it should be used in conjunction with that... 
 */

struct raid_softc {
	struct device *sc_dev;
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;

extern struct cfdriver raid_cd;
CFATTACH_DECL_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_properties(struct raid_softc *, RF_Raid_t *);

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(struct device *self);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file. */

struct RF_Pools_s rf_pools;

void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_normal("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some data structures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

int
rf_autoconfig(struct device *self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (raidautoconfig == 0)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfig = 0;

	/* 1. locate all RAID components on the system */
#ifdef DEBUG
	printf("Searching for RAID components...\n");
#endif
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 *    This gets done in rf_buildroothack().
431 */ 432 rf_buildroothack(config_sets); 433 434 return 1; 435 } 436 437 void 438 rf_buildroothack(RF_ConfigSet_t *config_sets) 439 { 440 RF_ConfigSet_t *cset; 441 RF_ConfigSet_t *next_cset; 442 int retcode; 443 int raidID; 444 int rootID; 445 int col; 446 int num_root; 447 char *devname; 448 449 rootID = 0; 450 num_root = 0; 451 cset = config_sets; 452 while(cset != NULL ) { 453 next_cset = cset->next; 454 if (rf_have_enough_components(cset) && 455 cset->ac->clabel->autoconfigure==1) { 456 retcode = rf_auto_config_set(cset,&raidID); 457 if (!retcode) { 458 #ifdef DEBUG 459 printf("raid%d: configured ok\n", raidID); 460 #endif 461 if (cset->rootable) { 462 rootID = raidID; 463 num_root++; 464 } 465 } else { 466 /* The autoconfig didn't work :( */ 467 #ifdef DEBUG 468 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 469 #endif 470 rf_release_all_vps(cset); 471 } 472 } else { 473 /* we're not autoconfiguring this set... 474 release the associated resources */ 475 rf_release_all_vps(cset); 476 } 477 /* cleanup */ 478 rf_cleanup_config_set(cset); 479 cset = next_cset; 480 } 481 482 /* if the user has specified what the root device should be 483 then we don't touch booted_device or boothowto... */ 484 485 if (rootspec != NULL) 486 return; 487 488 /* we found something bootable... */ 489 490 if (num_root == 1) { 491 booted_device = raid_softc[rootID].sc_dev; 492 } else if (num_root > 1) { 493 494 /* 495 * Maybe the MD code can help. If it cannot, then 496 * setroot() will discover that we have no 497 * booted_device and will ask the user if nothing was 498 * hardwired in the kernel config file 499 */ 500 501 if (booted_device == NULL) 502 cpu_rootconf(); 503 if (booted_device == NULL) 504 return; 505 506 num_root = 0; 507 for (raidID = 0; raidID < numraid; raidID++) { 508 if (raidPtrs[raidID]->valid == 0) 509 continue; 510 511 if (raidPtrs[raidID]->root_partition == 0) 512 continue; 513 514 for (col = 0; col < raidPtrs[raidID]->numCol; col++) { 515 devname = raidPtrs[raidID]->Disks[col].devname; 516 devname += sizeof("/dev/") - 1; 517 if (strncmp(devname, device_xname(booted_device), 518 strlen(device_xname(booted_device))) != 0) 519 continue; 520 #ifdef DEBUG 521 printf("raid%d includes boot device %s\n", 522 raidID, devname); 523 #endif 524 num_root++; 525 rootID = raidID; 526 } 527 } 528 529 if (num_root == 1) { 530 booted_device = raid_softc[rootID].sc_dev; 531 } else { 532 /* we can't guess.. require the user to answer... 
*/ 533 boothowto |= RB_ASKNAME; 534 } 535 } 536 } 537 538 539 int 540 raidsize(dev_t dev) 541 { 542 struct raid_softc *rs; 543 struct disklabel *lp; 544 int part, unit, omask, size; 545 546 unit = raidunit(dev); 547 if (unit >= numraid) 548 return (-1); 549 rs = &raid_softc[unit]; 550 551 if ((rs->sc_flags & RAIDF_INITED) == 0) 552 return (-1); 553 554 part = DISKPART(dev); 555 omask = rs->sc_dkdev.dk_openmask & (1 << part); 556 lp = rs->sc_dkdev.dk_label; 557 558 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp)) 559 return (-1); 560 561 if (lp->d_partitions[part].p_fstype != FS_SWAP) 562 size = -1; 563 else 564 size = lp->d_partitions[part].p_size * 565 (lp->d_secsize / DEV_BSIZE); 566 567 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp)) 568 return (-1); 569 570 return (size); 571 572 } 573 574 int 575 raiddump(dev_t dev, daddr_t blkno, void *va, size_t size) 576 { 577 int unit = raidunit(dev); 578 struct raid_softc *rs; 579 const struct bdevsw *bdev; 580 struct disklabel *lp; 581 RF_Raid_t *raidPtr; 582 daddr_t offset; 583 int part, c, sparecol, j, scol, dumpto; 584 int error = 0; 585 586 if (unit >= numraid) 587 return (ENXIO); 588 589 rs = &raid_softc[unit]; 590 raidPtr = raidPtrs[unit]; 591 592 if ((rs->sc_flags & RAIDF_INITED) == 0) 593 return ENXIO; 594 595 /* we only support dumping to RAID 1 sets */ 596 if (raidPtr->Layout.numDataCol != 1 || 597 raidPtr->Layout.numParityCol != 1) 598 return EINVAL; 599 600 601 if ((error = raidlock(rs)) != 0) 602 return error; 603 604 if (size % DEV_BSIZE != 0) { 605 error = EINVAL; 606 goto out; 607 } 608 609 if (blkno + size / DEV_BSIZE > rs->sc_size) { 610 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > " 611 "sc->sc_size (%" PRIu64 ")\n", __func__, blkno, 612 size / DEV_BSIZE, rs->sc_size); 613 error = EINVAL; 614 goto out; 615 } 616 617 part = DISKPART(dev); 618 lp = rs->sc_dkdev.dk_label; 619 offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS; 620 621 /* figure out what device is alive.. */ 622 623 /* 624 Look for a component to dump to. The preference for the 625 component to dump to is as follows: 626 1) the master 627 2) a used_spare of the master 628 3) the slave 629 4) a used_spare of the slave 630 */ 631 632 dumpto = -1; 633 for (c = 0; c < raidPtr->numCol; c++) { 634 if (raidPtr->Disks[c].status == rf_ds_optimal) { 635 /* this might be the one */ 636 dumpto = c; 637 break; 638 } 639 } 640 641 /* 642 At this point we have possibly selected a live master or a 643 live slave. We now check to see if there is a spared 644 master (or a spared slave), if we didn't find a live master 645 or a live slave. 646 */ 647 648 for (c = 0; c < raidPtr->numSpare; c++) { 649 sparecol = raidPtr->numCol + c; 650 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 651 /* How about this one? */ 652 scol = -1; 653 for(j=0;j<raidPtr->numCol;j++) { 654 if (raidPtr->Disks[j].spareCol == sparecol) { 655 scol = j; 656 break; 657 } 658 } 659 if (scol == 0) { 660 /* 661 We must have found a spared master! 662 We'll take that over anything else 663 found so far. (We couldn't have 664 found a real master before, since 665 this is a used spare, and it's 666 saying that it's replacing the 667 master.) On reboot (with 668 autoconfiguration turned on) 669 sparecol will become the 1st 670 component (component0) of this set. 671 */ 672 dumpto = sparecol; 673 break; 674 } else if (scol != -1) { 675 /* 676 Must be a spared slave. We'll dump 677 to that if we havn't found anything 678 else so far. 
679 */ 680 if (dumpto == -1) 681 dumpto = sparecol; 682 } 683 } 684 } 685 686 if (dumpto == -1) { 687 /* we couldn't find any live components to dump to!?!? 688 */ 689 error = EINVAL; 690 goto out; 691 } 692 693 bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev); 694 695 /* 696 Note that blkno is relative to this particular partition. 697 By adding the offset of this partition in the RAID 698 set, and also adding RF_PROTECTED_SECTORS, we get a 699 value that is relative to the partition used for the 700 underlying component. 701 */ 702 703 error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev, 704 blkno + offset, va, size); 705 706 out: 707 raidunlock(rs); 708 709 return error; 710 } 711 /* ARGSUSED */ 712 int 713 raidopen(dev_t dev, int flags, int fmt, 714 struct lwp *l) 715 { 716 int unit = raidunit(dev); 717 struct raid_softc *rs; 718 struct disklabel *lp; 719 int part, pmask; 720 int error = 0; 721 722 if (unit >= numraid) 723 return (ENXIO); 724 rs = &raid_softc[unit]; 725 726 if ((error = raidlock(rs)) != 0) 727 return (error); 728 lp = rs->sc_dkdev.dk_label; 729 730 part = DISKPART(dev); 731 732 /* 733 * If there are wedges, and this is not RAW_PART, then we 734 * need to fail. 735 */ 736 if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) { 737 error = EBUSY; 738 goto bad; 739 } 740 pmask = (1 << part); 741 742 if ((rs->sc_flags & RAIDF_INITED) && 743 (rs->sc_dkdev.dk_openmask == 0)) 744 raidgetdisklabel(dev); 745 746 /* make sure that this partition exists */ 747 748 if (part != RAW_PART) { 749 if (((rs->sc_flags & RAIDF_INITED) == 0) || 750 ((part >= lp->d_npartitions) || 751 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 752 error = ENXIO; 753 goto bad; 754 } 755 } 756 /* Prevent this unit from being unconfigured while open. */ 757 switch (fmt) { 758 case S_IFCHR: 759 rs->sc_dkdev.dk_copenmask |= pmask; 760 break; 761 762 case S_IFBLK: 763 rs->sc_dkdev.dk_bopenmask |= pmask; 764 break; 765 } 766 767 if ((rs->sc_dkdev.dk_openmask == 0) && 768 ((rs->sc_flags & RAIDF_INITED) != 0)) { 769 /* First one... mark things as dirty... Note that we *MUST* 770 have done a configure before this. I DO NOT WANT TO BE 771 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 772 THAT THEY BELONG TOGETHER!!!!! */ 773 /* XXX should check to see if we're only open for reading 774 here... If so, we needn't do this, but then need some 775 other way of keeping track of what's happened.. */ 776 777 rf_markalldirty( raidPtrs[unit] ); 778 } 779 780 781 rs->sc_dkdev.dk_openmask = 782 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 783 784 bad: 785 raidunlock(rs); 786 787 return (error); 788 789 790 } 791 /* ARGSUSED */ 792 int 793 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 794 { 795 int unit = raidunit(dev); 796 struct cfdata *cf; 797 struct raid_softc *rs; 798 int error = 0; 799 int part; 800 801 if (unit >= numraid) 802 return (ENXIO); 803 rs = &raid_softc[unit]; 804 805 if ((error = raidlock(rs)) != 0) 806 return (error); 807 808 part = DISKPART(dev); 809 810 /* ...that much closer to allowing unconfiguration... */ 811 switch (fmt) { 812 case S_IFCHR: 813 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 814 break; 815 816 case S_IFBLK: 817 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 818 break; 819 } 820 rs->sc_dkdev.dk_openmask = 821 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 822 823 if ((rs->sc_dkdev.dk_openmask == 0) && 824 ((rs->sc_flags & RAIDF_INITED) != 0)) { 825 /* Last one... device is not unconfigured yet. 
826 Device shutdown has taken care of setting the 827 clean bits if RAIDF_INITED is not set 828 mark things as clean... */ 829 830 rf_update_component_labels(raidPtrs[unit], 831 RF_FINAL_COMPONENT_UPDATE); 832 if (doing_shutdown) { 833 /* last one, and we're going down, so 834 lights out for this RAID set too. */ 835 error = rf_Shutdown(raidPtrs[unit]); 836 837 /* It's no longer initialized... */ 838 rs->sc_flags &= ~RAIDF_INITED; 839 840 /* detach the device */ 841 842 cf = device_cfdata(rs->sc_dev); 843 error = config_detach(rs->sc_dev, DETACH_QUIET); 844 free(cf, M_RAIDFRAME); 845 846 /* Detach the disk. */ 847 disk_detach(&rs->sc_dkdev); 848 disk_destroy(&rs->sc_dkdev); 849 } 850 } 851 852 raidunlock(rs); 853 return (0); 854 855 } 856 857 void 858 raidstrategy(struct buf *bp) 859 { 860 int s; 861 862 unsigned int raidID = raidunit(bp->b_dev); 863 RF_Raid_t *raidPtr; 864 struct raid_softc *rs = &raid_softc[raidID]; 865 int wlabel; 866 867 if ((rs->sc_flags & RAIDF_INITED) ==0) { 868 bp->b_error = ENXIO; 869 goto done; 870 } 871 if (raidID >= numraid || !raidPtrs[raidID]) { 872 bp->b_error = ENODEV; 873 goto done; 874 } 875 raidPtr = raidPtrs[raidID]; 876 if (!raidPtr->valid) { 877 bp->b_error = ENODEV; 878 goto done; 879 } 880 if (bp->b_bcount == 0) { 881 db1_printf(("b_bcount is zero..\n")); 882 goto done; 883 } 884 885 /* 886 * Do bounds checking and adjust transfer. If there's an 887 * error, the bounds check will flag that for us. 888 */ 889 890 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 891 if (DISKPART(bp->b_dev) == RAW_PART) { 892 uint64_t size; /* device size in DEV_BSIZE unit */ 893 894 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 895 size = raidPtr->totalSectors << 896 (raidPtr->logBytesPerSector - DEV_BSHIFT); 897 } else { 898 size = raidPtr->totalSectors >> 899 (DEV_BSHIFT - raidPtr->logBytesPerSector); 900 } 901 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 902 goto done; 903 } 904 } else { 905 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 906 db1_printf(("Bounds check failed!!:%d %d\n", 907 (int) bp->b_blkno, (int) wlabel)); 908 goto done; 909 } 910 } 911 s = splbio(); 912 913 bp->b_resid = 0; 914 915 /* stuff it onto our queue */ 916 BUFQ_PUT(rs->buf_queue, bp); 917 918 /* scheduled the IO to happen at the next convenient time */ 919 wakeup(&(raidPtrs[raidID]->iodone)); 920 921 splx(s); 922 return; 923 924 done: 925 bp->b_resid = bp->b_bcount; 926 biodone(bp); 927 } 928 /* ARGSUSED */ 929 int 930 raidread(dev_t dev, struct uio *uio, int flags) 931 { 932 int unit = raidunit(dev); 933 struct raid_softc *rs; 934 935 if (unit >= numraid) 936 return (ENXIO); 937 rs = &raid_softc[unit]; 938 939 if ((rs->sc_flags & RAIDF_INITED) == 0) 940 return (ENXIO); 941 942 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 943 944 } 945 /* ARGSUSED */ 946 int 947 raidwrite(dev_t dev, struct uio *uio, int flags) 948 { 949 int unit = raidunit(dev); 950 struct raid_softc *rs; 951 952 if (unit >= numraid) 953 return (ENXIO); 954 rs = &raid_softc[unit]; 955 956 if ((rs->sc_flags & RAIDF_INITED) == 0) 957 return (ENXIO); 958 959 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 960 961 } 962 963 int 964 raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 965 { 966 int unit = raidunit(dev); 967 int error = 0; 968 int part, pmask; 969 struct cfdata *cf; 970 struct raid_softc *rs; 971 RF_Config_t *k_cfg, *u_cfg; 972 RF_Raid_t *raidPtr; 973 RF_RaidDisk_t *diskPtr; 974 RF_AccTotals_t *totals; 975 
RF_DeviceConfig_t *d_cfg, **ucfgp; 976 u_char *specific_buf; 977 int retcode = 0; 978 int column; 979 int raidid; 980 struct rf_recon_req *rrcopy, *rr; 981 RF_ComponentLabel_t *clabel; 982 RF_ComponentLabel_t *ci_label; 983 RF_ComponentLabel_t **clabel_ptr; 984 RF_SingleComponent_t *sparePtr,*componentPtr; 985 RF_SingleComponent_t component; 986 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 987 int i, j, d; 988 #ifdef __HAVE_OLD_DISKLABEL 989 struct disklabel newlabel; 990 #endif 991 struct dkwedge_info *dkw; 992 993 if (unit >= numraid) 994 return (ENXIO); 995 rs = &raid_softc[unit]; 996 raidPtr = raidPtrs[unit]; 997 998 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 999 (int) DISKPART(dev), (int) unit, (int) cmd)); 1000 1001 /* Must be open for writes for these commands... */ 1002 switch (cmd) { 1003 #ifdef DIOCGSECTORSIZE 1004 case DIOCGSECTORSIZE: 1005 *(u_int *)data = raidPtr->bytesPerSector; 1006 return 0; 1007 case DIOCGMEDIASIZE: 1008 *(off_t *)data = 1009 (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector; 1010 return 0; 1011 #endif 1012 case DIOCSDINFO: 1013 case DIOCWDINFO: 1014 #ifdef __HAVE_OLD_DISKLABEL 1015 case ODIOCWDINFO: 1016 case ODIOCSDINFO: 1017 #endif 1018 case DIOCWLABEL: 1019 case DIOCAWEDGE: 1020 case DIOCDWEDGE: 1021 if ((flag & FWRITE) == 0) 1022 return (EBADF); 1023 } 1024 1025 /* Must be initialized for these... */ 1026 switch (cmd) { 1027 case DIOCGDINFO: 1028 case DIOCSDINFO: 1029 case DIOCWDINFO: 1030 #ifdef __HAVE_OLD_DISKLABEL 1031 case ODIOCGDINFO: 1032 case ODIOCWDINFO: 1033 case ODIOCSDINFO: 1034 case ODIOCGDEFLABEL: 1035 #endif 1036 case DIOCGPART: 1037 case DIOCWLABEL: 1038 case DIOCGDEFLABEL: 1039 case DIOCAWEDGE: 1040 case DIOCDWEDGE: 1041 case DIOCLWEDGES: 1042 case RAIDFRAME_SHUTDOWN: 1043 case RAIDFRAME_REWRITEPARITY: 1044 case RAIDFRAME_GET_INFO: 1045 case RAIDFRAME_RESET_ACCTOTALS: 1046 case RAIDFRAME_GET_ACCTOTALS: 1047 case RAIDFRAME_KEEP_ACCTOTALS: 1048 case RAIDFRAME_GET_SIZE: 1049 case RAIDFRAME_FAIL_DISK: 1050 case RAIDFRAME_COPYBACK: 1051 case RAIDFRAME_CHECK_RECON_STATUS: 1052 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1053 case RAIDFRAME_GET_COMPONENT_LABEL: 1054 case RAIDFRAME_SET_COMPONENT_LABEL: 1055 case RAIDFRAME_ADD_HOT_SPARE: 1056 case RAIDFRAME_REMOVE_HOT_SPARE: 1057 case RAIDFRAME_INIT_LABELS: 1058 case RAIDFRAME_REBUILD_IN_PLACE: 1059 case RAIDFRAME_CHECK_PARITY: 1060 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1061 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1062 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1063 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1064 case RAIDFRAME_SET_AUTOCONFIG: 1065 case RAIDFRAME_SET_ROOT: 1066 case RAIDFRAME_DELETE_COMPONENT: 1067 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1068 if ((rs->sc_flags & RAIDF_INITED) == 0) 1069 return (ENXIO); 1070 } 1071 1072 switch (cmd) { 1073 1074 /* configure the system */ 1075 case RAIDFRAME_CONFIGURE: 1076 1077 if (raidPtr->valid) { 1078 /* There is a valid RAID set running on this unit! 
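
		   (Illustrative note, not part of the original comment: the
		   existing set has to be unconfigured before a new
		   RAIDFRAME_CONFIGURE will be accepted on this unit -- from
		   userland something along the lines of

			raidctl -u raid0

		   which ends up issuing the RAIDFRAME_SHUTDOWN ioctl handled
		   further below.)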
*/ 1079 printf("raid%d: Device already configured!\n",unit); 1080 return(EINVAL); 1081 } 1082 1083 /* copy-in the configuration information */ 1084 /* data points to a pointer to the configuration structure */ 1085 1086 u_cfg = *((RF_Config_t **) data); 1087 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 1088 if (k_cfg == NULL) { 1089 return (ENOMEM); 1090 } 1091 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 1092 if (retcode) { 1093 RF_Free(k_cfg, sizeof(RF_Config_t)); 1094 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 1095 retcode)); 1096 return (retcode); 1097 } 1098 /* allocate a buffer for the layout-specific data, and copy it 1099 * in */ 1100 if (k_cfg->layoutSpecificSize) { 1101 if (k_cfg->layoutSpecificSize > 10000) { 1102 /* sanity check */ 1103 RF_Free(k_cfg, sizeof(RF_Config_t)); 1104 return (EINVAL); 1105 } 1106 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 1107 (u_char *)); 1108 if (specific_buf == NULL) { 1109 RF_Free(k_cfg, sizeof(RF_Config_t)); 1110 return (ENOMEM); 1111 } 1112 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 1113 k_cfg->layoutSpecificSize); 1114 if (retcode) { 1115 RF_Free(k_cfg, sizeof(RF_Config_t)); 1116 RF_Free(specific_buf, 1117 k_cfg->layoutSpecificSize); 1118 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 1119 retcode)); 1120 return (retcode); 1121 } 1122 } else 1123 specific_buf = NULL; 1124 k_cfg->layoutSpecific = specific_buf; 1125 1126 /* should do some kind of sanity check on the configuration. 1127 * Store the sum of all the bytes in the last byte? */ 1128 1129 /* configure the system */ 1130 1131 /* 1132 * Clear the entire RAID descriptor, just to make sure 1133 * there is no stale data left in the case of a 1134 * reconfiguration 1135 */ 1136 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 1137 raidPtr->raidid = unit; 1138 1139 retcode = rf_Configure(raidPtr, k_cfg, NULL); 1140 1141 if (retcode == 0) { 1142 1143 /* allow this many simultaneous IO's to 1144 this RAID device */ 1145 raidPtr->openings = RAIDOUTSTANDING; 1146 1147 raidinit(raidPtr); 1148 rf_markalldirty(raidPtr); 1149 } 1150 /* free the buffers. No return code here. */ 1151 if (k_cfg->layoutSpecificSize) { 1152 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 1153 } 1154 RF_Free(k_cfg, sizeof(RF_Config_t)); 1155 1156 return (retcode); 1157 1158 /* shutdown the system */ 1159 case RAIDFRAME_SHUTDOWN: 1160 1161 if ((error = raidlock(rs)) != 0) 1162 return (error); 1163 1164 /* 1165 * If somebody has a partition mounted, we shouldn't 1166 * shutdown. 1167 */ 1168 1169 part = DISKPART(dev); 1170 pmask = (1 << part); 1171 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 1172 ((rs->sc_dkdev.dk_bopenmask & pmask) && 1173 (rs->sc_dkdev.dk_copenmask & pmask))) { 1174 raidunlock(rs); 1175 return (EBUSY); 1176 } 1177 1178 retcode = rf_Shutdown(raidPtr); 1179 1180 /* It's no longer initialized... */ 1181 rs->sc_flags &= ~RAIDF_INITED; 1182 1183 /* free the pseudo device attach bits */ 1184 1185 cf = device_cfdata(rs->sc_dev); 1186 /* XXX this causes us to not return any errors 1187 from the above call to rf_Shutdown() */ 1188 retcode = config_detach(rs->sc_dev, DETACH_QUIET); 1189 free(cf, M_RAIDFRAME); 1190 1191 /* Detach the disk. 
 */
		disk_detach(&rs->sc_dkdev);
		disk_destroy(&rs->sc_dkdev);

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
			   (RF_ComponentLabel_t *));
		if (clabel == NULL)
			return (ENOMEM);

		retcode = copyin( *clabel_ptr, clabel,
				  sizeof(RF_ComponentLabel_t));

		if (retcode) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(retcode);
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol +
				     raidPtr->numSpare)) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(EINVAL);
		}

		retcode = raidread_component_label(raidPtr->Disks[column].dev,
				raidPtr->raid_cinfo[column].ci_vp,
				clabel );

		if (retcode == 0) {
			retcode = copyout(clabel, *clabel_ptr,
					  sizeof(RF_ComponentLabel_t));
		}
		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
		return (retcode);

	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
#if 0
		raidwrite_component_label(
			    raidPtr->Disks[column].dev,
			    raidPtr->raid_cinfo[column].ci_vp,
			    clabel );
#endif
		return (0);

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
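
		   (Illustrative note, not from the original source: a
		   userland caller typically fills in only the serial number
		   before issuing this ioctl, for example

			RF_ComponentLabel_t cl;
			memset(&cl, 0, sizeof(cl));
			cl.serial_number = 2008061600;	/* value chosen by
							   the administrator */
			if (ioctl(raidfd, RAIDFRAME_INIT_LABELS, &cl) == -1)
				err(1, "RAIDFRAME_INIT_LABELS");

		   where "raidfd" is assumed to be an open descriptor for the
		   raw raid device.  Every other label field is regenerated
		   below from the in-kernel configuration.)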
		*/

		raidPtr->serial_number = clabel->serial_number;

		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
			  (RF_ComponentLabel_t *));
		if (ci_label == NULL)
			return (ENOMEM);

		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = clabel->serial_number;
		ci_label->row = 0; /* we don't pretend to support more */

		for (column = 0; column < raidPtr->numCol; column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label->partitionSize = diskPtr->partitionSize;
				ci_label->column = column;
				raidwrite_component_label(
					raidPtr->Disks[column].dev,
					raidPtr->raid_cinfo[column].ci_vp,
					ci_label );
			}
		}
		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr, "raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are...
*/ 1398 printf("raid%d: Unable to reconstruct to disk at:\n", 1399 raidPtr->raidid); 1400 printf("raid%d: Col: %d Too many failures.\n", 1401 raidPtr->raidid, column); 1402 RF_UNLOCK_MUTEX(raidPtr->mutex); 1403 return (EINVAL); 1404 } 1405 if (raidPtr->Disks[column].status == 1406 rf_ds_reconstructing) { 1407 printf("raid%d: Unable to reconstruct to disk at:\n", 1408 raidPtr->raidid); 1409 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1410 1411 RF_UNLOCK_MUTEX(raidPtr->mutex); 1412 return (EINVAL); 1413 } 1414 if (raidPtr->Disks[column].status == rf_ds_spared) { 1415 RF_UNLOCK_MUTEX(raidPtr->mutex); 1416 return (EINVAL); 1417 } 1418 RF_UNLOCK_MUTEX(raidPtr->mutex); 1419 1420 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1421 if (rrcopy == NULL) 1422 return(ENOMEM); 1423 1424 rrcopy->raidPtr = (void *) raidPtr; 1425 rrcopy->col = column; 1426 1427 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1428 rf_ReconstructInPlaceThread, 1429 rrcopy,"raid_reconip"); 1430 return(retcode); 1431 1432 case RAIDFRAME_GET_INFO: 1433 if (!raidPtr->valid) 1434 return (ENODEV); 1435 ucfgp = (RF_DeviceConfig_t **) data; 1436 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1437 (RF_DeviceConfig_t *)); 1438 if (d_cfg == NULL) 1439 return (ENOMEM); 1440 d_cfg->rows = 1; /* there is only 1 row now */ 1441 d_cfg->cols = raidPtr->numCol; 1442 d_cfg->ndevs = raidPtr->numCol; 1443 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1444 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1445 return (ENOMEM); 1446 } 1447 d_cfg->nspares = raidPtr->numSpare; 1448 if (d_cfg->nspares >= RF_MAX_DISKS) { 1449 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1450 return (ENOMEM); 1451 } 1452 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1453 d = 0; 1454 for (j = 0; j < d_cfg->cols; j++) { 1455 d_cfg->devs[d] = raidPtr->Disks[j]; 1456 d++; 1457 } 1458 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1459 d_cfg->spares[i] = raidPtr->Disks[j]; 1460 } 1461 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1462 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1463 1464 return (retcode); 1465 1466 case RAIDFRAME_CHECK_PARITY: 1467 *(int *) data = raidPtr->parity_good; 1468 return (0); 1469 1470 case RAIDFRAME_RESET_ACCTOTALS: 1471 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1472 return (0); 1473 1474 case RAIDFRAME_GET_ACCTOTALS: 1475 totals = (RF_AccTotals_t *) data; 1476 *totals = raidPtr->acc_totals; 1477 return (0); 1478 1479 case RAIDFRAME_KEEP_ACCTOTALS: 1480 raidPtr->keep_acc_totals = *(int *)data; 1481 return (0); 1482 1483 case RAIDFRAME_GET_SIZE: 1484 *(int *) data = raidPtr->totalSectors; 1485 return (0); 1486 1487 /* fail a disk & optionally start reconstruction */ 1488 case RAIDFRAME_FAIL_DISK: 1489 1490 if (raidPtr->Layout.map->faultsTolerated == 0) { 1491 /* Can't do this on a RAID 0!! */ 1492 return(EINVAL); 1493 } 1494 1495 rr = (struct rf_recon_req *) data; 1496 rr->row = 0; 1497 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1498 return (EINVAL); 1499 1500 1501 RF_LOCK_MUTEX(raidPtr->mutex); 1502 if (raidPtr->status == rf_rs_reconstructing) { 1503 /* you can't fail a disk while we're reconstructing! */ 1504 /* XXX wrong for RAID6 */ 1505 RF_UNLOCK_MUTEX(raidPtr->mutex); 1506 return (EINVAL); 1507 } 1508 if ((raidPtr->Disks[rr->col].status == 1509 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1510 /* some other component has failed. Let's not make 1511 things worse. 
XXX wrong for RAID6 */ 1512 RF_UNLOCK_MUTEX(raidPtr->mutex); 1513 return (EINVAL); 1514 } 1515 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1516 /* Can't fail a spared disk! */ 1517 RF_UNLOCK_MUTEX(raidPtr->mutex); 1518 return (EINVAL); 1519 } 1520 RF_UNLOCK_MUTEX(raidPtr->mutex); 1521 1522 /* make a copy of the recon request so that we don't rely on 1523 * the user's buffer */ 1524 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1525 if (rrcopy == NULL) 1526 return(ENOMEM); 1527 memcpy(rrcopy, rr, sizeof(*rr)); 1528 rrcopy->raidPtr = (void *) raidPtr; 1529 1530 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1531 rf_ReconThread, 1532 rrcopy,"raid_recon"); 1533 return (0); 1534 1535 /* invoke a copyback operation after recon on whatever disk 1536 * needs it, if any */ 1537 case RAIDFRAME_COPYBACK: 1538 1539 if (raidPtr->Layout.map->faultsTolerated == 0) { 1540 /* This makes no sense on a RAID 0!! */ 1541 return(EINVAL); 1542 } 1543 1544 if (raidPtr->copyback_in_progress == 1) { 1545 /* Copyback is already in progress! */ 1546 return(EINVAL); 1547 } 1548 1549 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1550 rf_CopybackThread, 1551 raidPtr,"raid_copyback"); 1552 return (retcode); 1553 1554 /* return the percentage completion of reconstruction */ 1555 case RAIDFRAME_CHECK_RECON_STATUS: 1556 if (raidPtr->Layout.map->faultsTolerated == 0) { 1557 /* This makes no sense on a RAID 0, so tell the 1558 user it's done. */ 1559 *(int *) data = 100; 1560 return(0); 1561 } 1562 if (raidPtr->status != rf_rs_reconstructing) 1563 *(int *) data = 100; 1564 else { 1565 if (raidPtr->reconControl->numRUsTotal > 0) { 1566 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1567 } else { 1568 *(int *) data = 0; 1569 } 1570 } 1571 return (0); 1572 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1573 progressInfoPtr = (RF_ProgressInfo_t **) data; 1574 if (raidPtr->status != rf_rs_reconstructing) { 1575 progressInfo.remaining = 0; 1576 progressInfo.completed = 100; 1577 progressInfo.total = 100; 1578 } else { 1579 progressInfo.total = 1580 raidPtr->reconControl->numRUsTotal; 1581 progressInfo.completed = 1582 raidPtr->reconControl->numRUsComplete; 1583 progressInfo.remaining = progressInfo.total - 1584 progressInfo.completed; 1585 } 1586 retcode = copyout(&progressInfo, *progressInfoPtr, 1587 sizeof(RF_ProgressInfo_t)); 1588 return (retcode); 1589 1590 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1591 if (raidPtr->Layout.map->faultsTolerated == 0) { 1592 /* This makes no sense on a RAID 0, so tell the 1593 user it's done. 
*/ 1594 *(int *) data = 100; 1595 return(0); 1596 } 1597 if (raidPtr->parity_rewrite_in_progress == 1) { 1598 *(int *) data = 100 * 1599 raidPtr->parity_rewrite_stripes_done / 1600 raidPtr->Layout.numStripe; 1601 } else { 1602 *(int *) data = 100; 1603 } 1604 return (0); 1605 1606 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1607 progressInfoPtr = (RF_ProgressInfo_t **) data; 1608 if (raidPtr->parity_rewrite_in_progress == 1) { 1609 progressInfo.total = raidPtr->Layout.numStripe; 1610 progressInfo.completed = 1611 raidPtr->parity_rewrite_stripes_done; 1612 progressInfo.remaining = progressInfo.total - 1613 progressInfo.completed; 1614 } else { 1615 progressInfo.remaining = 0; 1616 progressInfo.completed = 100; 1617 progressInfo.total = 100; 1618 } 1619 retcode = copyout(&progressInfo, *progressInfoPtr, 1620 sizeof(RF_ProgressInfo_t)); 1621 return (retcode); 1622 1623 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1624 if (raidPtr->Layout.map->faultsTolerated == 0) { 1625 /* This makes no sense on a RAID 0 */ 1626 *(int *) data = 100; 1627 return(0); 1628 } 1629 if (raidPtr->copyback_in_progress == 1) { 1630 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1631 raidPtr->Layout.numStripe; 1632 } else { 1633 *(int *) data = 100; 1634 } 1635 return (0); 1636 1637 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1638 progressInfoPtr = (RF_ProgressInfo_t **) data; 1639 if (raidPtr->copyback_in_progress == 1) { 1640 progressInfo.total = raidPtr->Layout.numStripe; 1641 progressInfo.completed = 1642 raidPtr->copyback_stripes_done; 1643 progressInfo.remaining = progressInfo.total - 1644 progressInfo.completed; 1645 } else { 1646 progressInfo.remaining = 0; 1647 progressInfo.completed = 100; 1648 progressInfo.total = 100; 1649 } 1650 retcode = copyout(&progressInfo, *progressInfoPtr, 1651 sizeof(RF_ProgressInfo_t)); 1652 return (retcode); 1653 1654 /* the sparetable daemon calls this to wait for the kernel to 1655 * need a spare table. this ioctl does not return until a 1656 * spare table is needed. XXX -- calling mpsleep here in the 1657 * ioctl code is almost certainly wrong and evil. 
 -- XXX XXX
	 * -- I should either compute the spare table in the kernel,
	 * or have a different -- XXX XXX -- interface (a different
	 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
1718 */ 1719 1720 switch (cmd) { 1721 case DIOCGDINFO: 1722 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1723 break; 1724 #ifdef __HAVE_OLD_DISKLABEL 1725 case ODIOCGDINFO: 1726 newlabel = *(rs->sc_dkdev.dk_label); 1727 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1728 return ENOTTY; 1729 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1730 break; 1731 #endif 1732 1733 case DIOCGPART: 1734 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1735 ((struct partinfo *) data)->part = 1736 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1737 break; 1738 1739 case DIOCWDINFO: 1740 case DIOCSDINFO: 1741 #ifdef __HAVE_OLD_DISKLABEL 1742 case ODIOCWDINFO: 1743 case ODIOCSDINFO: 1744 #endif 1745 { 1746 struct disklabel *lp; 1747 #ifdef __HAVE_OLD_DISKLABEL 1748 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1749 memset(&newlabel, 0, sizeof newlabel); 1750 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1751 lp = &newlabel; 1752 } else 1753 #endif 1754 lp = (struct disklabel *)data; 1755 1756 if ((error = raidlock(rs)) != 0) 1757 return (error); 1758 1759 rs->sc_flags |= RAIDF_LABELLING; 1760 1761 error = setdisklabel(rs->sc_dkdev.dk_label, 1762 lp, 0, rs->sc_dkdev.dk_cpulabel); 1763 if (error == 0) { 1764 if (cmd == DIOCWDINFO 1765 #ifdef __HAVE_OLD_DISKLABEL 1766 || cmd == ODIOCWDINFO 1767 #endif 1768 ) 1769 error = writedisklabel(RAIDLABELDEV(dev), 1770 raidstrategy, rs->sc_dkdev.dk_label, 1771 rs->sc_dkdev.dk_cpulabel); 1772 } 1773 rs->sc_flags &= ~RAIDF_LABELLING; 1774 1775 raidunlock(rs); 1776 1777 if (error) 1778 return (error); 1779 break; 1780 } 1781 1782 case DIOCWLABEL: 1783 if (*(int *) data != 0) 1784 rs->sc_flags |= RAIDF_WLABEL; 1785 else 1786 rs->sc_flags &= ~RAIDF_WLABEL; 1787 break; 1788 1789 case DIOCGDEFLABEL: 1790 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1791 break; 1792 1793 #ifdef __HAVE_OLD_DISKLABEL 1794 case ODIOCGDEFLABEL: 1795 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1796 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1797 return ENOTTY; 1798 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1799 break; 1800 #endif 1801 1802 case DIOCAWEDGE: 1803 case DIOCDWEDGE: 1804 dkw = (void *)data; 1805 1806 /* If the ioctl happens here, the parent is us. */ 1807 (void)strcpy(dkw->dkw_parent, rs->sc_xname); 1808 return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw); 1809 1810 case DIOCLWEDGES: 1811 return dkwedge_list(&rs->sc_dkdev, 1812 (struct dkwedge_list *)data, l); 1813 1814 default: 1815 retcode = ENOTTY; 1816 } 1817 return (retcode); 1818 1819 } 1820 1821 1822 /* raidinit -- complete the rest of the initialization for the 1823 RAIDframe device. */ 1824 1825 1826 static void 1827 raidinit(RF_Raid_t *raidPtr) 1828 { 1829 struct cfdata *cf; 1830 struct raid_softc *rs; 1831 int unit; 1832 1833 unit = raidPtr->raidid; 1834 1835 rs = &raid_softc[unit]; 1836 1837 /* XXX should check return code first... */ 1838 rs->sc_flags |= RAIDF_INITED; 1839 1840 /* XXX doesn't check bounds. 
*/ 1841 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1842 1843 /* attach the pseudo device */ 1844 cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK); 1845 cf->cf_name = raid_cd.cd_name; 1846 cf->cf_atname = raid_cd.cd_name; 1847 cf->cf_unit = unit; 1848 cf->cf_fstate = FSTATE_STAR; 1849 1850 rs->sc_dev = config_attach_pseudo(cf); 1851 1852 if (rs->sc_dev==NULL) { 1853 printf("raid%d: config_attach_pseudo failed\n", 1854 raidPtr->raidid); 1855 } 1856 1857 /* disk_attach actually creates space for the CPU disklabel, among 1858 * other things, so it's critical to call this *BEFORE* we try putzing 1859 * with disklabels. */ 1860 1861 disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver); 1862 disk_attach(&rs->sc_dkdev); 1863 1864 /* XXX There may be a weird interaction here between this, and 1865 * protectedSectors, as used in RAIDframe. */ 1866 1867 rs->sc_size = raidPtr->totalSectors; 1868 1869 dkwedge_discover(&rs->sc_dkdev); 1870 1871 rf_set_properties(rs, raidPtr); 1872 1873 } 1874 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1875 /* wake up the daemon & tell it to get us a spare table 1876 * XXX 1877 * the entries in the queues should be tagged with the raidPtr 1878 * so that in the extremely rare case that two recons happen at once, 1879 * we know for which device were requesting a spare table 1880 * XXX 1881 * 1882 * XXX This code is not currently used. GO 1883 */ 1884 int 1885 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1886 { 1887 int retcode; 1888 1889 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1890 req->next = rf_sparet_wait_queue; 1891 rf_sparet_wait_queue = req; 1892 wakeup(&rf_sparet_wait_queue); 1893 1894 /* mpsleep unlocks the mutex */ 1895 while (!rf_sparet_resp_queue) { 1896 tsleep(&rf_sparet_resp_queue, PRIBIO, 1897 "raidframe getsparetable", 0); 1898 } 1899 req = rf_sparet_resp_queue; 1900 rf_sparet_resp_queue = req->next; 1901 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1902 1903 retcode = req->fcol; 1904 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1905 * alloc'd */ 1906 return (retcode); 1907 } 1908 #endif 1909 1910 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1911 * bp & passes it down. 1912 * any calls originating in the kernel must use non-blocking I/O 1913 * do some extra sanity checking to return "appropriate" error values for 1914 * certain conditions (to make some standard utilities work) 1915 * 1916 * Formerly known as: rf_DoAccessKernel 1917 */ 1918 void 1919 raidstart(RF_Raid_t *raidPtr) 1920 { 1921 RF_SectorCount_t num_blocks, pb, sum; 1922 RF_RaidAddr_t raid_addr; 1923 struct partition *pp; 1924 daddr_t blocknum; 1925 int unit; 1926 struct raid_softc *rs; 1927 int do_async; 1928 struct buf *bp; 1929 int rc; 1930 1931 unit = raidPtr->raidid; 1932 rs = &raid_softc[unit]; 1933 1934 /* quick check to see if anything has died recently */ 1935 RF_LOCK_MUTEX(raidPtr->mutex); 1936 if (raidPtr->numNewFailures > 0) { 1937 RF_UNLOCK_MUTEX(raidPtr->mutex); 1938 rf_update_component_labels(raidPtr, 1939 RF_NORMAL_COMPONENT_UPDATE); 1940 RF_LOCK_MUTEX(raidPtr->mutex); 1941 raidPtr->numNewFailures--; 1942 } 1943 1944 /* Check to see if we're at the limit... */ 1945 while (raidPtr->openings > 0) { 1946 RF_UNLOCK_MUTEX(raidPtr->mutex); 1947 1948 /* get the next item, if any, from the queue */ 1949 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) { 1950 /* nothing more to do */ 1951 return; 1952 } 1953 1954 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1955 * partition.. 
Need to make it absolute to the underlying 1956 * device.. */ 1957 1958 blocknum = bp->b_blkno; 1959 if (DISKPART(bp->b_dev) != RAW_PART) { 1960 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 1961 blocknum += pp->p_offset; 1962 } 1963 1964 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1965 (int) blocknum)); 1966 1967 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1968 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1969 1970 /* *THIS* is where we adjust what block we're going to... 1971 * but DO NOT TOUCH bp->b_blkno!!! */ 1972 raid_addr = blocknum; 1973 1974 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1975 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 1976 sum = raid_addr + num_blocks + pb; 1977 if (1 || rf_debugKernelAccess) { 1978 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 1979 (int) raid_addr, (int) sum, (int) num_blocks, 1980 (int) pb, (int) bp->b_resid)); 1981 } 1982 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 1983 || (sum < num_blocks) || (sum < pb)) { 1984 bp->b_error = ENOSPC; 1985 bp->b_resid = bp->b_bcount; 1986 biodone(bp); 1987 RF_LOCK_MUTEX(raidPtr->mutex); 1988 continue; 1989 } 1990 /* 1991 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 1992 */ 1993 1994 if (bp->b_bcount & raidPtr->sectorMask) { 1995 bp->b_error = EINVAL; 1996 bp->b_resid = bp->b_bcount; 1997 biodone(bp); 1998 RF_LOCK_MUTEX(raidPtr->mutex); 1999 continue; 2000 2001 } 2002 db1_printf(("Calling DoAccess..\n")); 2003 2004 2005 RF_LOCK_MUTEX(raidPtr->mutex); 2006 raidPtr->openings--; 2007 RF_UNLOCK_MUTEX(raidPtr->mutex); 2008 2009 /* 2010 * Everything is async. 2011 */ 2012 do_async = 1; 2013 2014 disk_busy(&rs->sc_dkdev); 2015 2016 /* XXX we're still at splbio() here... do we *really* 2017 need to be? */ 2018 2019 /* don't ever condition on bp->b_flags & B_WRITE. 2020 * always condition on B_READ instead */ 2021 2022 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 2023 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 2024 do_async, raid_addr, num_blocks, 2025 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 2026 2027 if (rc) { 2028 bp->b_error = rc; 2029 bp->b_resid = bp->b_bcount; 2030 biodone(bp); 2031 /* continue loop */ 2032 } 2033 2034 RF_LOCK_MUTEX(raidPtr->mutex); 2035 } 2036 RF_UNLOCK_MUTEX(raidPtr->mutex); 2037 } 2038 2039 2040 2041 2042 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 2043 2044 int 2045 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 2046 { 2047 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 2048 struct buf *bp; 2049 2050 req->queue = queue; 2051 2052 #if DIAGNOSTIC 2053 if (queue->raidPtr->raidid >= numraid) { 2054 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid, 2055 numraid); 2056 panic("Invalid Unit number in rf_DispatchKernelIO"); 2057 } 2058 #endif 2059 2060 bp = req->bp; 2061 2062 switch (req->type) { 2063 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 2064 /* XXX need to do something extra here.. */ 2065 /* I'm leaving this in, as I've never actually seen it used, 2066 * and I'd like folks to report it... 
GO */ 2067 printf(("WAKEUP CALLED\n")); 2068 queue->numOutstanding++; 2069 2070 bp->b_flags = 0; 2071 bp->b_private = req; 2072 2073 KernelWakeupFunc(bp); 2074 break; 2075 2076 case RF_IO_TYPE_READ: 2077 case RF_IO_TYPE_WRITE: 2078 #if RF_ACC_TRACE > 0 2079 if (req->tracerec) { 2080 RF_ETIMER_START(req->tracerec->timer); 2081 } 2082 #endif 2083 InitBP(bp, queue->rf_cinfo->ci_vp, 2084 op, queue->rf_cinfo->ci_dev, 2085 req->sectorOffset, req->numSector, 2086 req->buf, KernelWakeupFunc, (void *) req, 2087 queue->raidPtr->logBytesPerSector, req->b_proc); 2088 2089 if (rf_debugKernelAccess) { 2090 db1_printf(("dispatch: bp->b_blkno = %ld\n", 2091 (long) bp->b_blkno)); 2092 } 2093 queue->numOutstanding++; 2094 queue->last_deq_sector = req->sectorOffset; 2095 /* acc wouldn't have been let in if there were any pending 2096 * reqs at any other priority */ 2097 queue->curPriority = req->priority; 2098 2099 db1_printf(("Going for %c to unit %d col %d\n", 2100 req->type, queue->raidPtr->raidid, 2101 queue->col)); 2102 db1_printf(("sector %d count %d (%d bytes) %d\n", 2103 (int) req->sectorOffset, (int) req->numSector, 2104 (int) (req->numSector << 2105 queue->raidPtr->logBytesPerSector), 2106 (int) queue->raidPtr->logBytesPerSector)); 2107 bdev_strategy(bp); 2108 2109 break; 2110 2111 default: 2112 panic("bad req->type in rf_DispatchKernelIO"); 2113 } 2114 db1_printf(("Exiting from DispatchKernelIO\n")); 2115 2116 return (0); 2117 } 2118 /* this is the callback function associated with a I/O invoked from 2119 kernel code. 2120 */ 2121 static void 2122 KernelWakeupFunc(struct buf *bp) 2123 { 2124 RF_DiskQueueData_t *req = NULL; 2125 RF_DiskQueue_t *queue; 2126 int s; 2127 2128 s = splbio(); 2129 db1_printf(("recovering the request queue:\n")); 2130 req = bp->b_private; 2131 2132 queue = (RF_DiskQueue_t *) req->queue; 2133 2134 #if RF_ACC_TRACE > 0 2135 if (req->tracerec) { 2136 RF_ETIMER_STOP(req->tracerec->timer); 2137 RF_ETIMER_EVAL(req->tracerec->timer); 2138 RF_LOCK_MUTEX(rf_tracing_mutex); 2139 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2140 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 2141 req->tracerec->num_phys_ios++; 2142 RF_UNLOCK_MUTEX(rf_tracing_mutex); 2143 } 2144 #endif 2145 2146 /* XXX Ok, let's get aggressive... If b_error is set, let's go 2147 * ballistic, and mark the component as hosed... */ 2148 2149 if (bp->b_error != 0) { 2150 /* Mark the disk as dead */ 2151 /* but only mark it once... */ 2152 /* and only if it wouldn't leave this RAID set 2153 completely broken */ 2154 if (((queue->raidPtr->Disks[queue->col].status == 2155 rf_ds_optimal) || 2156 (queue->raidPtr->Disks[queue->col].status == 2157 rf_ds_used_spare)) && 2158 (queue->raidPtr->numFailures < 2159 queue->raidPtr->Layout.map->faultsTolerated)) { 2160 printf("raid%d: IO Error. Marking %s as failed.\n", 2161 queue->raidPtr->raidid, 2162 queue->raidPtr->Disks[queue->col].devname); 2163 queue->raidPtr->Disks[queue->col].status = 2164 rf_ds_failed; 2165 queue->raidPtr->status = rf_rs_degraded; 2166 queue->raidPtr->numFailures++; 2167 queue->raidPtr->numNewFailures++; 2168 } else { /* Disk is already dead... */ 2169 /* printf("Disk already marked as dead!\n"); */ 2170 } 2171 2172 } 2173 2174 /* Fill in the error value */ 2175 2176 req->error = bp->b_error; 2177 2178 simple_lock(&queue->raidPtr->iodone_lock); 2179 2180 /* Drop this one on the "finished" queue... 
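	   The consumer side of this queue is the raidio kernel thread
	   elsewhere in this file.  As a rough, illustrative sketch only
	   (the wmesg and the exact completion call are approximations,
	   not verbatim), given RF_Raid_t *raidPtr and
	   RF_DiskQueueData_t *req, that loop does approximately:

		simple_lock(&raidPtr->iodone_lock);
		while ((req = TAILQ_FIRST(&raidPtr->iodone)) == NULL)
			ltsleep(&raidPtr->iodone, PRIBIO, "raidiow", 0,
			    &raidPtr->iodone_lock);
		TAILQ_REMOVE(&raidPtr->iodone, req, iodone_entries);
		simple_unlock(&raidPtr->iodone_lock);
		rf_DiskIOComplete(req->queue, req, req->error);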
 */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
    RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
    void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
    struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label... */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
	    (lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the raid device
		 * may vary when the interleave is changed even though
		 * exactly the same components are used, and an old
		 * disklabel may be used if one is found.
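		 *
		 * For example, if the set is reconfigured with a different
		 * interleave so that rs->sc_size drops from, say, 1000000
		 * to 999936 sectors, a stale label still claiming
		 * d_secperunit == 1000000 (or a partition running past
		 * 999936) only draws the warnings below instead of being
		 * trusted silently.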
2293 */ 2294 if (lp->d_secperunit != rs->sc_size) 2295 printf("raid%d: WARNING: %s: " 2296 "total sector size in disklabel (%d) != " 2297 "the size of raid (%ld)\n", unit, rs->sc_xname, 2298 lp->d_secperunit, (long) rs->sc_size); 2299 for (i = 0; i < lp->d_npartitions; i++) { 2300 pp = &lp->d_partitions[i]; 2301 if (pp->p_offset + pp->p_size > rs->sc_size) 2302 printf("raid%d: WARNING: %s: end of partition `%c' " 2303 "exceeds the size of raid (%ld)\n", 2304 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size); 2305 } 2306 } 2307 2308 } 2309 /* 2310 * Take care of things one might want to take care of in the event 2311 * that a disklabel isn't present. 2312 */ 2313 static void 2314 raidmakedisklabel(struct raid_softc *rs) 2315 { 2316 struct disklabel *lp = rs->sc_dkdev.dk_label; 2317 db1_printf(("Making a label..\n")); 2318 2319 /* 2320 * For historical reasons, if there's no disklabel present 2321 * the raw partition must be marked FS_BSDFFS. 2322 */ 2323 2324 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2325 2326 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2327 2328 lp->d_checksum = dkcksum(lp); 2329 } 2330 /* 2331 * Wait interruptibly for an exclusive lock. 2332 * 2333 * XXX 2334 * Several drivers do this; it should be abstracted and made MP-safe. 2335 * (Hmm... where have we seen this warning before :-> GO ) 2336 */ 2337 static int 2338 raidlock(struct raid_softc *rs) 2339 { 2340 int error; 2341 2342 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2343 rs->sc_flags |= RAIDF_WANTED; 2344 if ((error = 2345 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2346 return (error); 2347 } 2348 rs->sc_flags |= RAIDF_LOCKED; 2349 return (0); 2350 } 2351 /* 2352 * Unlock and wake up any waiters. 2353 */ 2354 static void 2355 raidunlock(struct raid_softc *rs) 2356 { 2357 2358 rs->sc_flags &= ~RAIDF_LOCKED; 2359 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2360 rs->sc_flags &= ~RAIDF_WANTED; 2361 wakeup(rs); 2362 } 2363 } 2364 2365 2366 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2367 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2368 2369 int 2370 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2371 { 2372 RF_ComponentLabel_t clabel; 2373 raidread_component_label(dev, b_vp, &clabel); 2374 clabel.mod_counter = mod_counter; 2375 clabel.clean = RF_RAID_CLEAN; 2376 raidwrite_component_label(dev, b_vp, &clabel); 2377 return(0); 2378 } 2379 2380 2381 int 2382 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2383 { 2384 RF_ComponentLabel_t clabel; 2385 raidread_component_label(dev, b_vp, &clabel); 2386 clabel.mod_counter = mod_counter; 2387 clabel.clean = RF_RAID_DIRTY; 2388 raidwrite_component_label(dev, b_vp, &clabel); 2389 return(0); 2390 } 2391 2392 /* ARGSUSED */ 2393 int 2394 raidread_component_label(dev_t dev, struct vnode *b_vp, 2395 RF_ComponentLabel_t *clabel) 2396 { 2397 struct buf *bp; 2398 const struct bdevsw *bdev; 2399 int error; 2400 2401 /* XXX should probably ensure that we don't try to do this if 2402 someone has changed rf_protected_sectors. */ 2403 2404 if (b_vp == NULL) { 2405 /* For whatever reason, this component is not valid. 2406 Don't try to read a component label from it. */ 2407 return(EINVAL); 2408 } 2409 2410 /* get a block of the appropriate size... 
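	   With the constants above this works out to:
	   RF_COMPONENT_INFO_OFFSET (16384 bytes) / DEV_BSIZE (512) =
	   block 32 of the component, and a transfer of
	   RF_COMPONENT_INFO_SIZE (1024) bytes, i.e. two DEV_BSIZE blocks;
	   this sits inside the region reserved at the front of each
	   component (see rf_protectedSectors).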
*/ 2411 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2412 bp->b_dev = dev; 2413 2414 /* get our ducks in a row for the read */ 2415 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2416 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2417 bp->b_flags |= B_READ; 2418 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2419 2420 bdev = bdevsw_lookup(bp->b_dev); 2421 if (bdev == NULL) 2422 return (ENXIO); 2423 (*bdev->d_strategy)(bp); 2424 2425 error = biowait(bp); 2426 2427 if (!error) { 2428 memcpy(clabel, bp->b_data, 2429 sizeof(RF_ComponentLabel_t)); 2430 } 2431 2432 brelse(bp, 0); 2433 return(error); 2434 } 2435 /* ARGSUSED */ 2436 int 2437 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2438 RF_ComponentLabel_t *clabel) 2439 { 2440 struct buf *bp; 2441 const struct bdevsw *bdev; 2442 int error; 2443 2444 /* get a block of the appropriate size... */ 2445 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2446 bp->b_dev = dev; 2447 2448 /* get our ducks in a row for the write */ 2449 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2450 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2451 bp->b_flags |= B_WRITE; 2452 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2453 2454 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2455 2456 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2457 2458 bdev = bdevsw_lookup(bp->b_dev); 2459 if (bdev == NULL) 2460 return (ENXIO); 2461 (*bdev->d_strategy)(bp); 2462 error = biowait(bp); 2463 brelse(bp, 0); 2464 if (error) { 2465 #if 1 2466 printf("Failed to write RAID component info!\n"); 2467 #endif 2468 } 2469 2470 return(error); 2471 } 2472 2473 void 2474 rf_markalldirty(RF_Raid_t *raidPtr) 2475 { 2476 RF_ComponentLabel_t clabel; 2477 int sparecol; 2478 int c; 2479 int j; 2480 int scol = -1; 2481 2482 raidPtr->mod_counter++; 2483 for (c = 0; c < raidPtr->numCol; c++) { 2484 /* we don't want to touch (at all) a disk that has 2485 failed */ 2486 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2487 raidread_component_label( 2488 raidPtr->Disks[c].dev, 2489 raidPtr->raid_cinfo[c].ci_vp, 2490 &clabel); 2491 if (clabel.status == rf_ds_spared) { 2492 /* XXX do something special... 2493 but whatever you do, don't 2494 try to access it!! */ 2495 } else { 2496 raidmarkdirty( 2497 raidPtr->Disks[c].dev, 2498 raidPtr->raid_cinfo[c].ci_vp, 2499 raidPtr->mod_counter); 2500 } 2501 } 2502 } 2503 2504 for( c = 0; c < raidPtr->numSpare ; c++) { 2505 sparecol = raidPtr->numCol + c; 2506 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2507 /* 2508 2509 we claim this disk is "optimal" if it's 2510 rf_ds_used_spare, as that means it should be 2511 directly substitutable for the disk it replaced. 2512 We note that too... 
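			   For instance, if column 2 failed and the first
			   spare took over (so sparecol == numCol + 0),
			   then Disks[2].spareCol == sparecol and the scan
			   below sets scol = 2; the spare's label is then
			   re-initialized and marked dirty as if it were
			   column 2.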
2513 2514 */ 2515 2516 for(j=0;j<raidPtr->numCol;j++) { 2517 if (raidPtr->Disks[j].spareCol == sparecol) { 2518 scol = j; 2519 break; 2520 } 2521 } 2522 2523 raidread_component_label( 2524 raidPtr->Disks[sparecol].dev, 2525 raidPtr->raid_cinfo[sparecol].ci_vp, 2526 &clabel); 2527 /* make sure status is noted */ 2528 2529 raid_init_component_label(raidPtr, &clabel); 2530 2531 clabel.row = 0; 2532 clabel.column = scol; 2533 /* Note: we *don't* change status from rf_ds_used_spare 2534 to rf_ds_optimal */ 2535 /* clabel.status = rf_ds_optimal; */ 2536 2537 raidmarkdirty(raidPtr->Disks[sparecol].dev, 2538 raidPtr->raid_cinfo[sparecol].ci_vp, 2539 raidPtr->mod_counter); 2540 } 2541 } 2542 } 2543 2544 2545 void 2546 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2547 { 2548 RF_ComponentLabel_t clabel; 2549 int sparecol; 2550 int c; 2551 int j; 2552 int scol; 2553 2554 scol = -1; 2555 2556 /* XXX should do extra checks to make sure things really are clean, 2557 rather than blindly setting the clean bit... */ 2558 2559 raidPtr->mod_counter++; 2560 2561 for (c = 0; c < raidPtr->numCol; c++) { 2562 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2563 raidread_component_label( 2564 raidPtr->Disks[c].dev, 2565 raidPtr->raid_cinfo[c].ci_vp, 2566 &clabel); 2567 /* make sure status is noted */ 2568 clabel.status = rf_ds_optimal; 2569 2570 /* bump the counter */ 2571 clabel.mod_counter = raidPtr->mod_counter; 2572 2573 /* note what unit we are configured as */ 2574 clabel.last_unit = raidPtr->raidid; 2575 2576 raidwrite_component_label( 2577 raidPtr->Disks[c].dev, 2578 raidPtr->raid_cinfo[c].ci_vp, 2579 &clabel); 2580 if (final == RF_FINAL_COMPONENT_UPDATE) { 2581 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2582 raidmarkclean( 2583 raidPtr->Disks[c].dev, 2584 raidPtr->raid_cinfo[c].ci_vp, 2585 raidPtr->mod_counter); 2586 } 2587 } 2588 } 2589 /* else we don't touch it.. */ 2590 } 2591 2592 for( c = 0; c < raidPtr->numSpare ; c++) { 2593 sparecol = raidPtr->numCol + c; 2594 /* Need to ensure that the reconstruct actually completed! */ 2595 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2596 /* 2597 2598 we claim this disk is "optimal" if it's 2599 rf_ds_used_spare, as that means it should be 2600 directly substitutable for the disk it replaced. 2601 We note that too... 2602 2603 */ 2604 2605 for(j=0;j<raidPtr->numCol;j++) { 2606 if (raidPtr->Disks[j].spareCol == sparecol) { 2607 scol = j; 2608 break; 2609 } 2610 } 2611 2612 /* XXX shouldn't *really* need this... 
*/ 2613 raidread_component_label( 2614 raidPtr->Disks[sparecol].dev, 2615 raidPtr->raid_cinfo[sparecol].ci_vp, 2616 &clabel); 2617 /* make sure status is noted */ 2618 2619 raid_init_component_label(raidPtr, &clabel); 2620 2621 clabel.mod_counter = raidPtr->mod_counter; 2622 clabel.column = scol; 2623 clabel.status = rf_ds_optimal; 2624 clabel.last_unit = raidPtr->raidid; 2625 2626 raidwrite_component_label( 2627 raidPtr->Disks[sparecol].dev, 2628 raidPtr->raid_cinfo[sparecol].ci_vp, 2629 &clabel); 2630 if (final == RF_FINAL_COMPONENT_UPDATE) { 2631 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2632 raidmarkclean( raidPtr->Disks[sparecol].dev, 2633 raidPtr->raid_cinfo[sparecol].ci_vp, 2634 raidPtr->mod_counter); 2635 } 2636 } 2637 } 2638 } 2639 } 2640 2641 void 2642 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2643 { 2644 2645 if (vp != NULL) { 2646 if (auto_configured == 1) { 2647 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2648 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2649 vput(vp); 2650 2651 } else { 2652 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2653 } 2654 } 2655 } 2656 2657 2658 void 2659 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2660 { 2661 int r,c; 2662 struct vnode *vp; 2663 int acd; 2664 2665 2666 /* We take this opportunity to close the vnodes like we should.. */ 2667 2668 for (c = 0; c < raidPtr->numCol; c++) { 2669 vp = raidPtr->raid_cinfo[c].ci_vp; 2670 acd = raidPtr->Disks[c].auto_configured; 2671 rf_close_component(raidPtr, vp, acd); 2672 raidPtr->raid_cinfo[c].ci_vp = NULL; 2673 raidPtr->Disks[c].auto_configured = 0; 2674 } 2675 2676 for (r = 0; r < raidPtr->numSpare; r++) { 2677 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2678 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2679 rf_close_component(raidPtr, vp, acd); 2680 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2681 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2682 } 2683 } 2684 2685 2686 void 2687 rf_ReconThread(struct rf_recon_req *req) 2688 { 2689 int s; 2690 RF_Raid_t *raidPtr; 2691 2692 s = splbio(); 2693 raidPtr = (RF_Raid_t *) req->raidPtr; 2694 raidPtr->recon_in_progress = 1; 2695 2696 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2697 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2698 2699 RF_Free(req, sizeof(*req)); 2700 2701 raidPtr->recon_in_progress = 0; 2702 splx(s); 2703 2704 /* That's all... */ 2705 kthread_exit(0); /* does not return */ 2706 } 2707 2708 void 2709 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2710 { 2711 int retcode; 2712 int s; 2713 2714 raidPtr->parity_rewrite_stripes_done = 0; 2715 raidPtr->parity_rewrite_in_progress = 1; 2716 s = splbio(); 2717 retcode = rf_RewriteParity(raidPtr); 2718 splx(s); 2719 if (retcode) { 2720 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2721 } else { 2722 /* set the clean bit! If we shutdown correctly, 2723 the clean bit on each component label will get 2724 set */ 2725 raidPtr->parity_good = RF_RAID_CLEAN; 2726 } 2727 raidPtr->parity_rewrite_in_progress = 0; 2728 2729 /* Anyone waiting for us to stop? If so, inform them... */ 2730 if (raidPtr->waitShutdown) { 2731 wakeup(&raidPtr->parity_rewrite_in_progress); 2732 } 2733 2734 /* That's all... */ 2735 kthread_exit(0); /* does not return */ 2736 } 2737 2738 2739 void 2740 rf_CopybackThread(RF_Raid_t *raidPtr) 2741 { 2742 int s; 2743 2744 raidPtr->copyback_in_progress = 1; 2745 s = splbio(); 2746 rf_CopybackReconstructedData(raidPtr); 2747 splx(s); 2748 raidPtr->copyback_in_progress = 0; 2749 2750 /* That's all... 
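	   Each of these worker threads follows the same shape: mark the
	   operation in progress, do the work at splbio, clear the flag,
	   and exit.  A waiter (the shutdown path, for one) synchronizes
	   against them with nothing fancier than, roughly (wmesg and
	   timeout here are illustrative):

		while (raidPtr->parity_rewrite_in_progress)
			tsleep(&raidPtr->parity_rewrite_in_progress,
			    PRIBIO, "rfprw", hz);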
*/ 2751 kthread_exit(0); /* does not return */ 2752 } 2753 2754 2755 void 2756 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 2757 { 2758 int s; 2759 RF_Raid_t *raidPtr; 2760 2761 s = splbio(); 2762 raidPtr = req->raidPtr; 2763 raidPtr->recon_in_progress = 1; 2764 rf_ReconstructInPlace(raidPtr, req->col); 2765 RF_Free(req, sizeof(*req)); 2766 raidPtr->recon_in_progress = 0; 2767 splx(s); 2768 2769 /* That's all... */ 2770 kthread_exit(0); /* does not return */ 2771 } 2772 2773 static RF_AutoConfig_t * 2774 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 2775 const char *cname, RF_SectorCount_t size) 2776 { 2777 int good_one = 0; 2778 RF_ComponentLabel_t *clabel; 2779 RF_AutoConfig_t *ac; 2780 2781 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT); 2782 if (clabel == NULL) { 2783 oomem: 2784 while(ac_list) { 2785 ac = ac_list; 2786 if (ac->clabel) 2787 free(ac->clabel, M_RAIDFRAME); 2788 ac_list = ac_list->next; 2789 free(ac, M_RAIDFRAME); 2790 } 2791 printf("RAID auto config: out of memory!\n"); 2792 return NULL; /* XXX probably should panic? */ 2793 } 2794 2795 if (!raidread_component_label(dev, vp, clabel)) { 2796 /* Got the label. Does it look reasonable? */ 2797 if (rf_reasonable_label(clabel) && 2798 (clabel->partitionSize <= size)) { 2799 #ifdef DEBUG 2800 printf("Component on: %s: %llu\n", 2801 cname, (unsigned long long)size); 2802 rf_print_component_label(clabel); 2803 #endif 2804 /* if it's reasonable, add it, else ignore it. */ 2805 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 2806 M_NOWAIT); 2807 if (ac == NULL) { 2808 free(clabel, M_RAIDFRAME); 2809 goto oomem; 2810 } 2811 strlcpy(ac->devname, cname, sizeof(ac->devname)); 2812 ac->dev = dev; 2813 ac->vp = vp; 2814 ac->clabel = clabel; 2815 ac->next = ac_list; 2816 ac_list = ac; 2817 good_one = 1; 2818 } 2819 } 2820 if (!good_one) { 2821 /* cleanup */ 2822 free(clabel, M_RAIDFRAME); 2823 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2824 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2825 vput(vp); 2826 } 2827 return ac_list; 2828 } 2829 2830 RF_AutoConfig_t * 2831 rf_find_raid_components() 2832 { 2833 struct vnode *vp; 2834 struct disklabel label; 2835 struct device *dv; 2836 dev_t dev; 2837 int bmajor, bminor, wedge; 2838 int error; 2839 int i; 2840 RF_AutoConfig_t *ac_list; 2841 2842 2843 /* initialize the AutoConfig list */ 2844 ac_list = NULL; 2845 2846 /* we begin by trolling through *all* the devices on the system */ 2847 2848 for (dv = alldevs.tqh_first; dv != NULL; 2849 dv = dv->dv_list.tqe_next) { 2850 2851 /* we are only interested in disks... */ 2852 if (device_class(dv) != DV_DISK) 2853 continue; 2854 2855 /* we don't care about floppies... */ 2856 if (device_is_a(dv, "fd")) { 2857 continue; 2858 } 2859 2860 /* we don't care about CD's... */ 2861 if (device_is_a(dv, "cd")) { 2862 continue; 2863 } 2864 2865 /* we don't care about md's... */ 2866 if (device_is_a(dv, "md")) { 2867 continue; 2868 } 2869 2870 /* hdfd is the Atari/Hades floppy driver */ 2871 if (device_is_a(dv, "hdfd")) { 2872 continue; 2873 } 2874 2875 /* fdisa is the Atari/Milan floppy driver */ 2876 if (device_is_a(dv, "fdisa")) { 2877 continue; 2878 } 2879 2880 /* need to find the device_name_to_block_device_major stuff */ 2881 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 2882 2883 /* get a vnode for the raw partition of this disk */ 2884 2885 wedge = device_is_a(dv, "dk"); 2886 bminor = minor(device_unit(dv)); 2887 dev = wedge ? 
makedev(bmajor, bminor) : 2888 MAKEDISKDEV(bmajor, bminor, RAW_PART); 2889 if (bdevvp(dev, &vp)) 2890 panic("RAID can't alloc vnode"); 2891 2892 error = VOP_OPEN(vp, FREAD, NOCRED); 2893 2894 if (error) { 2895 /* "Who cares." Continue looking 2896 for something that exists*/ 2897 vput(vp); 2898 continue; 2899 } 2900 2901 if (wedge) { 2902 struct dkwedge_info dkw; 2903 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 2904 NOCRED); 2905 if (error) { 2906 printf("RAIDframe: can't get wedge info for " 2907 "dev %s (%d)\n", device_xname(dv), error); 2908 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2909 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2910 vput(vp); 2911 continue; 2912 } 2913 2914 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 2915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2916 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2917 vput(vp); 2918 continue; 2919 } 2920 2921 ac_list = rf_get_component(ac_list, dev, vp, 2922 device_xname(dv), dkw.dkw_size); 2923 continue; 2924 } 2925 2926 /* Ok, the disk exists. Go get the disklabel. */ 2927 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 2928 if (error) { 2929 /* 2930 * XXX can't happen - open() would 2931 * have errored out (or faked up one) 2932 */ 2933 if (error != ENOTTY) 2934 printf("RAIDframe: can't get label for dev " 2935 "%s (%d)\n", device_xname(dv), error); 2936 } 2937 2938 /* don't need this any more. We'll allocate it again 2939 a little later if we really do... */ 2940 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2941 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2942 vput(vp); 2943 2944 if (error) 2945 continue; 2946 2947 for (i = 0; i < label.d_npartitions; i++) { 2948 char cname[sizeof(ac_list->devname)]; 2949 2950 /* We only support partitions marked as RAID */ 2951 if (label.d_partitions[i].p_fstype != FS_RAID) 2952 continue; 2953 2954 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 2955 if (bdevvp(dev, &vp)) 2956 panic("RAID can't alloc vnode"); 2957 2958 error = VOP_OPEN(vp, FREAD, NOCRED); 2959 if (error) { 2960 /* Whatever... */ 2961 vput(vp); 2962 continue; 2963 } 2964 snprintf(cname, sizeof(cname), "%s%c", 2965 device_xname(dv), 'a' + i); 2966 ac_list = rf_get_component(ac_list, dev, vp, cname, 2967 label.d_partitions[i].p_size); 2968 } 2969 } 2970 return ac_list; 2971 } 2972 2973 2974 static int 2975 rf_reasonable_label(RF_ComponentLabel_t *clabel) 2976 { 2977 2978 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2979 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2980 ((clabel->clean == RF_RAID_CLEAN) || 2981 (clabel->clean == RF_RAID_DIRTY)) && 2982 clabel->row >=0 && 2983 clabel->column >= 0 && 2984 clabel->num_rows > 0 && 2985 clabel->num_columns > 0 && 2986 clabel->row < clabel->num_rows && 2987 clabel->column < clabel->num_columns && 2988 clabel->blockSize > 0 && 2989 clabel->numBlocks > 0) { 2990 /* label looks reasonable enough... */ 2991 return(1); 2992 } 2993 return(0); 2994 } 2995 2996 2997 #ifdef DEBUG 2998 void 2999 rf_print_component_label(RF_ComponentLabel_t *clabel) 3000 { 3001 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3002 clabel->row, clabel->column, 3003 clabel->num_rows, clabel->num_columns); 3004 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3005 clabel->version, clabel->serial_number, 3006 clabel->mod_counter); 3007 printf(" Clean: %s Status: %d\n", 3008 clabel->clean ? 
"Yes" : "No", clabel->status ); 3009 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3010 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3011 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 3012 (char) clabel->parityConfig, clabel->blockSize, 3013 clabel->numBlocks); 3014 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 3015 printf(" Contains root partition: %s\n", 3016 clabel->root_partition ? "Yes" : "No" ); 3017 printf(" Last configured as: raid%d\n", clabel->last_unit ); 3018 #if 0 3019 printf(" Config order: %d\n", clabel->config_order); 3020 #endif 3021 3022 } 3023 #endif 3024 3025 RF_ConfigSet_t * 3026 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 3027 { 3028 RF_AutoConfig_t *ac; 3029 RF_ConfigSet_t *config_sets; 3030 RF_ConfigSet_t *cset; 3031 RF_AutoConfig_t *ac_next; 3032 3033 3034 config_sets = NULL; 3035 3036 /* Go through the AutoConfig list, and figure out which components 3037 belong to what sets. */ 3038 ac = ac_list; 3039 while(ac!=NULL) { 3040 /* we're going to putz with ac->next, so save it here 3041 for use at the end of the loop */ 3042 ac_next = ac->next; 3043 3044 if (config_sets == NULL) { 3045 /* will need at least this one... */ 3046 config_sets = (RF_ConfigSet_t *) 3047 malloc(sizeof(RF_ConfigSet_t), 3048 M_RAIDFRAME, M_NOWAIT); 3049 if (config_sets == NULL) { 3050 panic("rf_create_auto_sets: No memory!"); 3051 } 3052 /* this one is easy :) */ 3053 config_sets->ac = ac; 3054 config_sets->next = NULL; 3055 config_sets->rootable = 0; 3056 ac->next = NULL; 3057 } else { 3058 /* which set does this component fit into? */ 3059 cset = config_sets; 3060 while(cset!=NULL) { 3061 if (rf_does_it_fit(cset, ac)) { 3062 /* looks like it matches... */ 3063 ac->next = cset->ac; 3064 cset->ac = ac; 3065 break; 3066 } 3067 cset = cset->next; 3068 } 3069 if (cset==NULL) { 3070 /* didn't find a match above... new set..*/ 3071 cset = (RF_ConfigSet_t *) 3072 malloc(sizeof(RF_ConfigSet_t), 3073 M_RAIDFRAME, M_NOWAIT); 3074 if (cset == NULL) { 3075 panic("rf_create_auto_sets: No memory!"); 3076 } 3077 cset->ac = ac; 3078 ac->next = NULL; 3079 cset->next = config_sets; 3080 cset->rootable = 0; 3081 config_sets = cset; 3082 } 3083 } 3084 ac = ac_next; 3085 } 3086 3087 3088 return(config_sets); 3089 } 3090 3091 static int 3092 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 3093 { 3094 RF_ComponentLabel_t *clabel1, *clabel2; 3095 3096 /* If this one matches the *first* one in the set, that's good 3097 enough, since the other members of the set would have been 3098 through here too... */ 3099 /* note that we are not checking partitionSize here.. 3100 3101 Note that we are also not checking the mod_counters here. 3102 If everything else matches execpt the mod_counter, that's 3103 good enough for this test. We will deal with the mod_counters 3104 a little later in the autoconfiguration process. 3105 3106 (clabel1->mod_counter == clabel2->mod_counter) && 3107 3108 The reason we don't check for this is that failed disks 3109 will have lower modification counts. If those disks are 3110 not added to the set they used to belong to, then they will 3111 form their own set, which may result in 2 different sets, 3112 for example, competing to be configured at raid0, and 3113 perhaps competing to be the root filesystem set. If the 3114 wrong ones get configured, or both attempt to become /, 3115 weird behaviour and or serious lossage will occur. Thus we 3116 need to bring them into the fold here, and kick them out at 3117 a later point. 
	 */

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    (clabel1->numBlocks == clabel2->numBlocks) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while (ac != NULL) {
		if (mod_counter_found == 0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}


	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for (c = 0; c < num_cols; c++) {
		ac = auto_config;
		while (ac != NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				    ac->devname, c);
#endif
				break;
			}
			ac = ac->next;
		}
		if (ac == NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c % 2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c % 2 == 1)) {
			/* Just finished an even/odd mirror pair without
			   bailing out.. reset the even_pair_failed flag,
			   and go on to the next pair....
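
			   As a concrete illustration for a four-component
			   RAID 1 (mirror pairs (0,1) and (2,3)): losing
			   columns 0 and 2 leaves one live member in each
			   pair, so the loop keeps going; losing both 0
			   and 1 sets even_pair_failed at column 0 and
			   returns 0 at column 1.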
*/ 3233 even_pair_failed = 0; 3234 } 3235 } 3236 3237 clabel = cset->ac->clabel; 3238 3239 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3240 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3241 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3242 /* XXX this needs to be made *much* more general */ 3243 /* Too many failures */ 3244 return(0); 3245 } 3246 /* otherwise, all is well, and we've got enough to take a kick 3247 at autoconfiguring this set */ 3248 return(1); 3249 } 3250 3251 void 3252 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3253 RF_Raid_t *raidPtr) 3254 { 3255 RF_ComponentLabel_t *clabel; 3256 int i; 3257 3258 clabel = ac->clabel; 3259 3260 /* 1. Fill in the common stuff */ 3261 config->numRow = clabel->num_rows = 1; 3262 config->numCol = clabel->num_columns; 3263 config->numSpare = 0; /* XXX should this be set here? */ 3264 config->sectPerSU = clabel->sectPerSU; 3265 config->SUsPerPU = clabel->SUsPerPU; 3266 config->SUsPerRU = clabel->SUsPerRU; 3267 config->parityConfig = clabel->parityConfig; 3268 /* XXX... */ 3269 strcpy(config->diskQueueType,"fifo"); 3270 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3271 config->layoutSpecificSize = 0; /* XXX ?? */ 3272 3273 while(ac!=NULL) { 3274 /* row/col values will be in range due to the checks 3275 in reasonable_label() */ 3276 strcpy(config->devnames[0][ac->clabel->column], 3277 ac->devname); 3278 ac = ac->next; 3279 } 3280 3281 for(i=0;i<RF_MAXDBGV;i++) { 3282 config->debugVars[i][0] = 0; 3283 } 3284 } 3285 3286 int 3287 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3288 { 3289 RF_ComponentLabel_t clabel; 3290 struct vnode *vp; 3291 dev_t dev; 3292 int column; 3293 int sparecol; 3294 3295 raidPtr->autoconfigure = new_value; 3296 3297 for(column=0; column<raidPtr->numCol; column++) { 3298 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3299 dev = raidPtr->Disks[column].dev; 3300 vp = raidPtr->raid_cinfo[column].ci_vp; 3301 raidread_component_label(dev, vp, &clabel); 3302 clabel.autoconfigure = new_value; 3303 raidwrite_component_label(dev, vp, &clabel); 3304 } 3305 } 3306 for(column = 0; column < raidPtr->numSpare ; column++) { 3307 sparecol = raidPtr->numCol + column; 3308 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3309 dev = raidPtr->Disks[sparecol].dev; 3310 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3311 raidread_component_label(dev, vp, &clabel); 3312 clabel.autoconfigure = new_value; 3313 raidwrite_component_label(dev, vp, &clabel); 3314 } 3315 } 3316 return(new_value); 3317 } 3318 3319 int 3320 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3321 { 3322 RF_ComponentLabel_t clabel; 3323 struct vnode *vp; 3324 dev_t dev; 3325 int column; 3326 int sparecol; 3327 3328 raidPtr->root_partition = new_value; 3329 for(column=0; column<raidPtr->numCol; column++) { 3330 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3331 dev = raidPtr->Disks[column].dev; 3332 vp = raidPtr->raid_cinfo[column].ci_vp; 3333 raidread_component_label(dev, vp, &clabel); 3334 clabel.root_partition = new_value; 3335 raidwrite_component_label(dev, vp, &clabel); 3336 } 3337 } 3338 for(column = 0; column < raidPtr->numSpare ; column++) { 3339 sparecol = raidPtr->numCol + column; 3340 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3341 dev = raidPtr->Disks[sparecol].dev; 3342 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3343 raidread_component_label(dev, vp, &clabel); 3344 clabel.root_partition = new_value; 3345 raidwrite_component_label(dev, vp, 
&clabel); 3346 } 3347 } 3348 return(new_value); 3349 } 3350 3351 void 3352 rf_release_all_vps(RF_ConfigSet_t *cset) 3353 { 3354 RF_AutoConfig_t *ac; 3355 3356 ac = cset->ac; 3357 while(ac!=NULL) { 3358 /* Close the vp, and give it back */ 3359 if (ac->vp) { 3360 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3361 VOP_CLOSE(ac->vp, FREAD, NOCRED); 3362 vput(ac->vp); 3363 ac->vp = NULL; 3364 } 3365 ac = ac->next; 3366 } 3367 } 3368 3369 3370 void 3371 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3372 { 3373 RF_AutoConfig_t *ac; 3374 RF_AutoConfig_t *next_ac; 3375 3376 ac = cset->ac; 3377 while(ac!=NULL) { 3378 next_ac = ac->next; 3379 /* nuke the label */ 3380 free(ac->clabel, M_RAIDFRAME); 3381 /* cleanup the config structure */ 3382 free(ac, M_RAIDFRAME); 3383 /* "next.." */ 3384 ac = next_ac; 3385 } 3386 /* and, finally, nuke the config set */ 3387 free(cset, M_RAIDFRAME); 3388 } 3389 3390 3391 void 3392 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3393 { 3394 /* current version number */ 3395 clabel->version = RF_COMPONENT_LABEL_VERSION; 3396 clabel->serial_number = raidPtr->serial_number; 3397 clabel->mod_counter = raidPtr->mod_counter; 3398 clabel->num_rows = 1; 3399 clabel->num_columns = raidPtr->numCol; 3400 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3401 clabel->status = rf_ds_optimal; /* "It's good!" */ 3402 3403 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3404 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3405 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3406 3407 clabel->blockSize = raidPtr->bytesPerSector; 3408 clabel->numBlocks = raidPtr->sectorsPerDisk; 3409 3410 /* XXX not portable */ 3411 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3412 clabel->maxOutstanding = raidPtr->maxOutstanding; 3413 clabel->autoconfigure = raidPtr->autoconfigure; 3414 clabel->root_partition = raidPtr->root_partition; 3415 clabel->last_unit = raidPtr->raidid; 3416 clabel->config_order = raidPtr->config_order; 3417 } 3418 3419 int 3420 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit) 3421 { 3422 RF_Raid_t *raidPtr; 3423 RF_Config_t *config; 3424 int raidID; 3425 int retcode; 3426 3427 #ifdef DEBUG 3428 printf("RAID autoconfigure\n"); 3429 #endif 3430 3431 retcode = 0; 3432 *unit = -1; 3433 3434 /* 1. Create a config structure */ 3435 3436 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3437 M_RAIDFRAME, 3438 M_NOWAIT); 3439 if (config==NULL) { 3440 printf("Out of mem!?!?\n"); 3441 /* XXX do something more intelligent here. */ 3442 return(1); 3443 } 3444 3445 memset(config, 0, sizeof(RF_Config_t)); 3446 3447 /* 3448 2. Figure out what RAID ID this one is supposed to live at 3449 See if we can get the same RAID dev that it was configured 3450 on last time.. 3451 */ 3452 3453 raidID = cset->ac->clabel->last_unit; 3454 if ((raidID < 0) || (raidID >= numraid)) { 3455 /* let's not wander off into lala land. */ 3456 raidID = numraid - 1; 3457 } 3458 if (raidPtrs[raidID]->valid != 0) { 3459 3460 /* 3461 Nope... Go looking for an alternative... 3462 Start high so we don't immediately use raid0 if that's 3463 not taken. 3464 */ 3465 3466 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3467 if (raidPtrs[raidID]->valid == 0) { 3468 /* can use this one! */ 3469 break; 3470 } 3471 } 3472 } 3473 3474 if (raidID < 0) { 3475 /* punt... 
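	   For example, with numraid == 8 and a label whose last_unit is 2:
	   raid2 is reused if it is still free; if it is already valid the
	   loop above walks 7, 6, ... 0 and takes the highest free unit;
	   only when every slot is already valid do we land here.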
*/ 3476 printf("Unable to auto configure this set!\n"); 3477 printf("(Out of RAID devs!)\n"); 3478 free(config, M_RAIDFRAME); 3479 return(1); 3480 } 3481 3482 #ifdef DEBUG 3483 printf("Configuring raid%d:\n",raidID); 3484 #endif 3485 3486 raidPtr = raidPtrs[raidID]; 3487 3488 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3489 raidPtr->raidid = raidID; 3490 raidPtr->openings = RAIDOUTSTANDING; 3491 3492 /* 3. Build the configuration structure */ 3493 rf_create_configuration(cset->ac, config, raidPtr); 3494 3495 /* 4. Do the configuration */ 3496 retcode = rf_Configure(raidPtr, config, cset->ac); 3497 3498 if (retcode == 0) { 3499 3500 raidinit(raidPtrs[raidID]); 3501 3502 rf_markalldirty(raidPtrs[raidID]); 3503 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3504 if (cset->ac->clabel->root_partition==1) { 3505 /* everything configured just fine. Make a note 3506 that this set is eligible to be root. */ 3507 cset->rootable = 1; 3508 /* XXX do this here? */ 3509 raidPtrs[raidID]->root_partition = 1; 3510 } 3511 } 3512 3513 /* 5. Cleanup */ 3514 free(config, M_RAIDFRAME); 3515 3516 *unit = raidID; 3517 return(retcode); 3518 } 3519 3520 void 3521 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3522 { 3523 struct buf *bp; 3524 3525 bp = (struct buf *)desc->bp; 3526 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3527 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3528 } 3529 3530 void 3531 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3532 size_t xmin, size_t xmax) 3533 { 3534 pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO); 3535 pool_sethiwat(p, xmax); 3536 pool_prime(p, xmin); 3537 pool_setlowat(p, xmin); 3538 } 3539 3540 /* 3541 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see 3542 * if there is IO pending and if that IO could possibly be done for a 3543 * given RAID set. Returns 0 if IO is waiting and can be done, 1 3544 * otherwise. 
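 * A caller (the RAIDframe engine's idle loop, for instance; the exact
 * call site is elsewhere) would use it roughly as:
 *
 *	if (rf_buf_queue_check(raidid) == 0)
 *		raidstart(raidPtrs[raidid]);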
3545 * 3546 */ 3547 3548 int 3549 rf_buf_queue_check(int raidid) 3550 { 3551 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) && 3552 raidPtrs[raidid]->openings > 0) { 3553 /* there is work to do */ 3554 return 0; 3555 } 3556 /* default is nothing to do */ 3557 return 1; 3558 } 3559 3560 int 3561 rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr) 3562 { 3563 struct partinfo dpart; 3564 struct dkwedge_info dkw; 3565 int error; 3566 3567 error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred); 3568 if (error == 0) { 3569 diskPtr->blockSize = dpart.disklab->d_secsize; 3570 diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors; 3571 diskPtr->partitionSize = dpart.part->p_size; 3572 return 0; 3573 } 3574 3575 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred); 3576 if (error == 0) { 3577 diskPtr->blockSize = 512; /* XXX */ 3578 diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors; 3579 diskPtr->partitionSize = dkw.dkw_size; 3580 return 0; 3581 } 3582 return error; 3583 } 3584 3585 static int 3586 raid_match(struct device *self, struct cfdata *cfdata, 3587 void *aux) 3588 { 3589 return 1; 3590 } 3591 3592 static void 3593 raid_attach(struct device *parent, struct device *self, 3594 void *aux) 3595 { 3596 3597 } 3598 3599 3600 static int 3601 raid_detach(struct device *self, int flags) 3602 { 3603 struct raid_softc *rs = (struct raid_softc *)self; 3604 3605 if (rs->sc_flags & RAIDF_INITED) 3606 return EBUSY; 3607 3608 return 0; 3609 } 3610 3611 static void 3612 rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr) 3613 { 3614 prop_dictionary_t disk_info, odisk_info, geom; 3615 disk_info = prop_dictionary_create(); 3616 geom = prop_dictionary_create(); 3617 prop_dictionary_set_uint64(geom, "sectors-per-unit", 3618 raidPtr->totalSectors); 3619 prop_dictionary_set_uint32(geom, "sector-size", 3620 raidPtr->bytesPerSector); 3621 3622 prop_dictionary_set_uint16(geom, "sectors-per-track", 3623 raidPtr->Layout.dataSectorsPerStripe); 3624 prop_dictionary_set_uint16(geom, "tracks-per-cylinder", 3625 4 * raidPtr->numCol); 3626 3627 prop_dictionary_set_uint64(geom, "cylinders-per-unit", 3628 raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe * 3629 (4 * raidPtr->numCol))); 3630 3631 prop_dictionary_set(disk_info, "geometry", geom); 3632 prop_object_release(geom); 3633 prop_dictionary_set(device_properties(rs->sc_dev), 3634 "disk-info", disk_info); 3635 odisk_info = rs->sc_dkdev.dk_info; 3636 rs->sc_dkdev.dk_info = disk_info; 3637 if (odisk_info) 3638 prop_object_release(odisk_info); 3639 } 3640
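
/*
 * Illustrative only (not part of the driver): how a RAIDframe subsystem
 * might use the rf_pool_init() wrapper above to set up a small private
 * pool.  The pool, structure, and wait-channel names below are
 * hypothetical.
 */
#if 0
static struct pool rf_example_pool;

struct rf_example_item {
	TAILQ_ENTRY(rf_example_item) ei_next;
	int ei_value;
};

static void
rf_example_pool_setup(void)
{
	/* objects of this size; keep 4 primed, cap the pool at 32 */
	rf_pool_init(&rf_example_pool, sizeof(struct rf_example_item),
	    "rfexpl", 4, 32);
}
#endif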