1 /* $NetBSD: rf_netbsdkintf.c,v 1.266 2009/07/23 21:58:06 dyoung Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * Copyright (c) 1990, 1993 33 * The Regents of the University of California. All rights reserved. 34 * 35 * This code is derived from software contributed to Berkeley by 36 * the Systems Programming Group of the University of Utah Computer 37 * Science Department. 
38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 64 * 65 * @(#)cd.c 8.2 (Berkeley) 11/16/93 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * 71 * This code is derived from software contributed to Berkeley by 72 * the Systems Programming Group of the University of Utah Computer 73 * Science Department. 
74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 3. All advertising materials mentioning features or use of this software 84 * must display the following acknowledgement: 85 * This product includes software developed by the University of 86 * California, Berkeley and its contributors. 87 * 4. Neither the name of the University nor the names of its contributors 88 * may be used to endorse or promote products derived from this software 89 * without specific prior written permission. 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 92 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 94 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 95 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 96 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 97 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 98 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 99 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 100 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 101 * SUCH DAMAGE. 102 * 103 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 104 * 105 * @(#)cd.c 8.2 (Berkeley) 11/16/93 106 */ 107 108 /* 109 * Copyright (c) 1995 Carnegie-Mellon University. 110 * All rights reserved. 
111 * 112 * Authors: Mark Holland, Jim Zelenka 113 * 114 * Permission to use, copy, modify and distribute this software and 115 * its documentation is hereby granted, provided that both the copyright 116 * notice and this permission notice appear in all copies of the 117 * software, derivative works or modified versions, and any portions 118 * thereof, and that both notices appear in supporting documentation. 119 * 120 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 121 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 122 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 123 * 124 * Carnegie Mellon requests users of this software to return to 125 * 126 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 127 * School of Computer Science 128 * Carnegie Mellon University 129 * Pittsburgh PA 15213-3890 130 * 131 * any improvements or extensions that they make and grant Carnegie the 132 * rights to redistribute these changes. 
133 */ 134 135 /*********************************************************** 136 * 137 * rf_kintf.c -- the kernel interface routines for RAIDframe 138 * 139 ***********************************************************/ 140 141 #include <sys/cdefs.h> 142 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.266 2009/07/23 21:58:06 dyoung Exp $"); 143 144 #ifdef _KERNEL_OPT 145 #include "opt_compat_netbsd.h" 146 #include "opt_raid_autoconfig.h" 147 #include "raid.h" 148 #endif 149 150 #include <sys/param.h> 151 #include <sys/errno.h> 152 #include <sys/pool.h> 153 #include <sys/proc.h> 154 #include <sys/queue.h> 155 #include <sys/disk.h> 156 #include <sys/device.h> 157 #include <sys/stat.h> 158 #include <sys/ioctl.h> 159 #include <sys/fcntl.h> 160 #include <sys/systm.h> 161 #include <sys/vnode.h> 162 #include <sys/disklabel.h> 163 #include <sys/conf.h> 164 #include <sys/buf.h> 165 #include <sys/bufq.h> 166 #include <sys/user.h> 167 #include <sys/reboot.h> 168 #include <sys/kauth.h> 169 170 #include <prop/proplib.h> 171 172 #include <dev/raidframe/raidframevar.h> 173 #include <dev/raidframe/raidframeio.h> 174 175 #include "rf_raid.h" 176 #include "rf_copyback.h" 177 #include "rf_dag.h" 178 #include "rf_dagflags.h" 179 #include "rf_desc.h" 180 #include "rf_diskqueue.h" 181 #include "rf_etimer.h" 182 #include "rf_general.h" 183 #include "rf_kintf.h" 184 #include "rf_options.h" 185 #include "rf_driver.h" 186 #include "rf_parityscan.h" 187 #include "rf_threadstuff.h" 188 189 #ifdef COMPAT_50 190 #include "rf_compat50.h" 191 #endif 192 193 #ifdef DEBUG 194 int rf_kdebug_level = 0; 195 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 196 #else /* DEBUG */ 197 #define db1_printf(a) { } 198 #endif /* DEBUG */ 199 200 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 201 202 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 203 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 204 205 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 206 * spare table 
*/ 207 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 208 * installation process */ 209 #endif 210 211 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 212 213 /* prototypes */ 214 static void KernelWakeupFunc(struct buf *); 215 static void InitBP(struct buf *, struct vnode *, unsigned, 216 dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *), 217 void *, int, struct proc *); 218 static void raidinit(RF_Raid_t *); 219 220 void raidattach(int); 221 static int raid_match(device_t, cfdata_t, void *); 222 static void raid_attach(device_t, device_t, void *); 223 static int raid_detach(device_t, int); 224 225 dev_type_open(raidopen); 226 dev_type_close(raidclose); 227 dev_type_read(raidread); 228 dev_type_write(raidwrite); 229 dev_type_ioctl(raidioctl); 230 dev_type_strategy(raidstrategy); 231 dev_type_dump(raiddump); 232 dev_type_size(raidsize); 233 234 const struct bdevsw raid_bdevsw = { 235 raidopen, raidclose, raidstrategy, raidioctl, 236 raiddump, raidsize, D_DISK 237 }; 238 239 const struct cdevsw raid_cdevsw = { 240 raidopen, raidclose, raidread, raidwrite, raidioctl, 241 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 242 }; 243 244 static struct dkdriver rf_dkdriver = { raidstrategy, minphys }; 245 246 /* XXX Not sure if the following should be replacing the raidPtrs above, 247 or if it should be used in conjunction with that... 
 */

/*
 * Per-unit driver state for a RAIDframe pseudo-device.  One entry per
 * unit lives in the raid_softc[] array allocated in raidattach().
 */
struct raid_softc {
	device_t sc_dev;	/* autoconf device handle */
	int     sc_flags;	/* flags */
	int     sc_cflags;	/* configuration flags */
	uint64_t sc_size;	/* size of the raid device */
	char    sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state *buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_SHUTDOWN	0x08	/* unit is being shutdown */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
/* number of units allocated at raidattach() time */
int numraid = 0;

extern struct cfdriver raid_cd;
CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
    raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

/* disklabel handling */
static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				     struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

/* per-unit lock/unlock (see RAIDF_LOCKED/RAIDF_WANTED) */
static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static int raid_detach_unlocked(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
static void rf_set_properties(struct raid_softc *, RF_Raid_t *);

/* kernel thread entry points for reconstruction, parity rewrite, etc. */
void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(device_t);
void rf_buildroothack(RF_ConfigSet_t *);

/* component-label based autoconfiguration helpers */
RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);
static int rf_sync_component_caches(RF_Raid_t *raidPtr);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file.
 */

struct RF_Pools_s rf_pools;

/*
 * raidattach: pseudo-device attach routine.  Allocates the global
 * raidPtrs[] and raid_softc[] arrays for `num' units, boots the
 * RAIDframe core, and registers a finalizer so that autoconfigurable
 * sets are assembled once all real hardware has been found.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

	aprint_debug("raidattach: Asked for %d units\n", num);

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		aprint_normal("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened.
 */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		aprint_error("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			aprint_error("WARNING: raidPtrs[%d] is NULL\n", raidID);
			/* truncate the unit count to what we managed */
			numraid = raidID;
			return;
		}
	}

	if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
		aprint_error("raidattach: config_cfattach_attach failed?\n");
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		aprint_error("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * rf_autoconfig: config-finalizer callback.  Locates all RAID
 * components on the system, sorts them into configuration sets, and
 * hands the sets to rf_buildroothack() for evaluation.  Guarded by
 * raidautoconfig so it only ever runs once.
 */
int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (raidautoconfig == 0)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfig = 0;

	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}

/*
 * rf_buildroothack: walk the list of configuration sets,
 * autoconfigure those that have enough components, and try to work
 * out which (if any) of the configured sets holds the root file
 * system.  Sets booted_device, or RB_ASKNAME if the answer is
 * ambiguous.
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int col;
	int num_root;
	char *devname;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while(cset != NULL ) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure==1) {
			retcode = rf_auto_config_set(cset,&raidID);
			if (!retcode) {
				aprint_debug("raid%d: configured ok\n", raidID);
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL)
		return;

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = raid_softc[rootID].sc_dev;
	} else if (num_root > 1) {

		/*
		 * Maybe the MD code can help.
 If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */

		if (booted_device == NULL)
			cpu_rootconf();
		if (booted_device == NULL)
			return;

		num_root = 0;
		for (raidID = 0; raidID < numraid; raidID++) {
			if (raidPtrs[raidID]->valid == 0)
				continue;

			if (raidPtrs[raidID]->root_partition == 0)
				continue;

			/* does this set contain the boot device? */
			for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
				devname = raidPtrs[raidID]->Disks[col].devname;
				devname += sizeof("/dev/") - 1;
				if (strncmp(devname, device_xname(booted_device),
					    strlen(device_xname(booted_device))) != 0)
					continue;
				aprint_debug("raid%d includes boot device %s\n",
					     raidID, devname);
				num_root++;
				rootID = raidID;
			}
		}

		if (num_root == 1) {
			booted_device = raid_softc[rootID].sc_dev;
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}


/*
 * raidsize: return the size, in DEV_BSIZE units, of the partition on
 * `dev', or -1 if the unit is not configured or the partition is not
 * of type FS_SWAP.  Transiently opens/closes the partition if it was
 * not already open.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
		return (-1);

	return (size);

}

/*
 * raiddump: dump `size' bytes of kernel memory at `va' to block
 * `blkno' of partition `dev'.  Only RAID 1 sets are supported as
 * dump targets.
 */
int
raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	const struct bdevsw *bdev;
	struct disklabel *lp;
	RF_Raid_t *raidPtr;
	daddr_t offset;
	int     part, c, sparecol, j, scol, dumpto;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);

	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return ENXIO;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;


	if ((error = raidlock(rs)) != 0)
		return error;

	/* dumps must be in whole DEV_BSIZE blocks */
	if (size % DEV_BSIZE != 0) {
		error = EINVAL;
		goto out;
	}

	/* refuse a dump that would run past the end of the unit */
	if (blkno + size / DEV_BSIZE > rs->sc_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
		    size / DEV_BSIZE, rs->sc_size);
		error = EINVAL;
		goto out;
	}

	part = DISKPART(dev);
	lp = rs->sc_dkdev.dk_label;
	offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	 */

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	 */

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one?
 */
			scol = -1;
			/* which column is this spare replacing? */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				 */
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				 */
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);

	/*
	   Note that blkno is relative to this particular partition.
	   By adding the offset of this partition in the RAID
	   set, and also adding RF_PROTECTED_SECTORS, we get a
	   value that is relative to the partition used for the
	   underlying component.
	 */

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno + offset, va, size);

out:
	raidunlock(rs);

	return error;
}

/*
 * raidopen: open a partition of a RAID unit.  Takes the unit lock,
 * refuses opens while the unit is shutting down or (for non-raw
 * partitions) while wedges exist, reads the disklabel on first open,
 * and marks the components dirty on the first open of an
 * initialized set.
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int     part, pmask;
	int     error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	/* unit is being torn down; don't allow new opens */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);

	/*
	 * If there are wedges, and this is not RAW_PART, then we
	 * need to fail.
 */
	if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto bad;
	}
	pmask = (1 << part);

	/* on the first open of a configured set, (re)read the disklabel */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			goto bad;
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty( raidPtrs[unit] );
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

bad:
	raidunlock(rs);

	return (error);


}

/*
 * raidclose: close a partition of a RAID unit.  Clears the relevant
 * open-mask bits and, on last close of an initialized set, writes
 * out the final component-label update.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;
	int     error = 0;
	int     part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration...
 */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);

		/* If the kernel is shutting down, it will detach
		 * this RAID set soon enough.
		 */
	}

	raidunlock(rs);
	return (0);

}

/*
 * raidstrategy: block-device strategy routine.  Performs bounds
 * checking on the request, queues the buffer on the per-unit bufq,
 * and wakes the RAIDframe engine via its iodone channel.
 */
void
raidstrategy(struct buf *bp)
{
	int s;

	unsigned int raidID = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	struct raid_softc *rs = &raid_softc[raidID];
	int     wlabel;

	/*
	 * NOTE(review): rs->sc_flags is dereferenced here before the
	 * raidID >= numraid bounds check below, so an out-of-range
	 * unit indexes past raid_softc[].  The two checks should
	 * probably be swapped.
	 */
	if ((rs->sc_flags & RAIDF_INITED) ==0) {
		bp->b_error = ENXIO;
		goto done;
	}
	if (raidID >= numraid || !raidPtrs[raidID]) {
		bp->b_error = ENODEV;
		goto done;
	}
	raidPtr = raidPtrs[raidID];
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		goto done;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		goto done;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) == RAW_PART) {
		uint64_t size;	/* device size in DEV_BSIZE unit */

		if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
			size = raidPtr->totalSectors <<
			    (raidPtr->logBytesPerSector - DEV_BSHIFT);
		} else {
			size = raidPtr->totalSectors >>
			    (DEV_BSHIFT - raidPtr->logBytesPerSector);
		}
		if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
			goto done;
		}
	} else {
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			goto done;
		}
	}
	s = splbio();

	bp->b_resid = 0;

	/* stuff it onto our queue */
	bufq_put(rs->buf_queue, bp);

	/* schedule the I/O to happen at the next convenient time */
	wakeup(&(raidPtrs[raidID]->iodone));

	splx(s);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * raidread: raw character-device read; hands the request to
 * raidstrategy() via physio().
 */
/* ARGSUSED */
int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}

/*
 * raidwrite: raw character-device write; hands the request to
 * raidstrategy() via physio().
 */
/* ARGSUSED */
int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

/*
 * raid_detach_unlocked: core of detach.  Refuses if any partition is
 * still open; otherwise shuts down the RAIDframe engine and detaches
 * the disk.  NOTE(review): the name suggests the caller is expected
 * to hold the unit lock -- confirm against raid_detach().
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	int error;
	RF_Raid_t *raidPtr;

	raidPtr = raidPtrs[device_unit(rs->sc_dev)];

	/*
	 * If somebody has a partition mounted, we shouldn't
	 *
shutdown. */
	if (rs->sc_dkdev.dk_openmask != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		;	/* not initialized: nothing to do */
	else if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;
	else
		rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);

	/* Detach the disk. */
	disk_detach(&rs->sc_dkdev);
	disk_destroy(&rs->sc_dkdev);

	return 0;
}

/*
 * raidioctl -- ioctl entry point for the raid pseudo-device.
 *
 * Handles both the RAIDframe-specific RAIDFRAME_* commands (configure,
 * shutdown, component label manipulation, fail/rebuild/copyback and their
 * status queries) and the standard disk ioctls (disklabels, wedges,
 * cache sync).  Commands that modify state require FWRITE; most commands
 * require the set to be initialized (RAIDF_INITED) -- see the two
 * gate switches below.
 */
int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	cfdata_t cf;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	int raidid;
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t *ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif
	struct dkwedge_info *dkw;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];
	raidPtr = raidPtrs[unit];

	db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
		    (int) DISKPART(dev), (int) unit, (int) cmd));

	/* Must be open for writes for these commands... */
	switch (cmd) {
#ifdef DIOCGSECTORSIZE
	case DIOCGSECTORSIZE:
		*(u_int *)data = raidPtr->bytesPerSector;
		return 0;
	case DIOCGMEDIASIZE:
		*(off_t *)data =
		    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
		return 0;
#endif
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	case DIOCWLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	/* Must be initialized for these... */
	switch (cmd) {
	case DIOCGDINFO:
	case DIOCSDINFO:
	case DIOCWDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCWDINFO:
	case ODIOCSDINFO:
	case ODIOCGDEFLABEL:
#endif
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCGDEFLABEL:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCCACHESYNC:
	case RAIDFRAME_SHUTDOWN:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_ROOT:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		if ((rs->sc_flags & RAIDF_INITED) == 0)
			return (ENXIO);
	}

	switch (cmd) {
#ifdef COMPAT_50
	case RAIDFRAME_GET_INFO50:
		return rf_get_info50(raidPtr, data);

	case RAIDFRAME_CONFIGURE50:
		if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
			return retcode;
		goto config;
#endif
		/* configure the system */
	case RAIDFRAME_CONFIGURE:

		if (raidPtr->valid) {
			/* There is a valid RAID set running on this unit! */
			printf("raid%d: Device already configured!\n",unit);
			return(EINVAL);
		}

		/* copy-in the configuration information */
		/* data points to a pointer to the configuration structure */

		u_cfg = *((RF_Config_t **) data);
		RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
		if (k_cfg == NULL) {
			return (ENOMEM);
		}
		retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
		if (retcode) {
			RF_Free(k_cfg, sizeof(RF_Config_t));
			db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
				retcode));
			return (retcode);
		}
		goto config;
	config:
		/* allocate a buffer for the layout-specific data, and copy it
		 * in */
		if (k_cfg->layoutSpecificSize) {
			if (k_cfg->layoutSpecificSize > 10000) {
				/* sanity check */
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (EINVAL);
			}
			RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
			    (u_char *));
			if (specific_buf == NULL) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				return (ENOMEM);
			}
			retcode = copyin(k_cfg->layoutSpecific, specific_buf,
			    k_cfg->layoutSpecificSize);
			if (retcode) {
				RF_Free(k_cfg, sizeof(RF_Config_t));
				RF_Free(specific_buf,
				    k_cfg->layoutSpecificSize);
				db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
					retcode));
				return (retcode);
			}
		} else
			specific_buf = NULL;
		k_cfg->layoutSpecific = specific_buf;

		/* should do some kind of sanity check on the configuration.
		 * Store the sum of all the bytes in the last byte? */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((error = raidlock(rs)) != 0)
			return (error);

		/* refuse if anyone other than this partition has it open */
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
			(rs->sc_dkdev.dk_copenmask & pmask)))
			retcode = EBUSY;
		else {
			rs->sc_flags |= RAIDF_SHUTDOWN;
			rs->sc_dkdev.dk_copenmask &= ~pmask;
			rs->sc_dkdev.dk_bopenmask &= ~pmask;
			rs->sc_dkdev.dk_openmask &= ~pmask;
			retcode = 0;
		}

		raidunlock(rs);

		if (retcode != 0)
			return retcode;

		/* free the pseudo device attach bits */

		cf = device_cfdata(rs->sc_dev);
		if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
			free(cf, M_RAIDFRAME);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/* For practice, let's get it directly fromdisk, rather
		   than from the in-core copy */
		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
			   (RF_ComponentLabel_t *));
		if (clabel == NULL)
			return (ENOMEM);

		retcode = copyin( *clabel_ptr, clabel,
				  sizeof(RF_ComponentLabel_t));

		if (retcode) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(retcode);
		}

		clabel->row = 0; /* Don't allow looking at anything else.*/

		column = clabel->column;

		/* spares live past numCol, hence numCol + numSpare bound */
		if ((column < 0) || (column >= raidPtr->numCol +
				     raidPtr->numSpare)) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(EINVAL);
		}

		retcode = raidread_component_label(raidPtr->Disks[column].dev,
		    raidPtr->raid_cinfo[column].ci_vp,
		    clabel );

		if (retcode == 0) {
			retcode = copyout(clabel, *clabel_ptr,
			    sizeof(RF_ComponentLabel_t));
		}
		RF_Free(clabel, sizeof(RF_ComponentLabel_t));
		return (retcode);

	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
		   */

		raidid = raidPtr->raidid;
#ifdef DEBUG
		printf("raid%d: Got component label:\n", raidid);
		printf("raid%d: Version: %d\n", raidid, clabel->version);
		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
		printf("raid%d: Column: %d\n", raidid, clabel->column);
		printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
		printf("raid%d: Clean: %d\n", raidid, clabel->clean);
		printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif
		clabel->row = 0;
		column = clabel->column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		/* XXX this isn't allowed to do anything for now :-) */

		/* XXX and before it is, we need to fill in the rest
		   of the fields!?!?!?! */
#if 0
		raidwrite_component_label(
			raidPtr->Disks[column].dev,
			raidPtr->raid_cinfo[column].ci_vp,
			clabel );
#endif
		return (0);

	case RAIDFRAME_INIT_LABELS:
		clabel = (RF_ComponentLabel_t *) data;
		/*
		   we only want the serial number from
		   the above.  We get all the rest of the information
		   from the config that was used to create this RAID
		   set.
		   */

		raidPtr->serial_number = clabel->serial_number;

		RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t),
			  (RF_ComponentLabel_t *));
		if (ci_label == NULL)
			return (ENOMEM);

		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = clabel->serial_number;
		ci_label->row = 0; /* we dont' pretend to support more */

		/* stamp a fresh label onto every live component */
		for(column=0;column<raidPtr->numCol;column++) {
			diskPtr = &raidPtr->Disks[column];
			if (!RF_DEAD_DISK(diskPtr->status)) {
				ci_label->partitionSize = diskPtr->partitionSize;
				ci_label->column = column;
				raidwrite_component_label(
					raidPtr->Disks[column].dev,
					raidPtr->raid_cinfo[column].ci_vp,
					ci_label );
			}
		}
		RF_Free(ci_label, sizeof(RF_ComponentLabel_t));

		return (retcode);
	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		       raidPtr->raidid, d);
		*(int *) data = d;
		return (retcode);

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return(0);
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return(EINVAL);
		}

		/* rewrite runs asynchronously in its own kernel thread */
		retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
					   rf_RewriteParityThread,
					   raidPtr,"raid_parity");
		return (retcode);


	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
		retcode = rf_add_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* NOTE(review): this is a stub -- nothing is removed;
		   it just returns the (zero) retcode. */
		return(retcode);

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_delete_component(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		retcode = rf_incorporate_hot_spare(raidPtr, &component);
		return(retcode);

	case RAIDFRAME_REBUILD_IN_PLACE:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->recon_in_progress == 1) {
			/* a reconstruct is already in progress! */
			return(EINVAL);
		}

		componentPtr = (RF_SingleComponent_t *) data;
		memcpy( &component, componentPtr,
			sizeof(RF_SingleComponent_t));
		component.row = 0; /* we don't support any more */
		column = component.column;

		if ((column < 0) || (column >= raidPtr->numCol)) {
			return(EINVAL);
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
		if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
		    (raidPtr->numFailures > 0)) {
			/* XXX 0 above shouldn't be constant!!! */
			/* some component other than this has failed.
			   Let's not make things worse than they already
			   are... */
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:     Col: %d   Too many failures.\n",
			       raidPtr->raidid, column);
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status ==
		    rf_ds_reconstructing) {
			printf("raid%d: Unable to reconstruct to disk at:\n",
			       raidPtr->raidid);
			printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);

			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[column].status == rf_ds_spared) {
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* the request outlives this ioctl; the recon thread frees it */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);

		rrcopy->raidPtr = (void *) raidPtr;
		rrcopy->col = column;

		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconstructInPlaceThread,
					   rrcopy,"raid_reconip");
		return(retcode);

	case RAIDFRAME_GET_INFO:
		if (!raidPtr->valid)
			return (ENODEV);
		ucfgp = (RF_DeviceConfig_t **) data;
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
			  (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return (ENOMEM);
		d_cfg->rows = 1; /* there is only 1 row now */
		d_cfg->cols = raidPtr->numCol;
		d_cfg->ndevs = raidPtr->numCol;
		if (d_cfg->ndevs >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->nspares = raidPtr->numSpare;
		if (d_cfg->nspares >= RF_MAX_DISKS) {
			RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
			return (ENOMEM);
		}
		d_cfg->maxqdepth = raidPtr->maxQueueDepth;
		d = 0;
		for (j = 0; j < d_cfg->cols; j++) {
			d_cfg->devs[d] = raidPtr->Disks[j];
			d++;
		}
		/* spares follow the data columns in Disks[] */
		for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
			d_cfg->spares[i] = raidPtr->Disks[j];
		}
		retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

		return (retcode);

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return (0);

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return (0);

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return (0);

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return (0);

	case RAIDFRAME_GET_SIZE:
		/* NOTE(review): totalSectors is narrowed to int here --
		   large sets may not be representable; confirm callers. */
		*(int *) data = raidPtr->totalSectors;
		return (0);

		/* fail a disk & optionally start reconstruction */
	case RAIDFRAME_FAIL_DISK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Can't do this on a RAID 0!! */
			return(EINVAL);
		}

		rr = (struct rf_recon_req *) data;
		rr->row = 0;
		if (rr->col < 0 || rr->col >= raidPtr->numCol)
			return (EINVAL);


		RF_LOCK_MUTEX(raidPtr->mutex);
		if (raidPtr->status == rf_rs_reconstructing) {
			/* you can't fail a disk while we're reconstructing! */
			/* XXX wrong for RAID6 */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if ((raidPtr->Disks[rr->col].status ==
		     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
			/* some other component has failed.  Let's not make
			   things worse. XXX wrong for RAID6 */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
			/* Can't fail a spared disk! */
			RF_UNLOCK_MUTEX(raidPtr->mutex);
			return (EINVAL);
		}
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* make a copy of the recon request so that we don't rely on
		 * the user's buffer */
		RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
		if (rrcopy == NULL)
			return(ENOMEM);
		memcpy(rrcopy, rr, sizeof(*rr));
		rrcopy->raidPtr = (void *) raidPtr;

		/* NOTE(review): the RF_CREATE_THREAD result is assigned to
		   retcode but 0 is returned unconditionally -- confirm this
		   is intentional (matches upstream). */
		retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
					   rf_ReconThread,
					   rrcopy,"raid_recon");
		return (0);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return(EINVAL);
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return(EINVAL);
		}

		retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
					   rf_CopybackThread,
					   raidPtr,"raid_copyback");
		return (retcode);

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->status != rf_rs_reconstructing)
			*(int *) data = 100;
		else {
			if (raidPtr->reconControl->numRUsTotal > 0) {
				*(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
			} else {
				*(int *) data = 0;
			}
		}
		return (0);
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->status != rf_rs_reconstructing) {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		} else {
			progressInfo.total =
				raidPtr->reconControl->numRUsTotal;
			progressInfo.completed =
				raidPtr->reconControl->numRUsComplete;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
				raidPtr->parity_rewrite_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->parity_rewrite_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->parity_rewrite_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return(0);
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
				raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return (0);

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		progressInfoPtr = (RF_ProgressInfo_t **) data;
		if (raidPtr->copyback_in_progress == 1) {
			progressInfo.total = raidPtr->Layout.numStripe;
			progressInfo.completed =
				raidPtr->copyback_stripes_done;
			progressInfo.remaining = progressInfo.total -
				progressInfo.completed;
		} else {
			progressInfo.remaining = 0;
			progressInfo.completed = 100;
			progressInfo.total = 100;
		}
		retcode = copyout(&progressInfo, *progressInfoPtr,
				  sizeof(RF_ProgressInfo_t));
		return (retcode);

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table     -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the dameon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	switch (cmd) {
	case DIOCGDINFO:
		*(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
		break;
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
		newlabel = *(rs->sc_dkdev.dk_label);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCGPART:
		((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
		((struct partinfo *) data)->part =
		    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		if ((error = raidlock(rs)) != 0)
			return (error);

		rs->sc_flags |= RAIDF_LABELLING;

		error = setdisklabel(rs->sc_dkdev.dk_label,
		    lp, 0, rs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			/* only the W variants push the label to disk */
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	case DIOCAWEDGE:
	case DIOCDWEDGE:
		dkw = (void *)data;

		/* If the ioctl happens here, the parent is us. */
		(void)strcpy(dkw->dkw_parent, rs->sc_xname);
		return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);

	case DIOCLWEDGES:
		return dkwedge_list(&rs->sc_dkdev,
		    (struct dkwedge_list *)data, l);
	case DIOCCACHESYNC:
		return rf_sync_component_caches(raidPtr);
	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  Attaches the pseudo-device, initializes and
   attaches the disk(9) structure, and discovers wedges. */


static void
raidinit(RF_Raid_t *raidPtr)
{
	cfdata_t cf;
	struct raid_softc *rs;
	int unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	rs->sc_dev = config_attach_pseudo(cf);

	if (rs->sc_dev==NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		       raidPtr->raidid);
		rs->sc_flags &= ~RAIDF_INITED;
		free(cf, M_RAIDFRAME);
		return;
	}

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	dkwedge_discover(&rs->sc_dkdev);

	rf_set_properties(rs, raidPtr);

}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used.
GO */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	/* queue our request and poke the daemon */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		       "raidframe getsparetable", 0);
	}
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* daemon passes its status back in fcol */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int unit;
	struct raid_softc *rs;
	int do_async;
	struct buf *bp;
	int rc;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock: rf_update_component_labels does I/O */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit...  (openings throttles the
	 * number of simultaneously outstanding accesses; the mutex is held
	 * at the top of each iteration and released inside) */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = bufq_get(rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			    (int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				    (int) raid_addr, (int) sum, (int) num_blocks,
				    (int) pb, (int) bp->b_resid));
		}
		/* past end of device, or wrapped around (overflow)? */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* reject transfers that are not a sector multiple */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
				 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
				 do_async, raid_addr, num_blocks,
				 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (rc) {
			bp->b_error = rc;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			/* continue loop */
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it...
GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* complete the NOP immediately via the normal callback */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		InitBP(bp, queue->rf_cinfo->ci_vp,
		       op, queue->rf_cinfo->ci_dev,
		       req->sectorOffset, req->numSector,
		       req->buf, KernelWakeupFunc, (void *) req,
		       queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.  Runs at biodone time (splbio); marks the component
   failed on I/O error when the set can tolerate it, then hands the
   request to the raidio thread via the iodone queue.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;
	int s;

	s = splbio();
	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		RF_LOCK_MUTEX(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		RF_UNLOCK_MUTEX(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error.  Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */

	req->error = bp->b_error;

	simple_lock(&queue->raidPtr->iodone_lock);

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	wakeup(&(queue->raidPtr->iodone));

	simple_unlock(&queue->raidPtr->iodone_lock);

	splx(s);
}



/*
 * initialize a buf structure for doing an I/O in the kernel.
 * cbFunc/cbArg become b_iodone/b_private; the transfer size is
 * numSect sectors of 1 << logBytesPerSector bytes each.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags       = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	bp->b_blkno = startSect;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}

static void
raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
		    struct disklabel *lp)
{
	memset(lp, 0, sizeof(*lp));

	/* fabricate a label...
*/ 2267 lp->d_secperunit = raidPtr->totalSectors; 2268 lp->d_secsize = raidPtr->bytesPerSector; 2269 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2270 lp->d_ntracks = 4 * raidPtr->numCol; 2271 lp->d_ncylinders = raidPtr->totalSectors / 2272 (lp->d_nsectors * lp->d_ntracks); 2273 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2274 2275 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2276 lp->d_type = DTYPE_RAID; 2277 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2278 lp->d_rpm = 3600; 2279 lp->d_interleave = 1; 2280 lp->d_flags = 0; 2281 2282 lp->d_partitions[RAW_PART].p_offset = 0; 2283 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2284 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2285 lp->d_npartitions = RAW_PART + 1; 2286 2287 lp->d_magic = DISKMAGIC; 2288 lp->d_magic2 = DISKMAGIC; 2289 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2290 2291 } 2292 /* 2293 * Read the disklabel from the raid device. If one is not present, fake one 2294 * up. 2295 */ 2296 static void 2297 raidgetdisklabel(dev_t dev) 2298 { 2299 int unit = raidunit(dev); 2300 struct raid_softc *rs = &raid_softc[unit]; 2301 const char *errstring; 2302 struct disklabel *lp = rs->sc_dkdev.dk_label; 2303 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2304 RF_Raid_t *raidPtr; 2305 2306 db1_printf(("Getting the disklabel...\n")); 2307 2308 memset(clp, 0, sizeof(*clp)); 2309 2310 raidPtr = raidPtrs[unit]; 2311 2312 raidgetdefaultlabel(raidPtr, rs, lp); 2313 2314 /* 2315 * Call the generic disklabel extraction routine. 2316 */ 2317 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2318 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2319 if (errstring) 2320 raidmakedisklabel(rs); 2321 else { 2322 int i; 2323 struct partition *pp; 2324 2325 /* 2326 * Sanity check whether the found disklabel is valid. 
2327 * 2328 * This is necessary since total size of the raid device 2329 * may vary when an interleave is changed even though exactly 2330 * same components are used, and old disklabel may used 2331 * if that is found. 2332 */ 2333 if (lp->d_secperunit != rs->sc_size) 2334 printf("raid%d: WARNING: %s: " 2335 "total sector size in disklabel (%" PRIu32 ") != " 2336 "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname, 2337 lp->d_secperunit, rs->sc_size); 2338 for (i = 0; i < lp->d_npartitions; i++) { 2339 pp = &lp->d_partitions[i]; 2340 if (pp->p_offset + pp->p_size > rs->sc_size) 2341 printf("raid%d: WARNING: %s: end of partition `%c' " 2342 "exceeds the size of raid (%" PRIu64 ")\n", 2343 unit, rs->sc_xname, 'a' + i, rs->sc_size); 2344 } 2345 } 2346 2347 } 2348 /* 2349 * Take care of things one might want to take care of in the event 2350 * that a disklabel isn't present. 2351 */ 2352 static void 2353 raidmakedisklabel(struct raid_softc *rs) 2354 { 2355 struct disklabel *lp = rs->sc_dkdev.dk_label; 2356 db1_printf(("Making a label..\n")); 2357 2358 /* 2359 * For historical reasons, if there's no disklabel present 2360 * the raw partition must be marked FS_BSDFFS. 2361 */ 2362 2363 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2364 2365 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2366 2367 lp->d_checksum = dkcksum(lp); 2368 } 2369 /* 2370 * Wait interruptibly for an exclusive lock. 2371 * 2372 * XXX 2373 * Several drivers do this; it should be abstracted and made MP-safe. 2374 * (Hmm... where have we seen this warning before :-> GO ) 2375 */ 2376 static int 2377 raidlock(struct raid_softc *rs) 2378 { 2379 int error; 2380 2381 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2382 rs->sc_flags |= RAIDF_WANTED; 2383 if ((error = 2384 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2385 return (error); 2386 } 2387 rs->sc_flags |= RAIDF_LOCKED; 2388 return (0); 2389 } 2390 /* 2391 * Unlock and wake up any waiters. 
2392 */ 2393 static void 2394 raidunlock(struct raid_softc *rs) 2395 { 2396 2397 rs->sc_flags &= ~RAIDF_LOCKED; 2398 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2399 rs->sc_flags &= ~RAIDF_WANTED; 2400 wakeup(rs); 2401 } 2402 } 2403 2404 2405 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2406 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2407 2408 int 2409 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2410 { 2411 RF_ComponentLabel_t clabel; 2412 raidread_component_label(dev, b_vp, &clabel); 2413 clabel.mod_counter = mod_counter; 2414 clabel.clean = RF_RAID_CLEAN; 2415 raidwrite_component_label(dev, b_vp, &clabel); 2416 return(0); 2417 } 2418 2419 2420 int 2421 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2422 { 2423 RF_ComponentLabel_t clabel; 2424 raidread_component_label(dev, b_vp, &clabel); 2425 clabel.mod_counter = mod_counter; 2426 clabel.clean = RF_RAID_DIRTY; 2427 raidwrite_component_label(dev, b_vp, &clabel); 2428 return(0); 2429 } 2430 2431 /* ARGSUSED */ 2432 int 2433 raidread_component_label(dev_t dev, struct vnode *b_vp, 2434 RF_ComponentLabel_t *clabel) 2435 { 2436 struct buf *bp; 2437 const struct bdevsw *bdev; 2438 int error; 2439 2440 /* XXX should probably ensure that we don't try to do this if 2441 someone has changed rf_protected_sectors. */ 2442 2443 if (b_vp == NULL) { 2444 /* For whatever reason, this component is not valid. 2445 Don't try to read a component label from it. */ 2446 return(EINVAL); 2447 } 2448 2449 /* get a block of the appropriate size... 
*/ 2450 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2451 bp->b_dev = dev; 2452 2453 /* get our ducks in a row for the read */ 2454 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2455 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2456 bp->b_flags |= B_READ; 2457 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2458 2459 bdev = bdevsw_lookup(bp->b_dev); 2460 if (bdev == NULL) 2461 return (ENXIO); 2462 (*bdev->d_strategy)(bp); 2463 2464 error = biowait(bp); 2465 2466 if (!error) { 2467 memcpy(clabel, bp->b_data, 2468 sizeof(RF_ComponentLabel_t)); 2469 } 2470 2471 brelse(bp, 0); 2472 return(error); 2473 } 2474 /* ARGSUSED */ 2475 int 2476 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2477 RF_ComponentLabel_t *clabel) 2478 { 2479 struct buf *bp; 2480 const struct bdevsw *bdev; 2481 int error; 2482 2483 /* get a block of the appropriate size... */ 2484 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2485 bp->b_dev = dev; 2486 2487 /* get our ducks in a row for the write */ 2488 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2489 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2490 bp->b_flags |= B_WRITE; 2491 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2492 2493 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2494 2495 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2496 2497 bdev = bdevsw_lookup(bp->b_dev); 2498 if (bdev == NULL) 2499 return (ENXIO); 2500 (*bdev->d_strategy)(bp); 2501 error = biowait(bp); 2502 brelse(bp, 0); 2503 if (error) { 2504 #if 1 2505 printf("Failed to write RAID component info!\n"); 2506 #endif 2507 } 2508 2509 return(error); 2510 } 2511 2512 void 2513 rf_markalldirty(RF_Raid_t *raidPtr) 2514 { 2515 RF_ComponentLabel_t clabel; 2516 int sparecol; 2517 int c; 2518 int j; 2519 int scol = -1; 2520 2521 raidPtr->mod_counter++; 2522 for (c = 0; c < raidPtr->numCol; c++) { 2523 /* we don't want to touch (at all) a disk that has 2524 failed */ 2525 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2526 raidread_component_label( 2527 
raidPtr->Disks[c].dev, 2528 raidPtr->raid_cinfo[c].ci_vp, 2529 &clabel); 2530 if (clabel.status == rf_ds_spared) { 2531 /* XXX do something special... 2532 but whatever you do, don't 2533 try to access it!! */ 2534 } else { 2535 raidmarkdirty( 2536 raidPtr->Disks[c].dev, 2537 raidPtr->raid_cinfo[c].ci_vp, 2538 raidPtr->mod_counter); 2539 } 2540 } 2541 } 2542 2543 for( c = 0; c < raidPtr->numSpare ; c++) { 2544 sparecol = raidPtr->numCol + c; 2545 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2546 /* 2547 2548 we claim this disk is "optimal" if it's 2549 rf_ds_used_spare, as that means it should be 2550 directly substitutable for the disk it replaced. 2551 We note that too... 2552 2553 */ 2554 2555 for(j=0;j<raidPtr->numCol;j++) { 2556 if (raidPtr->Disks[j].spareCol == sparecol) { 2557 scol = j; 2558 break; 2559 } 2560 } 2561 2562 raidread_component_label( 2563 raidPtr->Disks[sparecol].dev, 2564 raidPtr->raid_cinfo[sparecol].ci_vp, 2565 &clabel); 2566 /* make sure status is noted */ 2567 2568 raid_init_component_label(raidPtr, &clabel); 2569 2570 clabel.row = 0; 2571 clabel.column = scol; 2572 /* Note: we *don't* change status from rf_ds_used_spare 2573 to rf_ds_optimal */ 2574 /* clabel.status = rf_ds_optimal; */ 2575 2576 raidmarkdirty(raidPtr->Disks[sparecol].dev, 2577 raidPtr->raid_cinfo[sparecol].ci_vp, 2578 raidPtr->mod_counter); 2579 } 2580 } 2581 } 2582 2583 2584 void 2585 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2586 { 2587 RF_ComponentLabel_t clabel; 2588 int sparecol; 2589 int c; 2590 int j; 2591 int scol; 2592 2593 scol = -1; 2594 2595 /* XXX should do extra checks to make sure things really are clean, 2596 rather than blindly setting the clean bit... 
*/ 2597 2598 raidPtr->mod_counter++; 2599 2600 for (c = 0; c < raidPtr->numCol; c++) { 2601 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2602 raidread_component_label( 2603 raidPtr->Disks[c].dev, 2604 raidPtr->raid_cinfo[c].ci_vp, 2605 &clabel); 2606 /* make sure status is noted */ 2607 clabel.status = rf_ds_optimal; 2608 2609 /* bump the counter */ 2610 clabel.mod_counter = raidPtr->mod_counter; 2611 2612 /* note what unit we are configured as */ 2613 clabel.last_unit = raidPtr->raidid; 2614 2615 raidwrite_component_label( 2616 raidPtr->Disks[c].dev, 2617 raidPtr->raid_cinfo[c].ci_vp, 2618 &clabel); 2619 if (final == RF_FINAL_COMPONENT_UPDATE) { 2620 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2621 raidmarkclean( 2622 raidPtr->Disks[c].dev, 2623 raidPtr->raid_cinfo[c].ci_vp, 2624 raidPtr->mod_counter); 2625 } 2626 } 2627 } 2628 /* else we don't touch it.. */ 2629 } 2630 2631 for( c = 0; c < raidPtr->numSpare ; c++) { 2632 sparecol = raidPtr->numCol + c; 2633 /* Need to ensure that the reconstruct actually completed! */ 2634 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2635 /* 2636 2637 we claim this disk is "optimal" if it's 2638 rf_ds_used_spare, as that means it should be 2639 directly substitutable for the disk it replaced. 2640 We note that too... 2641 2642 */ 2643 2644 for(j=0;j<raidPtr->numCol;j++) { 2645 if (raidPtr->Disks[j].spareCol == sparecol) { 2646 scol = j; 2647 break; 2648 } 2649 } 2650 2651 /* XXX shouldn't *really* need this... 
*/ 2652 raidread_component_label( 2653 raidPtr->Disks[sparecol].dev, 2654 raidPtr->raid_cinfo[sparecol].ci_vp, 2655 &clabel); 2656 /* make sure status is noted */ 2657 2658 raid_init_component_label(raidPtr, &clabel); 2659 2660 clabel.mod_counter = raidPtr->mod_counter; 2661 clabel.column = scol; 2662 clabel.status = rf_ds_optimal; 2663 clabel.last_unit = raidPtr->raidid; 2664 2665 raidwrite_component_label( 2666 raidPtr->Disks[sparecol].dev, 2667 raidPtr->raid_cinfo[sparecol].ci_vp, 2668 &clabel); 2669 if (final == RF_FINAL_COMPONENT_UPDATE) { 2670 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2671 raidmarkclean( raidPtr->Disks[sparecol].dev, 2672 raidPtr->raid_cinfo[sparecol].ci_vp, 2673 raidPtr->mod_counter); 2674 } 2675 } 2676 } 2677 } 2678 } 2679 2680 void 2681 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2682 { 2683 2684 if (vp != NULL) { 2685 if (auto_configured == 1) { 2686 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2687 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2688 vput(vp); 2689 2690 } else { 2691 (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred); 2692 } 2693 } 2694 } 2695 2696 2697 void 2698 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2699 { 2700 int r,c; 2701 struct vnode *vp; 2702 int acd; 2703 2704 2705 /* We take this opportunity to close the vnodes like we should.. 
*/ 2706 2707 for (c = 0; c < raidPtr->numCol; c++) { 2708 vp = raidPtr->raid_cinfo[c].ci_vp; 2709 acd = raidPtr->Disks[c].auto_configured; 2710 rf_close_component(raidPtr, vp, acd); 2711 raidPtr->raid_cinfo[c].ci_vp = NULL; 2712 raidPtr->Disks[c].auto_configured = 0; 2713 } 2714 2715 for (r = 0; r < raidPtr->numSpare; r++) { 2716 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2717 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2718 rf_close_component(raidPtr, vp, acd); 2719 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2720 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2721 } 2722 } 2723 2724 2725 void 2726 rf_ReconThread(struct rf_recon_req *req) 2727 { 2728 int s; 2729 RF_Raid_t *raidPtr; 2730 2731 s = splbio(); 2732 raidPtr = (RF_Raid_t *) req->raidPtr; 2733 raidPtr->recon_in_progress = 1; 2734 2735 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2736 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2737 2738 RF_Free(req, sizeof(*req)); 2739 2740 raidPtr->recon_in_progress = 0; 2741 splx(s); 2742 2743 /* That's all... */ 2744 kthread_exit(0); /* does not return */ 2745 } 2746 2747 void 2748 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2749 { 2750 int retcode; 2751 int s; 2752 2753 raidPtr->parity_rewrite_stripes_done = 0; 2754 raidPtr->parity_rewrite_in_progress = 1; 2755 s = splbio(); 2756 retcode = rf_RewriteParity(raidPtr); 2757 splx(s); 2758 if (retcode) { 2759 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2760 } else { 2761 /* set the clean bit! If we shutdown correctly, 2762 the clean bit on each component label will get 2763 set */ 2764 raidPtr->parity_good = RF_RAID_CLEAN; 2765 } 2766 raidPtr->parity_rewrite_in_progress = 0; 2767 2768 /* Anyone waiting for us to stop? If so, inform them... */ 2769 if (raidPtr->waitShutdown) { 2770 wakeup(&raidPtr->parity_rewrite_in_progress); 2771 } 2772 2773 /* That's all... 
*/ 2774 kthread_exit(0); /* does not return */ 2775 } 2776 2777 2778 void 2779 rf_CopybackThread(RF_Raid_t *raidPtr) 2780 { 2781 int s; 2782 2783 raidPtr->copyback_in_progress = 1; 2784 s = splbio(); 2785 rf_CopybackReconstructedData(raidPtr); 2786 splx(s); 2787 raidPtr->copyback_in_progress = 0; 2788 2789 /* That's all... */ 2790 kthread_exit(0); /* does not return */ 2791 } 2792 2793 2794 void 2795 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 2796 { 2797 int s; 2798 RF_Raid_t *raidPtr; 2799 2800 s = splbio(); 2801 raidPtr = req->raidPtr; 2802 raidPtr->recon_in_progress = 1; 2803 rf_ReconstructInPlace(raidPtr, req->col); 2804 RF_Free(req, sizeof(*req)); 2805 raidPtr->recon_in_progress = 0; 2806 splx(s); 2807 2808 /* That's all... */ 2809 kthread_exit(0); /* does not return */ 2810 } 2811 2812 static RF_AutoConfig_t * 2813 rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp, 2814 const char *cname, RF_SectorCount_t size) 2815 { 2816 int good_one = 0; 2817 RF_ComponentLabel_t *clabel; 2818 RF_AutoConfig_t *ac; 2819 2820 clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT); 2821 if (clabel == NULL) { 2822 oomem: 2823 while(ac_list) { 2824 ac = ac_list; 2825 if (ac->clabel) 2826 free(ac->clabel, M_RAIDFRAME); 2827 ac_list = ac_list->next; 2828 free(ac, M_RAIDFRAME); 2829 } 2830 printf("RAID auto config: out of memory!\n"); 2831 return NULL; /* XXX probably should panic? */ 2832 } 2833 2834 if (!raidread_component_label(dev, vp, clabel)) { 2835 /* Got the label. Does it look reasonable? */ 2836 if (rf_reasonable_label(clabel) && 2837 (clabel->partitionSize <= size)) { 2838 #ifdef DEBUG 2839 printf("Component on: %s: %llu\n", 2840 cname, (unsigned long long)size); 2841 rf_print_component_label(clabel); 2842 #endif 2843 /* if it's reasonable, add it, else ignore it. 
*/ 2844 ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME, 2845 M_NOWAIT); 2846 if (ac == NULL) { 2847 free(clabel, M_RAIDFRAME); 2848 goto oomem; 2849 } 2850 strlcpy(ac->devname, cname, sizeof(ac->devname)); 2851 ac->dev = dev; 2852 ac->vp = vp; 2853 ac->clabel = clabel; 2854 ac->next = ac_list; 2855 ac_list = ac; 2856 good_one = 1; 2857 } 2858 } 2859 if (!good_one) { 2860 /* cleanup */ 2861 free(clabel, M_RAIDFRAME); 2862 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2863 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2864 vput(vp); 2865 } 2866 return ac_list; 2867 } 2868 2869 RF_AutoConfig_t * 2870 rf_find_raid_components(void) 2871 { 2872 struct vnode *vp; 2873 struct disklabel label; 2874 device_t dv; 2875 dev_t dev; 2876 int bmajor, bminor, wedge; 2877 int error; 2878 int i; 2879 RF_AutoConfig_t *ac_list; 2880 2881 2882 /* initialize the AutoConfig list */ 2883 ac_list = NULL; 2884 2885 /* we begin by trolling through *all* the devices on the system */ 2886 2887 for (dv = alldevs.tqh_first; dv != NULL; 2888 dv = dv->dv_list.tqe_next) { 2889 2890 /* we are only interested in disks... */ 2891 if (device_class(dv) != DV_DISK) 2892 continue; 2893 2894 /* we don't care about floppies... */ 2895 if (device_is_a(dv, "fd")) { 2896 continue; 2897 } 2898 2899 /* we don't care about CD's... */ 2900 if (device_is_a(dv, "cd")) { 2901 continue; 2902 } 2903 2904 /* we don't care about md's... */ 2905 if (device_is_a(dv, "md")) { 2906 continue; 2907 } 2908 2909 /* hdfd is the Atari/Hades floppy driver */ 2910 if (device_is_a(dv, "hdfd")) { 2911 continue; 2912 } 2913 2914 /* fdisa is the Atari/Milan floppy driver */ 2915 if (device_is_a(dv, "fdisa")) { 2916 continue; 2917 } 2918 2919 /* need to find the device_name_to_block_device_major stuff */ 2920 bmajor = devsw_name2blk(device_xname(dv), NULL, 0); 2921 2922 /* get a vnode for the raw partition of this disk */ 2923 2924 wedge = device_is_a(dv, "dk"); 2925 bminor = minor(device_unit(dv)); 2926 dev = wedge ? 
makedev(bmajor, bminor) : 2927 MAKEDISKDEV(bmajor, bminor, RAW_PART); 2928 if (bdevvp(dev, &vp)) 2929 panic("RAID can't alloc vnode"); 2930 2931 error = VOP_OPEN(vp, FREAD, NOCRED); 2932 2933 if (error) { 2934 /* "Who cares." Continue looking 2935 for something that exists*/ 2936 vput(vp); 2937 continue; 2938 } 2939 2940 if (wedge) { 2941 struct dkwedge_info dkw; 2942 error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, 2943 NOCRED); 2944 if (error) { 2945 printf("RAIDframe: can't get wedge info for " 2946 "dev %s (%d)\n", device_xname(dv), error); 2947 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2948 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2949 vput(vp); 2950 continue; 2951 } 2952 2953 if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) { 2954 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2955 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2956 vput(vp); 2957 continue; 2958 } 2959 2960 ac_list = rf_get_component(ac_list, dev, vp, 2961 device_xname(dv), dkw.dkw_size); 2962 continue; 2963 } 2964 2965 /* Ok, the disk exists. Go get the disklabel. */ 2966 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED); 2967 if (error) { 2968 /* 2969 * XXX can't happen - open() would 2970 * have errored out (or faked up one) 2971 */ 2972 if (error != ENOTTY) 2973 printf("RAIDframe: can't get label for dev " 2974 "%s (%d)\n", device_xname(dv), error); 2975 } 2976 2977 /* don't need this any more. We'll allocate it again 2978 a little later if we really do... 
*/ 2979 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2980 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED); 2981 vput(vp); 2982 2983 if (error) 2984 continue; 2985 2986 for (i = 0; i < label.d_npartitions; i++) { 2987 char cname[sizeof(ac_list->devname)]; 2988 2989 /* We only support partitions marked as RAID */ 2990 if (label.d_partitions[i].p_fstype != FS_RAID) 2991 continue; 2992 2993 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 2994 if (bdevvp(dev, &vp)) 2995 panic("RAID can't alloc vnode"); 2996 2997 error = VOP_OPEN(vp, FREAD, NOCRED); 2998 if (error) { 2999 /* Whatever... */ 3000 vput(vp); 3001 continue; 3002 } 3003 snprintf(cname, sizeof(cname), "%s%c", 3004 device_xname(dv), 'a' + i); 3005 ac_list = rf_get_component(ac_list, dev, vp, cname, 3006 label.d_partitions[i].p_size); 3007 } 3008 } 3009 return ac_list; 3010 } 3011 3012 3013 static int 3014 rf_reasonable_label(RF_ComponentLabel_t *clabel) 3015 { 3016 3017 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 3018 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 3019 ((clabel->clean == RF_RAID_CLEAN) || 3020 (clabel->clean == RF_RAID_DIRTY)) && 3021 clabel->row >=0 && 3022 clabel->column >= 0 && 3023 clabel->num_rows > 0 && 3024 clabel->num_columns > 0 && 3025 clabel->row < clabel->num_rows && 3026 clabel->column < clabel->num_columns && 3027 clabel->blockSize > 0 && 3028 clabel->numBlocks > 0) { 3029 /* label looks reasonable enough... */ 3030 return(1); 3031 } 3032 return(0); 3033 } 3034 3035 3036 #ifdef DEBUG 3037 void 3038 rf_print_component_label(RF_ComponentLabel_t *clabel) 3039 { 3040 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 3041 clabel->row, clabel->column, 3042 clabel->num_rows, clabel->num_columns); 3043 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 3044 clabel->version, clabel->serial_number, 3045 clabel->mod_counter); 3046 printf(" Clean: %s Status: %d\n", 3047 clabel->clean ? 
"Yes" : "No", clabel->status ); 3048 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 3049 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 3050 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 3051 (char) clabel->parityConfig, clabel->blockSize, 3052 clabel->numBlocks); 3053 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 3054 printf(" Contains root partition: %s\n", 3055 clabel->root_partition ? "Yes" : "No" ); 3056 printf(" Last configured as: raid%d\n", clabel->last_unit ); 3057 #if 0 3058 printf(" Config order: %d\n", clabel->config_order); 3059 #endif 3060 3061 } 3062 #endif 3063 3064 RF_ConfigSet_t * 3065 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 3066 { 3067 RF_AutoConfig_t *ac; 3068 RF_ConfigSet_t *config_sets; 3069 RF_ConfigSet_t *cset; 3070 RF_AutoConfig_t *ac_next; 3071 3072 3073 config_sets = NULL; 3074 3075 /* Go through the AutoConfig list, and figure out which components 3076 belong to what sets. */ 3077 ac = ac_list; 3078 while(ac!=NULL) { 3079 /* we're going to putz with ac->next, so save it here 3080 for use at the end of the loop */ 3081 ac_next = ac->next; 3082 3083 if (config_sets == NULL) { 3084 /* will need at least this one... */ 3085 config_sets = (RF_ConfigSet_t *) 3086 malloc(sizeof(RF_ConfigSet_t), 3087 M_RAIDFRAME, M_NOWAIT); 3088 if (config_sets == NULL) { 3089 panic("rf_create_auto_sets: No memory!"); 3090 } 3091 /* this one is easy :) */ 3092 config_sets->ac = ac; 3093 config_sets->next = NULL; 3094 config_sets->rootable = 0; 3095 ac->next = NULL; 3096 } else { 3097 /* which set does this component fit into? */ 3098 cset = config_sets; 3099 while(cset!=NULL) { 3100 if (rf_does_it_fit(cset, ac)) { 3101 /* looks like it matches... */ 3102 ac->next = cset->ac; 3103 cset->ac = ac; 3104 break; 3105 } 3106 cset = cset->next; 3107 } 3108 if (cset==NULL) { 3109 /* didn't find a match above... 
new set..*/ 3110 cset = (RF_ConfigSet_t *) 3111 malloc(sizeof(RF_ConfigSet_t), 3112 M_RAIDFRAME, M_NOWAIT); 3113 if (cset == NULL) { 3114 panic("rf_create_auto_sets: No memory!"); 3115 } 3116 cset->ac = ac; 3117 ac->next = NULL; 3118 cset->next = config_sets; 3119 cset->rootable = 0; 3120 config_sets = cset; 3121 } 3122 } 3123 ac = ac_next; 3124 } 3125 3126 3127 return(config_sets); 3128 } 3129 3130 static int 3131 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 3132 { 3133 RF_ComponentLabel_t *clabel1, *clabel2; 3134 3135 /* If this one matches the *first* one in the set, that's good 3136 enough, since the other members of the set would have been 3137 through here too... */ 3138 /* note that we are not checking partitionSize here.. 3139 3140 Note that we are also not checking the mod_counters here. 3141 If everything else matches execpt the mod_counter, that's 3142 good enough for this test. We will deal with the mod_counters 3143 a little later in the autoconfiguration process. 3144 3145 (clabel1->mod_counter == clabel2->mod_counter) && 3146 3147 The reason we don't check for this is that failed disks 3148 will have lower modification counts. If those disks are 3149 not added to the set they used to belong to, then they will 3150 form their own set, which may result in 2 different sets, 3151 for example, competing to be configured at raid0, and 3152 perhaps competing to be the root filesystem set. If the 3153 wrong ones get configured, or both attempt to become /, 3154 weird behaviour and or serious lossage will occur. Thus we 3155 need to bring them into the fold here, and kick them out at 3156 a later point. 
3157 3158 */ 3159 3160 clabel1 = cset->ac->clabel; 3161 clabel2 = ac->clabel; 3162 if ((clabel1->version == clabel2->version) && 3163 (clabel1->serial_number == clabel2->serial_number) && 3164 (clabel1->num_rows == clabel2->num_rows) && 3165 (clabel1->num_columns == clabel2->num_columns) && 3166 (clabel1->sectPerSU == clabel2->sectPerSU) && 3167 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 3168 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 3169 (clabel1->parityConfig == clabel2->parityConfig) && 3170 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 3171 (clabel1->blockSize == clabel2->blockSize) && 3172 (clabel1->numBlocks == clabel2->numBlocks) && 3173 (clabel1->autoconfigure == clabel2->autoconfigure) && 3174 (clabel1->root_partition == clabel2->root_partition) && 3175 (clabel1->last_unit == clabel2->last_unit) && 3176 (clabel1->config_order == clabel2->config_order)) { 3177 /* if it get's here, it almost *has* to be a match */ 3178 } else { 3179 /* it's not consistent with somebody in the set.. 3180 punt */ 3181 return(0); 3182 } 3183 /* all was fine.. it must fit... */ 3184 return(1); 3185 } 3186 3187 int 3188 rf_have_enough_components(RF_ConfigSet_t *cset) 3189 { 3190 RF_AutoConfig_t *ac; 3191 RF_AutoConfig_t *auto_config; 3192 RF_ComponentLabel_t *clabel; 3193 int c; 3194 int num_cols; 3195 int num_missing; 3196 int mod_counter; 3197 int mod_counter_found; 3198 int even_pair_failed; 3199 char parity_type; 3200 3201 3202 /* check to see that we have enough 'live' components 3203 of this set. If so, we can configure it if necessary */ 3204 3205 num_cols = cset->ac->clabel->num_columns; 3206 parity_type = cset->ac->clabel->parityConfig; 3207 3208 /* XXX Check for duplicate components!?!?!? */ 3209 3210 /* Determine what the mod_counter is supposed to be for this set. 
*/ 3211 3212 mod_counter_found = 0; 3213 mod_counter = 0; 3214 ac = cset->ac; 3215 while(ac!=NULL) { 3216 if (mod_counter_found==0) { 3217 mod_counter = ac->clabel->mod_counter; 3218 mod_counter_found = 1; 3219 } else { 3220 if (ac->clabel->mod_counter > mod_counter) { 3221 mod_counter = ac->clabel->mod_counter; 3222 } 3223 } 3224 ac = ac->next; 3225 } 3226 3227 num_missing = 0; 3228 auto_config = cset->ac; 3229 3230 even_pair_failed = 0; 3231 for(c=0; c<num_cols; c++) { 3232 ac = auto_config; 3233 while(ac!=NULL) { 3234 if ((ac->clabel->column == c) && 3235 (ac->clabel->mod_counter == mod_counter)) { 3236 /* it's this one... */ 3237 #ifdef DEBUG 3238 printf("Found: %s at %d\n", 3239 ac->devname,c); 3240 #endif 3241 break; 3242 } 3243 ac=ac->next; 3244 } 3245 if (ac==NULL) { 3246 /* Didn't find one here! */ 3247 /* special case for RAID 1, especially 3248 where there are more than 2 3249 components (where RAIDframe treats 3250 things a little differently :( ) */ 3251 if (parity_type == '1') { 3252 if (c%2 == 0) { /* even component */ 3253 even_pair_failed = 1; 3254 } else { /* odd component. If 3255 we're failed, and 3256 so is the even 3257 component, it's 3258 "Good Night, Charlie" */ 3259 if (even_pair_failed == 1) { 3260 return(0); 3261 } 3262 } 3263 } else { 3264 /* normal accounting */ 3265 num_missing++; 3266 } 3267 } 3268 if ((parity_type == '1') && (c%2 == 1)) { 3269 /* Just did an even component, and we didn't 3270 bail.. reset the even_pair_failed flag, 3271 and go on to the next component.... 
*/ 3272 even_pair_failed = 0; 3273 } 3274 } 3275 3276 clabel = cset->ac->clabel; 3277 3278 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3279 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3280 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3281 /* XXX this needs to be made *much* more general */ 3282 /* Too many failures */ 3283 return(0); 3284 } 3285 /* otherwise, all is well, and we've got enough to take a kick 3286 at autoconfiguring this set */ 3287 return(1); 3288 } 3289 3290 void 3291 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3292 RF_Raid_t *raidPtr) 3293 { 3294 RF_ComponentLabel_t *clabel; 3295 int i; 3296 3297 clabel = ac->clabel; 3298 3299 /* 1. Fill in the common stuff */ 3300 config->numRow = clabel->num_rows = 1; 3301 config->numCol = clabel->num_columns; 3302 config->numSpare = 0; /* XXX should this be set here? */ 3303 config->sectPerSU = clabel->sectPerSU; 3304 config->SUsPerPU = clabel->SUsPerPU; 3305 config->SUsPerRU = clabel->SUsPerRU; 3306 config->parityConfig = clabel->parityConfig; 3307 /* XXX... */ 3308 strcpy(config->diskQueueType,"fifo"); 3309 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3310 config->layoutSpecificSize = 0; /* XXX ?? 
*/ 3311 3312 while(ac!=NULL) { 3313 /* row/col values will be in range due to the checks 3314 in reasonable_label() */ 3315 strcpy(config->devnames[0][ac->clabel->column], 3316 ac->devname); 3317 ac = ac->next; 3318 } 3319 3320 for(i=0;i<RF_MAXDBGV;i++) { 3321 config->debugVars[i][0] = 0; 3322 } 3323 } 3324 3325 int 3326 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3327 { 3328 RF_ComponentLabel_t clabel; 3329 struct vnode *vp; 3330 dev_t dev; 3331 int column; 3332 int sparecol; 3333 3334 raidPtr->autoconfigure = new_value; 3335 3336 for(column=0; column<raidPtr->numCol; column++) { 3337 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3338 dev = raidPtr->Disks[column].dev; 3339 vp = raidPtr->raid_cinfo[column].ci_vp; 3340 raidread_component_label(dev, vp, &clabel); 3341 clabel.autoconfigure = new_value; 3342 raidwrite_component_label(dev, vp, &clabel); 3343 } 3344 } 3345 for(column = 0; column < raidPtr->numSpare ; column++) { 3346 sparecol = raidPtr->numCol + column; 3347 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3348 dev = raidPtr->Disks[sparecol].dev; 3349 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3350 raidread_component_label(dev, vp, &clabel); 3351 clabel.autoconfigure = new_value; 3352 raidwrite_component_label(dev, vp, &clabel); 3353 } 3354 } 3355 return(new_value); 3356 } 3357 3358 int 3359 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3360 { 3361 RF_ComponentLabel_t clabel; 3362 struct vnode *vp; 3363 dev_t dev; 3364 int column; 3365 int sparecol; 3366 3367 raidPtr->root_partition = new_value; 3368 for(column=0; column<raidPtr->numCol; column++) { 3369 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3370 dev = raidPtr->Disks[column].dev; 3371 vp = raidPtr->raid_cinfo[column].ci_vp; 3372 raidread_component_label(dev, vp, &clabel); 3373 clabel.root_partition = new_value; 3374 raidwrite_component_label(dev, vp, &clabel); 3375 } 3376 } 3377 for(column = 0; column < raidPtr->numSpare ; column++) { 3378 sparecol = 
		    raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			dev = raidPtr->Disks[sparecol].dev;
			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.root_partition = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	return(new_value);
}

/*
 * Close and release the vnode of every component in the config set.
 * Used when a set is not (or no longer) being configured.
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free all memory associated with a config set: each autoconfig
 * entry, its component label, and finally the set structure itself.
 * Does NOT touch vnodes -- see rf_release_all_vps() above.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* save the link before freeing the node that holds it */
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Initialize a component label from the current parameters of the
 * RAID set (geometry, serial/mod counters, autoconfig flags, etc.).
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" 
*/

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}

/*
 * Attempt to configure the RAID set described by cset: choose a raid
 * unit (preferring the unit recorded in the component label), build a
 * config structure, and run rf_Configure().  On success *unit holds
 * the unit number used and 0 is returned; otherwise non-zero.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
				/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		*/

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	/* loop above leaves raidID == -1 if every unit is taken */
	if (raidID < 0) {
		/* punt... 
*/
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		free(config, M_RAIDFRAME);
		return(1);
	}

#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		/* parity is not known-good yet; mark everything dirty */
		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}

/*
 * Account a completed I/O against the disk statistics of the raid
 * unit that issued it (byte count actually transferred, read/write).
 */
void
rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
{
	struct buf *bp;

	bp = (struct buf *)desc->bp;
	/* b_bcount - b_resid == bytes actually transferred */
	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
	    (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
}

/*
 * Convenience wrapper: initialize a pool at IPL_BIO, prime it with
 * xmin items, and set the low/high watermarks to xmin/xmax.
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{
	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}

/*
 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
 * if there is IO pending and if that IO could possibly be done for a
 * given RAID set.  Returns 0 if IO is waiting and can be done, 1
 * otherwise. 
 *
 */

int
rf_buf_queue_check(int raidid)
{
	/* work is doable only if something is queued AND the set has
	   a free opening to issue it into */
	if ((bufq_peek(raid_softc[raidid].buf_queue) != NULL) &&
	    raidPtrs[raidid]->openings > 0) {
		/* there is work to do */
		return 0;
	}
	/* default is nothing to do */
	return 1;
}

/*
 * Determine the size and sector size of a component: try the
 * disklabel partition ioctl (DIOCGPART) first, then fall back to
 * wedge info (DIOCGWEDGEINFO).  rf_protectedSectors are reserved off
 * the top for the component label.  Returns 0 on success or the
 * error from the last ioctl attempted.
 */
int
rf_getdisksize(struct vnode *vp, struct lwp *l, RF_RaidDisk_t *diskPtr)
{
	struct partinfo dpart;
	struct dkwedge_info dkw;
	int error;

	error = VOP_IOCTL(vp, DIOCGPART, &dpart, FREAD, l->l_cred);
	if (error == 0) {
		diskPtr->blockSize = dpart.disklab->d_secsize;
		diskPtr->numBlocks = dpart.part->p_size - rf_protectedSectors;
		diskPtr->partitionSize = dpart.part->p_size;
		return 0;
	}

	error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD, l->l_cred);
	if (error == 0) {
		diskPtr->blockSize = 512;	/* XXX wedge info has no sector size */
		diskPtr->numBlocks = dkw.dkw_size - rf_protectedSectors;
		diskPtr->partitionSize = dkw.dkw_size;
		return 0;
	}
	return error;
}

/* autoconf match: raid pseudo-devices always match */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}

/* autoconf attach: nothing to do; real setup happens at configure time */
static void
raid_attach(device_t parent, device_t self, void *aux)
{

}


/* autoconf detach: take the softc lock, detach the unit, unlock */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = &raid_softc[device_unit(self)];

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	return error;
}

/*
 * Publish a synthetic disk geometry for the raid device via proplib
 * device properties (continued on the following lines).
 */
static void
rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	prop_dictionary_t disk_info, odisk_info, geom;
	disk_info = prop_dictionary_create();
	geom = prop_dictionary_create();
	prop_dictionary_set_uint64(geom, "sectors-per-unit",
	    raidPtr->totalSectors);
	prop_dictionary_set_uint32(geom, "sector-size",
	    raidPtr->bytesPerSector);

	/* geometry is synthetic: one "track" per data stripe, and an
	   arbitrary 4 * numCol tracks per cylinder */
	prop_dictionary_set_uint16(geom, "sectors-per-track",
	    raidPtr->Layout.dataSectorsPerStripe);
	prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
	    4 * raidPtr->numCol);

	prop_dictionary_set_uint64(geom, "cylinders-per-unit",
	    raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
	    (4 * raidPtr->numCol)));

	prop_dictionary_set(disk_info, "geometry", geom);
	prop_object_release(geom);
	prop_dictionary_set(device_properties(rs->sc_dev),
	    "disk-info", disk_info);
	/* swap in the new dictionary and drop the reference to the old one */
	odisk_info = rs->sc_dkdev.dk_info;
	rs->sc_dkdev.dk_info = disk_info;
	if (odisk_info)
		prop_object_release(odisk_info);
}

/*
 * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
 * We end up returning whatever error was returned by the first cache flush
 * that fails.
 */

static int
rf_sync_component_caches(RF_Raid_t *raidPtr)
{
	int c, sparecol;
	int e,error;
	int force = 1;

	error = 0;
	/* flush every healthy data component; remember the first error */
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
			    &force, FWRITE, NOCRED);
			if (e) {
				/* ENODEV just means the component has no
				   cache-sync support; don't complain */
				if (e != ENODEV)
					printf("raid%d: cache flush to component %s failed.\n",
					    raidPtr->raidid, raidPtr->Disks[c].devname);
				if (error == 0) {
					error = e;
				}
			}
		}
	}

	/* same treatment for in-use spares */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! 
		*/
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
			    DIOCCACHESYNC, &force, FWRITE, NOCRED);
			if (e) {
				/* ENODEV == no cache-sync support; stay quiet */
				if (e != ENODEV)
					printf("raid%d: cache flush to component %s failed.\n",
					    raidPtr->raidid, raidPtr->Disks[sparecol].devname);
				if (error == 0) {
					error = e;
				}
			}
		}
	}
	/* first failure (if any) wins */
	return error;
}