1 /* $NetBSD: rf_netbsdkintf.c,v 1.209 2006/06/12 22:49:35 oster Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 71 * 72 * @(#)cd.c 8.2 (Berkeley) 11/16/93 73 */ 74 75 /* 76 * Copyright (c) 1988 University of Utah. 77 * 78 * This code is derived from software contributed to Berkeley by 79 * the Systems Programming Group of the University of Utah Computer 80 * Science Department. 81 * 82 * Redistribution and use in source and binary forms, with or without 83 * modification, are permitted provided that the following conditions 84 * are met: 85 * 1. Redistributions of source code must retain the above copyright 86 * notice, this list of conditions and the following disclaimer. 87 * 2. Redistributions in binary form must reproduce the above copyright 88 * notice, this list of conditions and the following disclaimer in the 89 * documentation and/or other materials provided with the distribution. 90 * 3. All advertising materials mentioning features or use of this software 91 * must display the following acknowledgement: 92 * This product includes software developed by the University of 93 * California, Berkeley and its contributors. 94 * 4. Neither the name of the University nor the names of its contributors 95 * may be used to endorse or promote products derived from this software 96 * without specific prior written permission. 
97 * 98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 108 * SUCH DAMAGE. 109 * 110 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 111 * 112 * @(#)cd.c 8.2 (Berkeley) 11/16/93 113 */ 114 115 /* 116 * Copyright (c) 1995 Carnegie-Mellon University. 117 * All rights reserved. 118 * 119 * Authors: Mark Holland, Jim Zelenka 120 * 121 * Permission to use, copy, modify and distribute this software and 122 * its documentation is hereby granted, provided that both the copyright 123 * notice and this permission notice appear in all copies of the 124 * software, derivative works or modified versions, and any portions 125 * thereof, and that both notices appear in supporting documentation. 126 * 127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
130 * 131 * Carnegie Mellon requests users of this software to return to 132 * 133 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 134 * School of Computer Science 135 * Carnegie Mellon University 136 * Pittsburgh PA 15213-3890 137 * 138 * any improvements or extensions that they make and grant Carnegie the 139 * rights to redistribute these changes. 140 */ 141 142 /*********************************************************** 143 * 144 * rf_kintf.c -- the kernel interface routines for RAIDframe 145 * 146 ***********************************************************/ 147 148 #include <sys/cdefs.h> 149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.209 2006/06/12 22:49:35 oster Exp $"); 150 151 #include <sys/param.h> 152 #include <sys/errno.h> 153 #include <sys/pool.h> 154 #include <sys/proc.h> 155 #include <sys/queue.h> 156 #include <sys/disk.h> 157 #include <sys/device.h> 158 #include <sys/stat.h> 159 #include <sys/ioctl.h> 160 #include <sys/fcntl.h> 161 #include <sys/systm.h> 162 #include <sys/namei.h> 163 #include <sys/vnode.h> 164 #include <sys/disklabel.h> 165 #include <sys/conf.h> 166 #include <sys/lock.h> 167 #include <sys/buf.h> 168 #include <sys/bufq.h> 169 #include <sys/user.h> 170 #include <sys/reboot.h> 171 #include <sys/kauth.h> 172 173 #include <dev/raidframe/raidframevar.h> 174 #include <dev/raidframe/raidframeio.h> 175 #include "raid.h" 176 #include "opt_raid_autoconfig.h" 177 #include "rf_raid.h" 178 #include "rf_copyback.h" 179 #include "rf_dag.h" 180 #include "rf_dagflags.h" 181 #include "rf_desc.h" 182 #include "rf_diskqueue.h" 183 #include "rf_etimer.h" 184 #include "rf_general.h" 185 #include "rf_kintf.h" 186 #include "rf_options.h" 187 #include "rf_driver.h" 188 #include "rf_parityscan.h" 189 #include "rf_threadstuff.h" 190 191 #ifdef DEBUG 192 int rf_kdebug_level = 0; 193 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 194 #else /* DEBUG */ 195 #define db1_printf(a) { } 196 #endif /* DEBUG */ 197 198 static 
RF_Raid_t **raidPtrs; /* global raid device descriptors */ 199 200 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 201 202 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 203 * spare table */ 204 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 205 * installation process */ 206 207 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 208 209 /* prototypes */ 210 static void KernelWakeupFunc(struct buf *); 211 static void InitBP(struct buf *, struct vnode *, unsigned, 212 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *), 213 void *, int, struct proc *); 214 static void raidinit(RF_Raid_t *); 215 216 void raidattach(int); 217 218 dev_type_open(raidopen); 219 dev_type_close(raidclose); 220 dev_type_read(raidread); 221 dev_type_write(raidwrite); 222 dev_type_ioctl(raidioctl); 223 dev_type_strategy(raidstrategy); 224 dev_type_dump(raiddump); 225 dev_type_size(raidsize); 226 /* block-device entry points of the raid driver */ 227 const struct bdevsw raid_bdevsw = { 228 raidopen, raidclose, raidstrategy, raidioctl, 229 raiddump, raidsize, D_DISK 230 }; 231 /* character-device entry points of the raid driver */ 232 const struct cdevsw raid_cdevsw = { 233 raidopen, raidclose, raidread, raidwrite, raidioctl, 234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 235 }; 236 237 /* XXX Not sure if the following should be replacing the raidPtrs above, 238 or if it should be used in conjunction with that... 
239 */ 240 241 struct raid_softc { 242 int sc_flags; /* flags */ 243 int sc_cflags; /* configuration flags */ 244 size_t sc_size; /* size of the raid device */ 245 char sc_xname[20]; /* XXX external name */ 246 struct disk sc_dkdev; /* generic disk device info */ 247 struct bufq_state *buf_queue; /* used for the device queue */ 248 }; 249 /* sc_flags */ 250 #define RAIDF_INITED 0x01 /* unit has been initialized */ 251 #define RAIDF_WLABEL 0x02 /* label area is writable */ 252 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 253 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 254 #define RAIDF_LOCKED 0x80 /* unit is locked */ 255 256 #define raidunit(x) DISKUNIT(x) 257 int numraid = 0; 258 259 extern struct cfdriver raid_cd; 260 261 /* 262 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 263 * Be aware that large numbers can allow the driver to consume a lot of 264 * kernel memory, especially on writes, and in degraded mode reads. 265 * 266 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 267 * a single 64K write will typically require 64K for the old data, 268 * 64K for the old parity, and 64K for the new parity, for a total 269 * of 192K (if the parity buffer is not re-used immediately). 270 * Even if it is used immediately, that's still 128K, which when multiplied 271 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 272 * 273 * Now in degraded mode, for example, a 64K read on the above setup may 274 * require data reconstruction, which will require *all* of the 4 remaining 275 * disks to participate -- 4 * 32K/disk == 128K again. 276 */ 277 278 #ifndef RAIDOUTSTANDING 279 #define RAIDOUTSTANDING 6 280 #endif 281 282 #define RAIDLABELDEV(dev) \ 283 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 284 285 /* declared here, and made public, for the benefit of KVM stuff.. 
*/ 286 struct raid_softc *raid_softc; 287 288 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 289 struct disklabel *); 290 static void raidgetdisklabel(dev_t); 291 static void raidmakedisklabel(struct raid_softc *); 292 293 static int raidlock(struct raid_softc *); 294 static void raidunlock(struct raid_softc *); 295 296 static void rf_markalldirty(RF_Raid_t *); 297 298 struct device *raidrootdev; 299 300 void rf_ReconThread(struct rf_recon_req *); 301 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 302 void rf_CopybackThread(RF_Raid_t *raidPtr); 303 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 304 int rf_autoconfig(struct device *self); 305 void rf_buildroothack(RF_ConfigSet_t *); 306 307 RF_AutoConfig_t *rf_find_raid_components(void); 308 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 309 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 310 static int rf_reasonable_label(RF_ComponentLabel_t *); 311 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 312 int rf_set_autoconfig(RF_Raid_t *, int); 313 int rf_set_rootpartition(RF_Raid_t *, int); 314 void rf_release_all_vps(RF_ConfigSet_t *); 315 void rf_cleanup_config_set(RF_ConfigSet_t *); 316 int rf_have_enough_components(RF_ConfigSet_t *); 317 int rf_auto_config_set(RF_ConfigSet_t *, int *); 318 319 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not 320 allow autoconfig to take place. 321 Note that this is overridden by having 322 RAID_AUTOCONFIG as an option in the 323 kernel config file. */ 324 325 struct RF_Pools_s rf_pools; 326 327 void 328 raidattach(int num) 329 { 330 int raidID; 331 int i, rc; 332 333 #ifdef DEBUG 334 printf("raidattach: Asked for %d units\n", num); 335 #endif 336 337 if (num <= 0) { 338 #ifdef DIAGNOSTIC 339 panic("raidattach: count <= 0"); 340 #endif 341 return; 342 } 343 /* This is where all the initialization stuff gets done. 
*/ 344 345 numraid = num; 346 347 /* Make some space for requested number of units... */ 348 349 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **)); 350 if (raidPtrs == NULL) { 351 panic("raidPtrs is NULL!!"); 352 } 353 354 rf_mutex_init(&rf_sparet_wait_mutex); 355 356 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 357 358 for (i = 0; i < num; i++) 359 raidPtrs[i] = NULL; 360 rc = rf_BootRaidframe(); 361 if (rc == 0) 362 printf("Kernelized RAIDframe activated\n"); 363 else 364 panic("Serious error booting RAID!!"); 365 366 /* put together some datastructures like the CCD device does.. This 367 * lets us lock the device and what-not when it gets opened. */ 368 369 raid_softc = (struct raid_softc *) 370 malloc(num * sizeof(struct raid_softc), 371 M_RAIDFRAME, M_NOWAIT); 372 if (raid_softc == NULL) { 373 printf("WARNING: no memory for RAIDframe driver\n"); 374 return; 375 } 376 377 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 378 379 raidrootdev = (struct device *)malloc(num * sizeof(struct device), 380 M_RAIDFRAME, M_NOWAIT); 381 if (raidrootdev == NULL) { 382 panic("No memory for RAIDframe driver!!?!?!"); 383 } 384 385 for (raidID = 0; raidID < num; raidID++) { 386 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0); 387 pseudo_disk_init(&raid_softc[raidID].sc_dkdev); 388 389 /* XXXJRT Should use config_attach_pseudo() */ 390 391 raidrootdev[raidID].dv_class = DV_DISK; 392 raidrootdev[raidID].dv_cfdata = NULL; 393 raidrootdev[raidID].dv_unit = raidID; 394 raidrootdev[raidID].dv_parent = NULL; 395 raidrootdev[raidID].dv_flags = 0; 396 raidrootdev[raidID].dv_cfdriver = &raid_cd; 397 snprintf(raidrootdev[raidID].dv_xname, 398 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID); 399 400 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t), 401 (RF_Raid_t *)); 402 if (raidPtrs[raidID] == NULL) { 403 printf("WARNING: raidPtrs[%d] is NULL\n", raidID); 404 numraid = raidID; 405 return; 406 } 407 } 408 409 #ifdef RAID_AUTOCONFIG 410 raidautoconfig = 
1; 411 #endif 412 413 /* 414 * Register a finalizer which will be used to auto-config RAID 415 * sets once all real hardware devices have been found. 416 */ 417 if (config_finalize_register(NULL, rf_autoconfig) != 0) 418 printf("WARNING: unable to register RAIDframe finalizer\n"); 419 } 420 421 int 422 rf_autoconfig(struct device *self) 423 { 424 RF_AutoConfig_t *ac_list; 425 RF_ConfigSet_t *config_sets; 426 427 if (raidautoconfig == 0) 428 return (0); 429 430 /* XXX This code can only be run once. */ 431 raidautoconfig = 0; 432 433 /* 1. locate all RAID components on the system */ 434 #ifdef DEBUG 435 printf("Searching for RAID components...\n"); 436 #endif 437 ac_list = rf_find_raid_components(); 438 439 /* 2. Sort them into their respective sets. */ 440 config_sets = rf_create_auto_sets(ac_list); 441 442 /* 443 * 3. Evaluate each set andconfigure the valid ones. 444 * This gets done in rf_buildroothack(). 445 */ 446 rf_buildroothack(config_sets); 447 448 return (1); 449 } 450 451 void 452 rf_buildroothack(RF_ConfigSet_t *config_sets) 453 { 454 RF_ConfigSet_t *cset; 455 RF_ConfigSet_t *next_cset; 456 int retcode; 457 int raidID; 458 int rootID; 459 int num_root; 460 461 rootID = 0; 462 num_root = 0; 463 cset = config_sets; 464 while(cset != NULL ) { 465 next_cset = cset->next; 466 if (rf_have_enough_components(cset) && 467 cset->ac->clabel->autoconfigure==1) { 468 retcode = rf_auto_config_set(cset,&raidID); 469 if (!retcode) { 470 if (cset->rootable) { 471 rootID = raidID; 472 num_root++; 473 } 474 } else { 475 /* The autoconfig didn't work :( */ 476 #if DEBUG 477 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 478 #endif 479 rf_release_all_vps(cset); 480 } 481 } else { 482 /* we're not autoconfiguring this set... 483 release the associated resources */ 484 rf_release_all_vps(cset); 485 } 486 /* cleanup */ 487 rf_cleanup_config_set(cset); 488 cset = next_cset; 489 } 490 491 /* we found something bootable... 
*/ 492 493 if (num_root == 1) { 494 booted_device = &raidrootdev[rootID]; 495 } else if (num_root > 1) { 496 /* we can't guess.. require the user to answer... */ 497 boothowto |= RB_ASKNAME; 498 } 499 } 500 501 502 int 503 raidsize(dev_t dev) 504 { 505 struct raid_softc *rs; 506 struct disklabel *lp; 507 int part, unit, omask, size; 508 509 unit = raidunit(dev); 510 if (unit >= numraid) 511 return (-1); 512 rs = &raid_softc[unit]; 513 514 if ((rs->sc_flags & RAIDF_INITED) == 0) 515 return (-1); 516 517 part = DISKPART(dev); 518 omask = rs->sc_dkdev.dk_openmask & (1 << part); 519 lp = rs->sc_dkdev.dk_label; 520 521 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp)) 522 return (-1); 523 524 if (lp->d_partitions[part].p_fstype != FS_SWAP) 525 size = -1; 526 else 527 size = lp->d_partitions[part].p_size * 528 (lp->d_secsize / DEV_BSIZE); 529 530 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp)) 531 return (-1); 532 533 return (size); 534 535 } 536 537 int 538 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 539 { 540 /* Not implemented. */ 541 return ENXIO; 542 } 543 /* ARGSUSED */ 544 int 545 raidopen(dev_t dev, int flags, int fmt, struct lwp *l) 546 { 547 int unit = raidunit(dev); 548 struct raid_softc *rs; 549 struct disklabel *lp; 550 int part, pmask; 551 int error = 0; 552 553 if (unit >= numraid) 554 return (ENXIO); 555 rs = &raid_softc[unit]; 556 557 if ((error = raidlock(rs)) != 0) 558 return (error); 559 lp = rs->sc_dkdev.dk_label; 560 561 part = DISKPART(dev); 562 pmask = (1 << part); 563 564 if ((rs->sc_flags & RAIDF_INITED) && 565 (rs->sc_dkdev.dk_openmask == 0)) 566 raidgetdisklabel(dev); 567 568 /* make sure that this partition exists */ 569 570 if (part != RAW_PART) { 571 if (((rs->sc_flags & RAIDF_INITED) == 0) || 572 ((part >= lp->d_npartitions) || 573 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 574 error = ENXIO; 575 raidunlock(rs); 576 return (error); 577 } 578 } 579 /* Prevent this unit from being unconfigured while open. 
*/ 580 switch (fmt) { 581 case S_IFCHR: 582 rs->sc_dkdev.dk_copenmask |= pmask; 583 break; 584 585 case S_IFBLK: 586 rs->sc_dkdev.dk_bopenmask |= pmask; 587 break; 588 } 589 590 if ((rs->sc_dkdev.dk_openmask == 0) && 591 ((rs->sc_flags & RAIDF_INITED) != 0)) { 592 /* First one... mark things as dirty... Note that we *MUST* 593 have done a configure before this. I DO NOT WANT TO BE 594 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 595 THAT THEY BELONG TOGETHER!!!!! */ 596 /* XXX should check to see if we're only open for reading 597 here... If so, we needn't do this, but then need some 598 other way of keeping track of what's happened.. */ 599 600 rf_markalldirty( raidPtrs[unit] ); 601 } 602 603 604 rs->sc_dkdev.dk_openmask = 605 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 606 607 raidunlock(rs); 608 609 return (error); 610 611 612 } 613 /* ARGSUSED */ 614 int 615 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 616 { 617 int unit = raidunit(dev); 618 struct raid_softc *rs; 619 int error = 0; 620 int part; 621 622 if (unit >= numraid) 623 return (ENXIO); 624 rs = &raid_softc[unit]; 625 626 if ((error = raidlock(rs)) != 0) 627 return (error); 628 629 part = DISKPART(dev); 630 631 /* ...that much closer to allowing unconfiguration... */ 632 switch (fmt) { 633 case S_IFCHR: 634 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 635 break; 636 637 case S_IFBLK: 638 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 639 break; 640 } 641 rs->sc_dkdev.dk_openmask = 642 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 643 644 if ((rs->sc_dkdev.dk_openmask == 0) && 645 ((rs->sc_flags & RAIDF_INITED) != 0)) { 646 /* Last one... device is not unconfigured yet. 647 Device shutdown has taken care of setting the 648 clean bits if RAIDF_INITED is not set 649 mark things as clean... 
*/ 650 651 rf_update_component_labels(raidPtrs[unit], 652 RF_FINAL_COMPONENT_UPDATE); 653 if (doing_shutdown) { 654 /* last one, and we're going down, so 655 lights out for this RAID set too. */ 656 error = rf_Shutdown(raidPtrs[unit]); 657 658 /* It's no longer initialized... */ 659 rs->sc_flags &= ~RAIDF_INITED; 660 661 /* Detach the disk. */ 662 pseudo_disk_detach(&rs->sc_dkdev); 663 } 664 } 665 666 raidunlock(rs); 667 return (0); 668 669 } 670 671 void 672 raidstrategy(struct buf *bp) 673 { 674 int s; 675 676 unsigned int raidID = raidunit(bp->b_dev); 677 RF_Raid_t *raidPtr; 678 struct raid_softc *rs = &raid_softc[raidID]; 679 int wlabel; 680 681 if ((rs->sc_flags & RAIDF_INITED) ==0) { 682 bp->b_error = ENXIO; 683 bp->b_flags |= B_ERROR; 684 goto done; 685 } 686 if (raidID >= numraid || !raidPtrs[raidID]) { 687 bp->b_error = ENODEV; 688 bp->b_flags |= B_ERROR; 689 goto done; 690 } 691 raidPtr = raidPtrs[raidID]; 692 if (!raidPtr->valid) { 693 bp->b_error = ENODEV; 694 bp->b_flags |= B_ERROR; 695 goto done; 696 } 697 if (bp->b_bcount == 0) { 698 db1_printf(("b_bcount is zero..\n")); 699 goto done; 700 } 701 702 /* 703 * Do bounds checking and adjust transfer. If there's an 704 * error, the bounds check will flag that for us. 
705 */ 706 707 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 708 if (DISKPART(bp->b_dev) == RAW_PART) { 709 uint64_t size; /* device size in DEV_BSIZE unit */ 710 711 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 712 size = raidPtr->totalSectors << 713 (raidPtr->logBytesPerSector - DEV_BSHIFT); 714 } else { 715 size = raidPtr->totalSectors >> 716 (DEV_BSHIFT - raidPtr->logBytesPerSector); 717 } 718 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 719 goto done; 720 } 721 } else { 722 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 723 db1_printf(("Bounds check failed!!:%d %d\n", 724 (int) bp->b_blkno, (int) wlabel)); 725 goto done; 726 } 727 } 728 s = splbio(); 729 730 bp->b_resid = 0; 731 732 /* stuff it onto our queue */ 733 BUFQ_PUT(rs->buf_queue, bp); 734 735 /* scheduled the IO to happen at the next convenient time */ 736 wakeup(&(raidPtrs[raidID]->iodone)); 737 738 splx(s); 739 return; 740 741 done: 742 bp->b_resid = bp->b_bcount; 743 biodone(bp); 744 } 745 /* ARGSUSED */ 746 int 747 raidread(dev_t dev, struct uio *uio, int flags) 748 { 749 int unit = raidunit(dev); 750 struct raid_softc *rs; 751 752 if (unit >= numraid) 753 return (ENXIO); 754 rs = &raid_softc[unit]; 755 756 if ((rs->sc_flags & RAIDF_INITED) == 0) 757 return (ENXIO); 758 759 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 760 761 } 762 /* ARGSUSED */ 763 int 764 raidwrite(dev_t dev, struct uio *uio, int flags) 765 { 766 int unit = raidunit(dev); 767 struct raid_softc *rs; 768 769 if (unit >= numraid) 770 return (ENXIO); 771 rs = &raid_softc[unit]; 772 773 if ((rs->sc_flags & RAIDF_INITED) == 0) 774 return (ENXIO); 775 776 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 777 778 } 779 780 int 781 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 782 { 783 int unit = raidunit(dev); 784 int error = 0; 785 int part, pmask; 786 struct raid_softc *rs; 787 RF_Config_t *k_cfg, *u_cfg; 788 RF_Raid_t 
*raidPtr; 789 RF_RaidDisk_t *diskPtr; 790 RF_AccTotals_t *totals; 791 RF_DeviceConfig_t *d_cfg, **ucfgp; 792 u_char *specific_buf; 793 int retcode = 0; 794 int column; 795 int raidid; 796 struct rf_recon_req *rrcopy, *rr; 797 RF_ComponentLabel_t *clabel; 798 RF_ComponentLabel_t *ci_label; 799 RF_ComponentLabel_t **clabel_ptr; 800 RF_SingleComponent_t *sparePtr,*componentPtr; 801 RF_SingleComponent_t component; 802 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 803 int i, j, d; 804 #ifdef __HAVE_OLD_DISKLABEL 805 struct disklabel newlabel; 806 #endif 807 808 if (unit >= numraid) 809 return (ENXIO); 810 rs = &raid_softc[unit]; 811 raidPtr = raidPtrs[unit]; 812 813 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 814 (int) DISKPART(dev), (int) unit, (int) cmd)); 815 816 /* Must be open for writes for these commands... */ 817 switch (cmd) { 818 case DIOCSDINFO: 819 case DIOCWDINFO: 820 #ifdef __HAVE_OLD_DISKLABEL 821 case ODIOCWDINFO: 822 case ODIOCSDINFO: 823 #endif 824 case DIOCWLABEL: 825 if ((flag & FWRITE) == 0) 826 return (EBADF); 827 } 828 829 /* Must be initialized for these... 
*/ 830 switch (cmd) { 831 case DIOCGDINFO: 832 case DIOCSDINFO: 833 case DIOCWDINFO: 834 #ifdef __HAVE_OLD_DISKLABEL 835 case ODIOCGDINFO: 836 case ODIOCWDINFO: 837 case ODIOCSDINFO: 838 case ODIOCGDEFLABEL: 839 #endif 840 case DIOCGPART: 841 case DIOCWLABEL: 842 case DIOCGDEFLABEL: 843 case RAIDFRAME_SHUTDOWN: 844 case RAIDFRAME_REWRITEPARITY: 845 case RAIDFRAME_GET_INFO: 846 case RAIDFRAME_RESET_ACCTOTALS: 847 case RAIDFRAME_GET_ACCTOTALS: 848 case RAIDFRAME_KEEP_ACCTOTALS: 849 case RAIDFRAME_GET_SIZE: 850 case RAIDFRAME_FAIL_DISK: 851 case RAIDFRAME_COPYBACK: 852 case RAIDFRAME_CHECK_RECON_STATUS: 853 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 854 case RAIDFRAME_GET_COMPONENT_LABEL: 855 case RAIDFRAME_SET_COMPONENT_LABEL: 856 case RAIDFRAME_ADD_HOT_SPARE: 857 case RAIDFRAME_REMOVE_HOT_SPARE: 858 case RAIDFRAME_INIT_LABELS: 859 case RAIDFRAME_REBUILD_IN_PLACE: 860 case RAIDFRAME_CHECK_PARITY: 861 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 862 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 863 case RAIDFRAME_CHECK_COPYBACK_STATUS: 864 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 865 case RAIDFRAME_SET_AUTOCONFIG: 866 case RAIDFRAME_SET_ROOT: 867 case RAIDFRAME_DELETE_COMPONENT: 868 case RAIDFRAME_INCORPORATE_HOT_SPARE: 869 if ((rs->sc_flags & RAIDF_INITED) == 0) 870 return (ENXIO); 871 } 872 873 switch (cmd) { 874 875 /* configure the system */ 876 case RAIDFRAME_CONFIGURE: 877 878 if (raidPtr->valid) { 879 /* There is a valid RAID set running on this unit! 
*/ 880 printf("raid%d: Device already configured!\n",unit); 881 return(EINVAL); 882 } 883 884 /* copy-in the configuration information */ 885 /* data points to a pointer to the configuration structure */ 886 887 u_cfg = *((RF_Config_t **) data); 888 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 889 if (k_cfg == NULL) { 890 return (ENOMEM); 891 } 892 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 893 if (retcode) { 894 RF_Free(k_cfg, sizeof(RF_Config_t)); 895 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 896 retcode)); 897 return (retcode); 898 } 899 /* allocate a buffer for the layout-specific data, and copy it 900 * in */ 901 if (k_cfg->layoutSpecificSize) { 902 if (k_cfg->layoutSpecificSize > 10000) { 903 /* sanity check */ 904 RF_Free(k_cfg, sizeof(RF_Config_t)); 905 return (EINVAL); 906 } 907 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 908 (u_char *)); 909 if (specific_buf == NULL) { 910 RF_Free(k_cfg, sizeof(RF_Config_t)); 911 return (ENOMEM); 912 } 913 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 914 k_cfg->layoutSpecificSize); 915 if (retcode) { 916 RF_Free(k_cfg, sizeof(RF_Config_t)); 917 RF_Free(specific_buf, 918 k_cfg->layoutSpecificSize); 919 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 920 retcode)); 921 return (retcode); 922 } 923 } else 924 specific_buf = NULL; 925 k_cfg->layoutSpecific = specific_buf; 926 927 /* should do some kind of sanity check on the configuration. 928 * Store the sum of all the bytes in the last byte? 
*/ 929 930 /* configure the system */ 931 932 /* 933 * Clear the entire RAID descriptor, just to make sure 934 * there is no stale data left in the case of a 935 * reconfiguration 936 */ 937 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 938 raidPtr->raidid = unit; 939 940 retcode = rf_Configure(raidPtr, k_cfg, NULL); 941 942 if (retcode == 0) { 943 944 /* allow this many simultaneous IO's to 945 this RAID device */ 946 raidPtr->openings = RAIDOUTSTANDING; 947 948 raidinit(raidPtr); 949 rf_markalldirty(raidPtr); 950 } 951 /* free the buffers. No return code here. */ 952 if (k_cfg->layoutSpecificSize) { 953 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 954 } 955 RF_Free(k_cfg, sizeof(RF_Config_t)); 956 957 return (retcode); 958 959 /* shutdown the system */ 960 case RAIDFRAME_SHUTDOWN: 961 962 if ((error = raidlock(rs)) != 0) 963 return (error); 964 965 /* 966 * If somebody has a partition mounted, we shouldn't 967 * shutdown. 968 */ 969 970 part = DISKPART(dev); 971 pmask = (1 << part); 972 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 973 ((rs->sc_dkdev.dk_bopenmask & pmask) && 974 (rs->sc_dkdev.dk_copenmask & pmask))) { 975 raidunlock(rs); 976 return (EBUSY); 977 } 978 979 retcode = rf_Shutdown(raidPtr); 980 981 /* It's no longer initialized... */ 982 rs->sc_flags &= ~RAIDF_INITED; 983 984 /* Detach the disk. 
*/ 985 pseudo_disk_detach(&rs->sc_dkdev); 986 987 raidunlock(rs); 988 989 return (retcode); 990 case RAIDFRAME_GET_COMPONENT_LABEL: 991 clabel_ptr = (RF_ComponentLabel_t **) data; 992 /* need to read the component label for the disk indicated 993 by row,column in clabel */ 994 995 /* For practice, let's get it directly fromdisk, rather 996 than from the in-core copy */ 997 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 998 (RF_ComponentLabel_t *)); 999 if (clabel == NULL) 1000 return (ENOMEM); 1001 1002 retcode = copyin( *clabel_ptr, clabel, 1003 sizeof(RF_ComponentLabel_t)); 1004 1005 if (retcode) { 1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1007 return(retcode); 1008 } 1009 1010 clabel->row = 0; /* Don't allow looking at anything else.*/ 1011 1012 column = clabel->column; 1013 1014 if ((column < 0) || (column >= raidPtr->numCol + 1015 raidPtr->numSpare)) { 1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1017 return(EINVAL); 1018 } 1019 1020 retcode = raidread_component_label(raidPtr->Disks[column].dev, 1021 raidPtr->raid_cinfo[column].ci_vp, 1022 clabel ); 1023 1024 if (retcode == 0) { 1025 retcode = copyout(clabel, *clabel_ptr, 1026 sizeof(RF_ComponentLabel_t)); 1027 } 1028 RF_Free(clabel, sizeof(RF_ComponentLabel_t)); 1029 return (retcode); 1030 1031 case RAIDFRAME_SET_COMPONENT_LABEL: 1032 clabel = (RF_ComponentLabel_t *) data; 1033 1034 /* XXX check the label for valid stuff... */ 1035 /* Note that some things *should not* get modified -- 1036 the user should be re-initing the labels instead of 1037 trying to patch things. 
1038 */ 1039 1040 raidid = raidPtr->raidid; 1041 #if DEBUG 1042 printf("raid%d: Got component label:\n", raidid); 1043 printf("raid%d: Version: %d\n", raidid, clabel->version); 1044 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1045 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1046 printf("raid%d: Column: %d\n", raidid, clabel->column); 1047 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1048 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1049 printf("raid%d: Status: %d\n", raidid, clabel->status); 1050 #endif 1051 clabel->row = 0; 1052 column = clabel->column; 1053 1054 if ((column < 0) || (column >= raidPtr->numCol)) { 1055 return(EINVAL); 1056 } 1057 1058 /* XXX this isn't allowed to do anything for now :-) */ 1059 1060 /* XXX and before it is, we need to fill in the rest 1061 of the fields!?!?!?! */ 1062 #if 0 1063 raidwrite_component_label( 1064 raidPtr->Disks[column].dev, 1065 raidPtr->raid_cinfo[column].ci_vp, 1066 clabel ); 1067 #endif 1068 return (0); 1069 1070 case RAIDFRAME_INIT_LABELS: 1071 clabel = (RF_ComponentLabel_t *) data; 1072 /* 1073 we only want the serial number from 1074 the above. We get all the rest of the information 1075 from the config that was used to create this RAID 1076 set. 
1077 */ 1078 1079 raidPtr->serial_number = clabel->serial_number; 1080 1081 RF_Malloc(ci_label, sizeof(RF_ComponentLabel_t), 1082 (RF_ComponentLabel_t *)); 1083 if (ci_label == NULL) 1084 return (ENOMEM); 1085 1086 raid_init_component_label(raidPtr, ci_label); 1087 ci_label->serial_number = clabel->serial_number; 1088 ci_label->row = 0; /* we dont' pretend to support more */ 1089 1090 for(column=0;column<raidPtr->numCol;column++) { 1091 diskPtr = &raidPtr->Disks[column]; 1092 if (!RF_DEAD_DISK(diskPtr->status)) { 1093 ci_label->partitionSize = diskPtr->partitionSize; 1094 ci_label->column = column; 1095 raidwrite_component_label( 1096 raidPtr->Disks[column].dev, 1097 raidPtr->raid_cinfo[column].ci_vp, 1098 ci_label ); 1099 } 1100 } 1101 RF_Free(ci_label, sizeof(RF_ComponentLabel_t)); 1102 1103 return (retcode); 1104 case RAIDFRAME_SET_AUTOCONFIG: 1105 d = rf_set_autoconfig(raidPtr, *(int *) data); 1106 printf("raid%d: New autoconfig value is: %d\n", 1107 raidPtr->raidid, d); 1108 *(int *) data = d; 1109 return (retcode); 1110 1111 case RAIDFRAME_SET_ROOT: 1112 d = rf_set_rootpartition(raidPtr, *(int *) data); 1113 printf("raid%d: New rootpartition value is: %d\n", 1114 raidPtr->raidid, d); 1115 *(int *) data = d; 1116 return (retcode); 1117 1118 /* initialize all parity */ 1119 case RAIDFRAME_REWRITEPARITY: 1120 1121 if (raidPtr->Layout.map->faultsTolerated == 0) { 1122 /* Parity for RAID 0 is trivially correct */ 1123 raidPtr->parity_good = RF_RAID_CLEAN; 1124 return(0); 1125 } 1126 1127 if (raidPtr->parity_rewrite_in_progress == 1) { 1128 /* Re-write is already in progress! 
*/ 1129 return(EINVAL); 1130 } 1131 1132 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1133 rf_RewriteParityThread, 1134 raidPtr,"raid_parity"); 1135 return (retcode); 1136 1137 1138 case RAIDFRAME_ADD_HOT_SPARE: 1139 sparePtr = (RF_SingleComponent_t *) data; 1140 memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t)); 1141 retcode = rf_add_hot_spare(raidPtr, &component); 1142 return(retcode); 1143 1144 case RAIDFRAME_REMOVE_HOT_SPARE: 1145 return(retcode); 1146 1147 case RAIDFRAME_DELETE_COMPONENT: 1148 componentPtr = (RF_SingleComponent_t *)data; 1149 memcpy( &component, componentPtr, 1150 sizeof(RF_SingleComponent_t)); 1151 retcode = rf_delete_component(raidPtr, &component); 1152 return(retcode); 1153 1154 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1155 componentPtr = (RF_SingleComponent_t *)data; 1156 memcpy( &component, componentPtr, 1157 sizeof(RF_SingleComponent_t)); 1158 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1159 return(retcode); 1160 1161 case RAIDFRAME_REBUILD_IN_PLACE: 1162 1163 if (raidPtr->Layout.map->faultsTolerated == 0) { 1164 /* Can't do this on a RAID 0!! */ 1165 return(EINVAL); 1166 } 1167 1168 if (raidPtr->recon_in_progress == 1) { 1169 /* a reconstruct is already in progress! */ 1170 return(EINVAL); 1171 } 1172 1173 componentPtr = (RF_SingleComponent_t *) data; 1174 memcpy( &component, componentPtr, 1175 sizeof(RF_SingleComponent_t)); 1176 component.row = 0; /* we don't support any more */ 1177 column = component.column; 1178 1179 if ((column < 0) || (column >= raidPtr->numCol)) { 1180 return(EINVAL); 1181 } 1182 1183 RF_LOCK_MUTEX(raidPtr->mutex); 1184 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1185 (raidPtr->numFailures > 0)) { 1186 /* XXX 0 above shouldn't be constant!!! */ 1187 /* some component other than this has failed. 1188 Let's not make things worse than they already 1189 are... 
*/ 1190 printf("raid%d: Unable to reconstruct to disk at:\n", 1191 raidPtr->raidid); 1192 printf("raid%d: Col: %d Too many failures.\n", 1193 raidPtr->raidid, column); 1194 RF_UNLOCK_MUTEX(raidPtr->mutex); 1195 return (EINVAL); 1196 } 1197 if (raidPtr->Disks[column].status == 1198 rf_ds_reconstructing) { 1199 printf("raid%d: Unable to reconstruct to disk at:\n", 1200 raidPtr->raidid); 1201 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1202 1203 RF_UNLOCK_MUTEX(raidPtr->mutex); 1204 return (EINVAL); 1205 } 1206 if (raidPtr->Disks[column].status == rf_ds_spared) { 1207 RF_UNLOCK_MUTEX(raidPtr->mutex); 1208 return (EINVAL); 1209 } 1210 RF_UNLOCK_MUTEX(raidPtr->mutex); 1211 1212 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1213 if (rrcopy == NULL) 1214 return(ENOMEM); 1215 1216 rrcopy->raidPtr = (void *) raidPtr; 1217 rrcopy->col = column; 1218 1219 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1220 rf_ReconstructInPlaceThread, 1221 rrcopy,"raid_reconip"); 1222 return(retcode); 1223 1224 case RAIDFRAME_GET_INFO: 1225 if (!raidPtr->valid) 1226 return (ENODEV); 1227 ucfgp = (RF_DeviceConfig_t **) data; 1228 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1229 (RF_DeviceConfig_t *)); 1230 if (d_cfg == NULL) 1231 return (ENOMEM); 1232 d_cfg->rows = 1; /* there is only 1 row now */ 1233 d_cfg->cols = raidPtr->numCol; 1234 d_cfg->ndevs = raidPtr->numCol; 1235 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1236 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1237 return (ENOMEM); 1238 } 1239 d_cfg->nspares = raidPtr->numSpare; 1240 if (d_cfg->nspares >= RF_MAX_DISKS) { 1241 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1242 return (ENOMEM); 1243 } 1244 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1245 d = 0; 1246 for (j = 0; j < d_cfg->cols; j++) { 1247 d_cfg->devs[d] = raidPtr->Disks[j]; 1248 d++; 1249 } 1250 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1251 d_cfg->spares[i] = raidPtr->Disks[j]; 1252 } 1253 retcode = 
copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1254 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1255 1256 return (retcode); 1257 1258 case RAIDFRAME_CHECK_PARITY: 1259 *(int *) data = raidPtr->parity_good; 1260 return (0); 1261 1262 case RAIDFRAME_RESET_ACCTOTALS: 1263 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1264 return (0); 1265 1266 case RAIDFRAME_GET_ACCTOTALS: 1267 totals = (RF_AccTotals_t *) data; 1268 *totals = raidPtr->acc_totals; 1269 return (0); 1270 1271 case RAIDFRAME_KEEP_ACCTOTALS: 1272 raidPtr->keep_acc_totals = *(int *)data; 1273 return (0); 1274 1275 case RAIDFRAME_GET_SIZE: 1276 *(int *) data = raidPtr->totalSectors; 1277 return (0); 1278 1279 /* fail a disk & optionally start reconstruction */ 1280 case RAIDFRAME_FAIL_DISK: 1281 1282 if (raidPtr->Layout.map->faultsTolerated == 0) { 1283 /* Can't do this on a RAID 0!! */ 1284 return(EINVAL); 1285 } 1286 1287 rr = (struct rf_recon_req *) data; 1288 rr->row = 0; 1289 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1290 return (EINVAL); 1291 1292 1293 RF_LOCK_MUTEX(raidPtr->mutex); 1294 if (raidPtr->status == rf_rs_reconstructing) { 1295 /* you can't fail a disk while we're reconstructing! */ 1296 /* XXX wrong for RAID6 */ 1297 RF_UNLOCK_MUTEX(raidPtr->mutex); 1298 return (EINVAL); 1299 } 1300 if ((raidPtr->Disks[rr->col].status == 1301 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1302 /* some other component has failed. Let's not make 1303 things worse. XXX wrong for RAID6 */ 1304 RF_UNLOCK_MUTEX(raidPtr->mutex); 1305 return (EINVAL); 1306 } 1307 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1308 /* Can't fail a spared disk! 
*/ 1309 RF_UNLOCK_MUTEX(raidPtr->mutex); 1310 return (EINVAL); 1311 } 1312 RF_UNLOCK_MUTEX(raidPtr->mutex); 1313 1314 /* make a copy of the recon request so that we don't rely on 1315 * the user's buffer */ 1316 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1317 if (rrcopy == NULL) 1318 return(ENOMEM); 1319 memcpy(rrcopy, rr, sizeof(*rr)); 1320 rrcopy->raidPtr = (void *) raidPtr; 1321 1322 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1323 rf_ReconThread, 1324 rrcopy,"raid_recon"); 1325 return (0); 1326 1327 /* invoke a copyback operation after recon on whatever disk 1328 * needs it, if any */ 1329 case RAIDFRAME_COPYBACK: 1330 1331 if (raidPtr->Layout.map->faultsTolerated == 0) { 1332 /* This makes no sense on a RAID 0!! */ 1333 return(EINVAL); 1334 } 1335 1336 if (raidPtr->copyback_in_progress == 1) { 1337 /* Copyback is already in progress! */ 1338 return(EINVAL); 1339 } 1340 1341 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1342 rf_CopybackThread, 1343 raidPtr,"raid_copyback"); 1344 return (retcode); 1345 1346 /* return the percentage completion of reconstruction */ 1347 case RAIDFRAME_CHECK_RECON_STATUS: 1348 if (raidPtr->Layout.map->faultsTolerated == 0) { 1349 /* This makes no sense on a RAID 0, so tell the 1350 user it's done. 
*/ 1351 *(int *) data = 100; 1352 return(0); 1353 } 1354 if (raidPtr->status != rf_rs_reconstructing) 1355 *(int *) data = 100; 1356 else { 1357 if (raidPtr->reconControl->numRUsTotal > 0) { 1358 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1359 } else { 1360 *(int *) data = 0; 1361 } 1362 } 1363 return (0); 1364 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1365 progressInfoPtr = (RF_ProgressInfo_t **) data; 1366 if (raidPtr->status != rf_rs_reconstructing) { 1367 progressInfo.remaining = 0; 1368 progressInfo.completed = 100; 1369 progressInfo.total = 100; 1370 } else { 1371 progressInfo.total = 1372 raidPtr->reconControl->numRUsTotal; 1373 progressInfo.completed = 1374 raidPtr->reconControl->numRUsComplete; 1375 progressInfo.remaining = progressInfo.total - 1376 progressInfo.completed; 1377 } 1378 retcode = copyout(&progressInfo, *progressInfoPtr, 1379 sizeof(RF_ProgressInfo_t)); 1380 return (retcode); 1381 1382 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1383 if (raidPtr->Layout.map->faultsTolerated == 0) { 1384 /* This makes no sense on a RAID 0, so tell the 1385 user it's done. 
*/ 1386 *(int *) data = 100; 1387 return(0); 1388 } 1389 if (raidPtr->parity_rewrite_in_progress == 1) { 1390 *(int *) data = 100 * 1391 raidPtr->parity_rewrite_stripes_done / 1392 raidPtr->Layout.numStripe; 1393 } else { 1394 *(int *) data = 100; 1395 } 1396 return (0); 1397 1398 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1399 progressInfoPtr = (RF_ProgressInfo_t **) data; 1400 if (raidPtr->parity_rewrite_in_progress == 1) { 1401 progressInfo.total = raidPtr->Layout.numStripe; 1402 progressInfo.completed = 1403 raidPtr->parity_rewrite_stripes_done; 1404 progressInfo.remaining = progressInfo.total - 1405 progressInfo.completed; 1406 } else { 1407 progressInfo.remaining = 0; 1408 progressInfo.completed = 100; 1409 progressInfo.total = 100; 1410 } 1411 retcode = copyout(&progressInfo, *progressInfoPtr, 1412 sizeof(RF_ProgressInfo_t)); 1413 return (retcode); 1414 1415 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1416 if (raidPtr->Layout.map->faultsTolerated == 0) { 1417 /* This makes no sense on a RAID 0 */ 1418 *(int *) data = 100; 1419 return(0); 1420 } 1421 if (raidPtr->copyback_in_progress == 1) { 1422 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1423 raidPtr->Layout.numStripe; 1424 } else { 1425 *(int *) data = 100; 1426 } 1427 return (0); 1428 1429 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1430 progressInfoPtr = (RF_ProgressInfo_t **) data; 1431 if (raidPtr->copyback_in_progress == 1) { 1432 progressInfo.total = raidPtr->Layout.numStripe; 1433 progressInfo.completed = 1434 raidPtr->copyback_stripes_done; 1435 progressInfo.remaining = progressInfo.total - 1436 progressInfo.completed; 1437 } else { 1438 progressInfo.remaining = 0; 1439 progressInfo.completed = 100; 1440 progressInfo.total = 100; 1441 } 1442 retcode = copyout(&progressInfo, *progressInfoPtr, 1443 sizeof(RF_ProgressInfo_t)); 1444 return (retcode); 1445 1446 /* the sparetable daemon calls this to wait for the kernel to 1447 * need a spare table. 
this ioctl does not return until a 1448 * spare table is needed. XXX -- calling mpsleep here in the 1449 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1450 * -- I should either compute the spare table in the kernel, 1451 * or have a different -- XXX XXX -- interface (a different 1452 * character device) for delivering the table -- XXX */ 1453 #if 0 1454 case RAIDFRAME_SPARET_WAIT: 1455 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1456 while (!rf_sparet_wait_queue) 1457 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1458 waitreq = rf_sparet_wait_queue; 1459 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1460 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1461 1462 /* structure assignment */ 1463 *((RF_SparetWait_t *) data) = *waitreq; 1464 1465 RF_Free(waitreq, sizeof(*waitreq)); 1466 return (0); 1467 1468 /* wakes up a process waiting on SPARET_WAIT and puts an error 1469 * code in it that will cause the dameon to exit */ 1470 case RAIDFRAME_ABORT_SPARET_WAIT: 1471 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1472 waitreq->fcol = -1; 1473 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1474 waitreq->next = rf_sparet_wait_queue; 1475 rf_sparet_wait_queue = waitreq; 1476 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1477 wakeup(&rf_sparet_wait_queue); 1478 return (0); 1479 1480 /* used by the spare table daemon to deliver a spare table 1481 * into the kernel */ 1482 case RAIDFRAME_SEND_SPARET: 1483 1484 /* install the spare table */ 1485 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1486 1487 /* respond to the requestor. 
the return status of the spare 1488 * table installation is passed in the "fcol" field */ 1489 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1490 waitreq->fcol = retcode; 1491 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1492 waitreq->next = rf_sparet_resp_queue; 1493 rf_sparet_resp_queue = waitreq; 1494 wakeup(&rf_sparet_resp_queue); 1495 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1496 1497 return (retcode); 1498 #endif 1499 1500 default: 1501 break; /* fall through to the os-specific code below */ 1502 1503 } 1504 1505 if (!raidPtr->valid) 1506 return (EINVAL); 1507 1508 /* 1509 * Add support for "regular" device ioctls here. 1510 */ 1511 1512 switch (cmd) { 1513 case DIOCGDINFO: 1514 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1515 break; 1516 #ifdef __HAVE_OLD_DISKLABEL 1517 case ODIOCGDINFO: 1518 newlabel = *(rs->sc_dkdev.dk_label); 1519 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1520 return ENOTTY; 1521 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1522 break; 1523 #endif 1524 1525 case DIOCGPART: 1526 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1527 ((struct partinfo *) data)->part = 1528 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1529 break; 1530 1531 case DIOCWDINFO: 1532 case DIOCSDINFO: 1533 #ifdef __HAVE_OLD_DISKLABEL 1534 case ODIOCWDINFO: 1535 case ODIOCSDINFO: 1536 #endif 1537 { 1538 struct disklabel *lp; 1539 #ifdef __HAVE_OLD_DISKLABEL 1540 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1541 memset(&newlabel, 0, sizeof newlabel); 1542 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1543 lp = &newlabel; 1544 } else 1545 #endif 1546 lp = (struct disklabel *)data; 1547 1548 if ((error = raidlock(rs)) != 0) 1549 return (error); 1550 1551 rs->sc_flags |= RAIDF_LABELLING; 1552 1553 error = setdisklabel(rs->sc_dkdev.dk_label, 1554 lp, 0, rs->sc_dkdev.dk_cpulabel); 1555 if (error == 0) { 1556 if (cmd == DIOCWDINFO 1557 #ifdef __HAVE_OLD_DISKLABEL 1558 || cmd == ODIOCWDINFO 1559 #endif 1560 ) 1561 
error = writedisklabel(RAIDLABELDEV(dev), 1562 raidstrategy, rs->sc_dkdev.dk_label, 1563 rs->sc_dkdev.dk_cpulabel); 1564 } 1565 rs->sc_flags &= ~RAIDF_LABELLING; 1566 1567 raidunlock(rs); 1568 1569 if (error) 1570 return (error); 1571 break; 1572 } 1573 1574 case DIOCWLABEL: 1575 if (*(int *) data != 0) 1576 rs->sc_flags |= RAIDF_WLABEL; 1577 else 1578 rs->sc_flags &= ~RAIDF_WLABEL; 1579 break; 1580 1581 case DIOCGDEFLABEL: 1582 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1583 break; 1584 1585 #ifdef __HAVE_OLD_DISKLABEL 1586 case ODIOCGDEFLABEL: 1587 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1588 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1589 return ENOTTY; 1590 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1591 break; 1592 #endif 1593 1594 default: 1595 retcode = ENOTTY; 1596 } 1597 return (retcode); 1598 1599 } 1600 1601 1602 /* raidinit -- complete the rest of the initialization for the 1603 RAIDframe device. */ 1604 1605 1606 static void 1607 raidinit(RF_Raid_t *raidPtr) 1608 { 1609 struct raid_softc *rs; 1610 int unit; 1611 1612 unit = raidPtr->raidid; 1613 1614 rs = &raid_softc[unit]; 1615 1616 /* XXX should check return code first... */ 1617 rs->sc_flags |= RAIDF_INITED; 1618 1619 /* XXX doesn't check bounds. */ 1620 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1621 1622 rs->sc_dkdev.dk_name = rs->sc_xname; 1623 1624 /* disk_attach actually creates space for the CPU disklabel, among 1625 * other things, so it's critical to call this *BEFORE* we try putzing 1626 * with disklabels. */ 1627 1628 pseudo_disk_attach(&rs->sc_dkdev); 1629 1630 /* XXX There may be a weird interaction here between this, and 1631 * protectedSectors, as used in RAIDframe. 
*/ 1632 1633 rs->sc_size = raidPtr->totalSectors; 1634 } 1635 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1636 /* wake up the daemon & tell it to get us a spare table 1637 * XXX 1638 * the entries in the queues should be tagged with the raidPtr 1639 * so that in the extremely rare case that two recons happen at once, 1640 * we know for which device were requesting a spare table 1641 * XXX 1642 * 1643 * XXX This code is not currently used. GO 1644 */ 1645 int 1646 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1647 { 1648 int retcode; 1649 1650 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1651 req->next = rf_sparet_wait_queue; 1652 rf_sparet_wait_queue = req; 1653 wakeup(&rf_sparet_wait_queue); 1654 1655 /* mpsleep unlocks the mutex */ 1656 while (!rf_sparet_resp_queue) { 1657 tsleep(&rf_sparet_resp_queue, PRIBIO, 1658 "raidframe getsparetable", 0); 1659 } 1660 req = rf_sparet_resp_queue; 1661 rf_sparet_resp_queue = req->next; 1662 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1663 1664 retcode = req->fcol; 1665 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1666 * alloc'd */ 1667 return (retcode); 1668 } 1669 #endif 1670 1671 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1672 * bp & passes it down. 
1673 * any calls originating in the kernel must use non-blocking I/O 1674 * do some extra sanity checking to return "appropriate" error values for 1675 * certain conditions (to make some standard utilities work) 1676 * 1677 * Formerly known as: rf_DoAccessKernel 1678 */ 1679 void 1680 raidstart(RF_Raid_t *raidPtr) 1681 { 1682 RF_SectorCount_t num_blocks, pb, sum; 1683 RF_RaidAddr_t raid_addr; 1684 struct partition *pp; 1685 daddr_t blocknum; 1686 int unit; 1687 struct raid_softc *rs; 1688 int do_async; 1689 struct buf *bp; 1690 int rc; 1691 1692 unit = raidPtr->raidid; 1693 rs = &raid_softc[unit]; 1694 1695 /* quick check to see if anything has died recently */ 1696 RF_LOCK_MUTEX(raidPtr->mutex); 1697 if (raidPtr->numNewFailures > 0) { 1698 RF_UNLOCK_MUTEX(raidPtr->mutex); 1699 rf_update_component_labels(raidPtr, 1700 RF_NORMAL_COMPONENT_UPDATE); 1701 RF_LOCK_MUTEX(raidPtr->mutex); 1702 raidPtr->numNewFailures--; 1703 } 1704 1705 /* Check to see if we're at the limit... */ 1706 while (raidPtr->openings > 0) { 1707 RF_UNLOCK_MUTEX(raidPtr->mutex); 1708 1709 /* get the next item, if any, from the queue */ 1710 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) { 1711 /* nothing more to do */ 1712 return; 1713 } 1714 1715 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1716 * partition.. Need to make it absolute to the underlying 1717 * device.. */ 1718 1719 blocknum = bp->b_blkno; 1720 if (DISKPART(bp->b_dev) != RAW_PART) { 1721 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 1722 blocknum += pp->p_offset; 1723 } 1724 1725 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1726 (int) blocknum)); 1727 1728 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1729 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1730 1731 /* *THIS* is where we adjust what block we're going to... 1732 * but DO NOT TOUCH bp->b_blkno!!! 
*/ 1733 raid_addr = blocknum; 1734 1735 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1736 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 1737 sum = raid_addr + num_blocks + pb; 1738 if (1 || rf_debugKernelAccess) { 1739 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 1740 (int) raid_addr, (int) sum, (int) num_blocks, 1741 (int) pb, (int) bp->b_resid)); 1742 } 1743 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 1744 || (sum < num_blocks) || (sum < pb)) { 1745 bp->b_error = ENOSPC; 1746 bp->b_flags |= B_ERROR; 1747 bp->b_resid = bp->b_bcount; 1748 biodone(bp); 1749 RF_LOCK_MUTEX(raidPtr->mutex); 1750 continue; 1751 } 1752 /* 1753 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 1754 */ 1755 1756 if (bp->b_bcount & raidPtr->sectorMask) { 1757 bp->b_error = EINVAL; 1758 bp->b_flags |= B_ERROR; 1759 bp->b_resid = bp->b_bcount; 1760 biodone(bp); 1761 RF_LOCK_MUTEX(raidPtr->mutex); 1762 continue; 1763 1764 } 1765 db1_printf(("Calling DoAccess..\n")); 1766 1767 1768 RF_LOCK_MUTEX(raidPtr->mutex); 1769 raidPtr->openings--; 1770 RF_UNLOCK_MUTEX(raidPtr->mutex); 1771 1772 /* 1773 * Everything is async. 1774 */ 1775 do_async = 1; 1776 1777 disk_busy(&rs->sc_dkdev); 1778 1779 /* XXX we're still at splbio() here... do we *really* 1780 need to be? */ 1781 1782 /* don't ever condition on bp->b_flags & B_WRITE. 1783 * always condition on B_READ instead */ 1784 1785 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 1786 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 1787 do_async, raid_addr, num_blocks, 1788 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 1789 1790 if (rc) { 1791 bp->b_error = rc; 1792 bp->b_flags |= B_ERROR; 1793 bp->b_resid = bp->b_bcount; 1794 biodone(bp); 1795 /* continue loop */ 1796 } 1797 1798 RF_LOCK_MUTEX(raidPtr->mutex); 1799 } 1800 RF_UNLOCK_MUTEX(raidPtr->mutex); 1801 } 1802 1803 1804 1805 1806 /* invoke an I/O from kernel mode. 
Disk queue should be locked upon entry */ 1807 1808 int 1809 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 1810 { 1811 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 1812 struct buf *bp; 1813 1814 req->queue = queue; 1815 1816 #if DIAGNOSTIC 1817 if (queue->raidPtr->raidid >= numraid) { 1818 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid, 1819 numraid); 1820 panic("Invalid Unit number in rf_DispatchKernelIO"); 1821 } 1822 #endif 1823 1824 bp = req->bp; 1825 1826 switch (req->type) { 1827 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 1828 /* XXX need to do something extra here.. */ 1829 /* I'm leaving this in, as I've never actually seen it used, 1830 * and I'd like folks to report it... GO */ 1831 printf(("WAKEUP CALLED\n")); 1832 queue->numOutstanding++; 1833 1834 bp->b_flags = 0; 1835 bp->b_private = req; 1836 1837 KernelWakeupFunc(bp); 1838 break; 1839 1840 case RF_IO_TYPE_READ: 1841 case RF_IO_TYPE_WRITE: 1842 #if RF_ACC_TRACE > 0 1843 if (req->tracerec) { 1844 RF_ETIMER_START(req->tracerec->timer); 1845 } 1846 #endif 1847 InitBP(bp, queue->rf_cinfo->ci_vp, 1848 op, queue->rf_cinfo->ci_dev, 1849 req->sectorOffset, req->numSector, 1850 req->buf, KernelWakeupFunc, (void *) req, 1851 queue->raidPtr->logBytesPerSector, req->b_proc); 1852 1853 if (rf_debugKernelAccess) { 1854 db1_printf(("dispatch: bp->b_blkno = %ld\n", 1855 (long) bp->b_blkno)); 1856 } 1857 queue->numOutstanding++; 1858 queue->last_deq_sector = req->sectorOffset; 1859 /* acc wouldn't have been let in if there were any pending 1860 * reqs at any other priority */ 1861 queue->curPriority = req->priority; 1862 1863 db1_printf(("Going for %c to unit %d col %d\n", 1864 req->type, queue->raidPtr->raidid, 1865 queue->col)); 1866 db1_printf(("sector %d count %d (%d bytes) %d\n", 1867 (int) req->sectorOffset, (int) req->numSector, 1868 (int) (req->numSector << 1869 queue->raidPtr->logBytesPerSector), 1870 (int) 
queue->raidPtr->logBytesPerSector)); 1871 VOP_STRATEGY(bp->b_vp, bp); 1872 1873 break; 1874 1875 default: 1876 panic("bad req->type in rf_DispatchKernelIO"); 1877 } 1878 db1_printf(("Exiting from DispatchKernelIO\n")); 1879 1880 return (0); 1881 } 1882 /* this is the callback function associated with a I/O invoked from 1883 kernel code. 1884 */ 1885 static void 1886 KernelWakeupFunc(struct buf *bp) 1887 { 1888 RF_DiskQueueData_t *req = NULL; 1889 RF_DiskQueue_t *queue; 1890 int s; 1891 1892 s = splbio(); 1893 db1_printf(("recovering the request queue:\n")); 1894 req = bp->b_private; 1895 1896 queue = (RF_DiskQueue_t *) req->queue; 1897 1898 #if RF_ACC_TRACE > 0 1899 if (req->tracerec) { 1900 RF_ETIMER_STOP(req->tracerec->timer); 1901 RF_ETIMER_EVAL(req->tracerec->timer); 1902 RF_LOCK_MUTEX(rf_tracing_mutex); 1903 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1904 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1905 req->tracerec->num_phys_ios++; 1906 RF_UNLOCK_MUTEX(rf_tracing_mutex); 1907 } 1908 #endif 1909 1910 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go 1911 * ballistic, and mark the component as hosed... */ 1912 1913 if (bp->b_flags & B_ERROR) { 1914 /* Mark the disk as dead */ 1915 /* but only mark it once... */ 1916 /* and only if it wouldn't leave this RAID set 1917 completely broken */ 1918 if (((queue->raidPtr->Disks[queue->col].status == 1919 rf_ds_optimal) || 1920 (queue->raidPtr->Disks[queue->col].status == 1921 rf_ds_used_spare)) && 1922 (queue->raidPtr->numFailures < 1923 queue->raidPtr->Layout.map->faultsTolerated)) { 1924 printf("raid%d: IO Error. Marking %s as failed.\n", 1925 queue->raidPtr->raidid, 1926 queue->raidPtr->Disks[queue->col].devname); 1927 queue->raidPtr->Disks[queue->col].status = 1928 rf_ds_failed; 1929 queue->raidPtr->status = rf_rs_degraded; 1930 queue->raidPtr->numFailures++; 1931 queue->raidPtr->numNewFailures++; 1932 } else { /* Disk is already dead... 
*/ 1933 /* printf("Disk already marked as dead!\n"); */ 1934 } 1935 1936 } 1937 1938 /* Fill in the error value */ 1939 1940 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0; 1941 1942 simple_lock(&queue->raidPtr->iodone_lock); 1943 1944 /* Drop this one on the "finished" queue... */ 1945 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 1946 1947 /* Let the raidio thread know there is work to be done. */ 1948 wakeup(&(queue->raidPtr->iodone)); 1949 1950 simple_unlock(&queue->raidPtr->iodone_lock); 1951 1952 splx(s); 1953 } 1954 1955 1956 1957 /* 1958 * initialize a buf structure for doing an I/O in the kernel. 1959 */ 1960 static void 1961 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 1962 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf, 1963 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 1964 struct proc *b_proc) 1965 { 1966 /* bp->b_flags = B_PHYS | rw_flag; */ 1967 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 1968 bp->b_bcount = numSect << logBytesPerSector; 1969 bp->b_bufsize = bp->b_bcount; 1970 bp->b_error = 0; 1971 bp->b_dev = dev; 1972 bp->b_data = bf; 1973 bp->b_blkno = startSect; 1974 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 1975 if (bp->b_bcount == 0) { 1976 panic("bp->b_bcount is zero in InitBP!!"); 1977 } 1978 bp->b_proc = b_proc; 1979 bp->b_iodone = cbFunc; 1980 bp->b_private = cbArg; 1981 bp->b_vp = b_vp; 1982 if ((bp->b_flags & B_READ) == 0) { 1983 bp->b_vp->v_numoutput++; 1984 } 1985 1986 } 1987 1988 static void 1989 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 1990 struct disklabel *lp) 1991 { 1992 memset(lp, 0, sizeof(*lp)); 1993 1994 /* fabricate a label... 
*/ 1995 lp->d_secperunit = raidPtr->totalSectors; 1996 lp->d_secsize = raidPtr->bytesPerSector; 1997 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 1998 lp->d_ntracks = 4 * raidPtr->numCol; 1999 lp->d_ncylinders = raidPtr->totalSectors / 2000 (lp->d_nsectors * lp->d_ntracks); 2001 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2002 2003 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2004 lp->d_type = DTYPE_RAID; 2005 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2006 lp->d_rpm = 3600; 2007 lp->d_interleave = 1; 2008 lp->d_flags = 0; 2009 2010 lp->d_partitions[RAW_PART].p_offset = 0; 2011 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2012 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2013 lp->d_npartitions = RAW_PART + 1; 2014 2015 lp->d_magic = DISKMAGIC; 2016 lp->d_magic2 = DISKMAGIC; 2017 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2018 2019 } 2020 /* 2021 * Read the disklabel from the raid device. If one is not present, fake one 2022 * up. 2023 */ 2024 static void 2025 raidgetdisklabel(dev_t dev) 2026 { 2027 int unit = raidunit(dev); 2028 struct raid_softc *rs = &raid_softc[unit]; 2029 const char *errstring; 2030 struct disklabel *lp = rs->sc_dkdev.dk_label; 2031 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2032 RF_Raid_t *raidPtr; 2033 2034 db1_printf(("Getting the disklabel...\n")); 2035 2036 memset(clp, 0, sizeof(*clp)); 2037 2038 raidPtr = raidPtrs[unit]; 2039 2040 raidgetdefaultlabel(raidPtr, rs, lp); 2041 2042 /* 2043 * Call the generic disklabel extraction routine. 2044 */ 2045 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2046 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2047 if (errstring) 2048 raidmakedisklabel(rs); 2049 else { 2050 int i; 2051 struct partition *pp; 2052 2053 /* 2054 * Sanity check whether the found disklabel is valid. 
2055 * 2056 * This is necessary since total size of the raid device 2057 * may vary when an interleave is changed even though exactly 2058 * same componets are used, and old disklabel may used 2059 * if that is found. 2060 */ 2061 if (lp->d_secperunit != rs->sc_size) 2062 printf("raid%d: WARNING: %s: " 2063 "total sector size in disklabel (%d) != " 2064 "the size of raid (%ld)\n", unit, rs->sc_xname, 2065 lp->d_secperunit, (long) rs->sc_size); 2066 for (i = 0; i < lp->d_npartitions; i++) { 2067 pp = &lp->d_partitions[i]; 2068 if (pp->p_offset + pp->p_size > rs->sc_size) 2069 printf("raid%d: WARNING: %s: end of partition `%c' " 2070 "exceeds the size of raid (%ld)\n", 2071 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size); 2072 } 2073 } 2074 2075 } 2076 /* 2077 * Take care of things one might want to take care of in the event 2078 * that a disklabel isn't present. 2079 */ 2080 static void 2081 raidmakedisklabel(struct raid_softc *rs) 2082 { 2083 struct disklabel *lp = rs->sc_dkdev.dk_label; 2084 db1_printf(("Making a label..\n")); 2085 2086 /* 2087 * For historical reasons, if there's no disklabel present 2088 * the raw partition must be marked FS_BSDFFS. 2089 */ 2090 2091 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2092 2093 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2094 2095 lp->d_checksum = dkcksum(lp); 2096 } 2097 /* 2098 * Lookup the provided name in the filesystem. If the file exists, 2099 * is a valid block device, and isn't being used by anyone else, 2100 * set *vpp to the file's vnode. 2101 * You'll find the original of this in ccd.c 2102 */ 2103 int 2104 raidlookup(char *path, struct lwp *l, struct vnode **vpp) 2105 { 2106 struct nameidata nd; 2107 struct vnode *vp; 2108 struct proc *p; 2109 struct vattr va; 2110 int error; 2111 2112 if (l == NULL) 2113 return(ESRCH); /* Is ESRCH the best choice? 
*/ 2114 p = l->l_proc; 2115 2116 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l); 2117 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) { 2118 return (error); 2119 } 2120 vp = nd.ni_vp; 2121 if (vp->v_usecount > 1) { 2122 VOP_UNLOCK(vp, 0); 2123 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l); 2124 return (EBUSY); 2125 } 2126 if ((error = VOP_GETATTR(vp, &va, p->p_cred, l)) != 0) { 2127 VOP_UNLOCK(vp, 0); 2128 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l); 2129 return (error); 2130 } 2131 /* XXX: eventually we should handle VREG, too. */ 2132 if (va.va_type != VBLK) { 2133 VOP_UNLOCK(vp, 0); 2134 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l); 2135 return (ENOTBLK); 2136 } 2137 VOP_UNLOCK(vp, 0); 2138 *vpp = vp; 2139 return (0); 2140 } 2141 /* 2142 * Wait interruptibly for an exclusive lock. 2143 * 2144 * XXX 2145 * Several drivers do this; it should be abstracted and made MP-safe. 2146 * (Hmm... where have we seen this warning before :-> GO ) 2147 */ 2148 static int 2149 raidlock(struct raid_softc *rs) 2150 { 2151 int error; 2152 2153 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2154 rs->sc_flags |= RAIDF_WANTED; 2155 if ((error = 2156 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2157 return (error); 2158 } 2159 rs->sc_flags |= RAIDF_LOCKED; 2160 return (0); 2161 } 2162 /* 2163 * Unlock and wake up any waiters. 
2164 */ 2165 static void 2166 raidunlock(struct raid_softc *rs) 2167 { 2168 2169 rs->sc_flags &= ~RAIDF_LOCKED; 2170 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2171 rs->sc_flags &= ~RAIDF_WANTED; 2172 wakeup(rs); 2173 } 2174 } 2175 2176 2177 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2178 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2179 2180 int 2181 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2182 { 2183 RF_ComponentLabel_t clabel; 2184 raidread_component_label(dev, b_vp, &clabel); 2185 clabel.mod_counter = mod_counter; 2186 clabel.clean = RF_RAID_CLEAN; 2187 raidwrite_component_label(dev, b_vp, &clabel); 2188 return(0); 2189 } 2190 2191 2192 int 2193 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2194 { 2195 RF_ComponentLabel_t clabel; 2196 raidread_component_label(dev, b_vp, &clabel); 2197 clabel.mod_counter = mod_counter; 2198 clabel.clean = RF_RAID_DIRTY; 2199 raidwrite_component_label(dev, b_vp, &clabel); 2200 return(0); 2201 } 2202 2203 /* ARGSUSED */ 2204 int 2205 raidread_component_label(dev_t dev, struct vnode *b_vp, 2206 RF_ComponentLabel_t *clabel) 2207 { 2208 struct buf *bp; 2209 const struct bdevsw *bdev; 2210 int error; 2211 2212 /* XXX should probably ensure that we don't try to do this if 2213 someone has changed rf_protected_sectors. */ 2214 2215 if (b_vp == NULL) { 2216 /* For whatever reason, this component is not valid. 2217 Don't try to read a component label from it. */ 2218 return(EINVAL); 2219 } 2220 2221 /* get a block of the appropriate size... 
*/ 2222 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2223 bp->b_dev = dev; 2224 2225 /* get our ducks in a row for the read */ 2226 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2227 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2228 bp->b_flags |= B_READ; 2229 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2230 2231 bdev = bdevsw_lookup(bp->b_dev); 2232 if (bdev == NULL) 2233 return (ENXIO); 2234 (*bdev->d_strategy)(bp); 2235 2236 error = biowait(bp); 2237 2238 if (!error) { 2239 memcpy(clabel, bp->b_data, 2240 sizeof(RF_ComponentLabel_t)); 2241 } 2242 2243 brelse(bp); 2244 return(error); 2245 } 2246 /* ARGSUSED */ 2247 int 2248 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2249 RF_ComponentLabel_t *clabel) 2250 { 2251 struct buf *bp; 2252 const struct bdevsw *bdev; 2253 int error; 2254 2255 /* get a block of the appropriate size... */ 2256 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2257 bp->b_dev = dev; 2258 2259 /* get our ducks in a row for the write */ 2260 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2261 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2262 bp->b_flags |= B_WRITE; 2263 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2264 2265 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2266 2267 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2268 2269 bdev = bdevsw_lookup(bp->b_dev); 2270 if (bdev == NULL) 2271 return (ENXIO); 2272 (*bdev->d_strategy)(bp); 2273 error = biowait(bp); 2274 brelse(bp); 2275 if (error) { 2276 #if 1 2277 printf("Failed to write RAID component info!\n"); 2278 #endif 2279 } 2280 2281 return(error); 2282 } 2283 2284 void 2285 rf_markalldirty(RF_Raid_t *raidPtr) 2286 { 2287 RF_ComponentLabel_t clabel; 2288 int sparecol; 2289 int c; 2290 int j; 2291 int scol = -1; 2292 2293 raidPtr->mod_counter++; 2294 for (c = 0; c < raidPtr->numCol; c++) { 2295 /* we don't want to touch (at all) a disk that has 2296 failed */ 2297 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2298 raidread_component_label( 2299 
raidPtr->Disks[c].dev, 2300 raidPtr->raid_cinfo[c].ci_vp, 2301 &clabel); 2302 if (clabel.status == rf_ds_spared) { 2303 /* XXX do something special... 2304 but whatever you do, don't 2305 try to access it!! */ 2306 } else { 2307 raidmarkdirty( 2308 raidPtr->Disks[c].dev, 2309 raidPtr->raid_cinfo[c].ci_vp, 2310 raidPtr->mod_counter); 2311 } 2312 } 2313 } 2314 2315 for( c = 0; c < raidPtr->numSpare ; c++) { 2316 sparecol = raidPtr->numCol + c; 2317 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2318 /* 2319 2320 we claim this disk is "optimal" if it's 2321 rf_ds_used_spare, as that means it should be 2322 directly substitutable for the disk it replaced. 2323 We note that too... 2324 2325 */ 2326 2327 for(j=0;j<raidPtr->numCol;j++) { 2328 if (raidPtr->Disks[j].spareCol == sparecol) { 2329 scol = j; 2330 break; 2331 } 2332 } 2333 2334 raidread_component_label( 2335 raidPtr->Disks[sparecol].dev, 2336 raidPtr->raid_cinfo[sparecol].ci_vp, 2337 &clabel); 2338 /* make sure status is noted */ 2339 2340 raid_init_component_label(raidPtr, &clabel); 2341 2342 clabel.row = 0; 2343 clabel.column = scol; 2344 /* Note: we *don't* change status from rf_ds_used_spare 2345 to rf_ds_optimal */ 2346 /* clabel.status = rf_ds_optimal; */ 2347 2348 raidmarkdirty(raidPtr->Disks[sparecol].dev, 2349 raidPtr->raid_cinfo[sparecol].ci_vp, 2350 raidPtr->mod_counter); 2351 } 2352 } 2353 } 2354 2355 2356 void 2357 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2358 { 2359 RF_ComponentLabel_t clabel; 2360 int sparecol; 2361 int c; 2362 int j; 2363 int scol; 2364 2365 scol = -1; 2366 2367 /* XXX should do extra checks to make sure things really are clean, 2368 rather than blindly setting the clean bit... 
*/ 2369 2370 raidPtr->mod_counter++; 2371 2372 for (c = 0; c < raidPtr->numCol; c++) { 2373 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2374 raidread_component_label( 2375 raidPtr->Disks[c].dev, 2376 raidPtr->raid_cinfo[c].ci_vp, 2377 &clabel); 2378 /* make sure status is noted */ 2379 clabel.status = rf_ds_optimal; 2380 2381 /* bump the counter */ 2382 clabel.mod_counter = raidPtr->mod_counter; 2383 2384 raidwrite_component_label( 2385 raidPtr->Disks[c].dev, 2386 raidPtr->raid_cinfo[c].ci_vp, 2387 &clabel); 2388 if (final == RF_FINAL_COMPONENT_UPDATE) { 2389 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2390 raidmarkclean( 2391 raidPtr->Disks[c].dev, 2392 raidPtr->raid_cinfo[c].ci_vp, 2393 raidPtr->mod_counter); 2394 } 2395 } 2396 } 2397 /* else we don't touch it.. */ 2398 } 2399 2400 for( c = 0; c < raidPtr->numSpare ; c++) { 2401 sparecol = raidPtr->numCol + c; 2402 /* Need to ensure that the reconstruct actually completed! */ 2403 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2404 /* 2405 2406 we claim this disk is "optimal" if it's 2407 rf_ds_used_spare, as that means it should be 2408 directly substitutable for the disk it replaced. 2409 We note that too... 2410 2411 */ 2412 2413 for(j=0;j<raidPtr->numCol;j++) { 2414 if (raidPtr->Disks[j].spareCol == sparecol) { 2415 scol = j; 2416 break; 2417 } 2418 } 2419 2420 /* XXX shouldn't *really* need this... 
*/ 2421 raidread_component_label( 2422 raidPtr->Disks[sparecol].dev, 2423 raidPtr->raid_cinfo[sparecol].ci_vp, 2424 &clabel); 2425 /* make sure status is noted */ 2426 2427 raid_init_component_label(raidPtr, &clabel); 2428 2429 clabel.mod_counter = raidPtr->mod_counter; 2430 clabel.column = scol; 2431 clabel.status = rf_ds_optimal; 2432 2433 raidwrite_component_label( 2434 raidPtr->Disks[sparecol].dev, 2435 raidPtr->raid_cinfo[sparecol].ci_vp, 2436 &clabel); 2437 if (final == RF_FINAL_COMPONENT_UPDATE) { 2438 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2439 raidmarkclean( raidPtr->Disks[sparecol].dev, 2440 raidPtr->raid_cinfo[sparecol].ci_vp, 2441 raidPtr->mod_counter); 2442 } 2443 } 2444 } 2445 } 2446 } 2447 2448 void 2449 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2450 { 2451 struct proc *p; 2452 struct lwp *l; 2453 2454 p = raidPtr->engine_thread; 2455 l = LIST_FIRST(&p->p_lwps); 2456 2457 if (vp != NULL) { 2458 if (auto_configured == 1) { 2459 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2460 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2461 vput(vp); 2462 2463 } else { 2464 (void) vn_close(vp, FREAD | FWRITE, p->p_cred, l); 2465 } 2466 } 2467 } 2468 2469 2470 void 2471 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2472 { 2473 int r,c; 2474 struct vnode *vp; 2475 int acd; 2476 2477 2478 /* We take this opportunity to close the vnodes like we should.. 
*/ 2479 2480 for (c = 0; c < raidPtr->numCol; c++) { 2481 vp = raidPtr->raid_cinfo[c].ci_vp; 2482 acd = raidPtr->Disks[c].auto_configured; 2483 rf_close_component(raidPtr, vp, acd); 2484 raidPtr->raid_cinfo[c].ci_vp = NULL; 2485 raidPtr->Disks[c].auto_configured = 0; 2486 } 2487 2488 for (r = 0; r < raidPtr->numSpare; r++) { 2489 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2490 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2491 rf_close_component(raidPtr, vp, acd); 2492 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2493 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2494 } 2495 } 2496 2497 2498 void 2499 rf_ReconThread(struct rf_recon_req *req) 2500 { 2501 int s; 2502 RF_Raid_t *raidPtr; 2503 2504 s = splbio(); 2505 raidPtr = (RF_Raid_t *) req->raidPtr; 2506 raidPtr->recon_in_progress = 1; 2507 2508 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2509 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2510 2511 RF_Free(req, sizeof(*req)); 2512 2513 raidPtr->recon_in_progress = 0; 2514 splx(s); 2515 2516 /* That's all... */ 2517 kthread_exit(0); /* does not return */ 2518 } 2519 2520 void 2521 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2522 { 2523 int retcode; 2524 int s; 2525 2526 raidPtr->parity_rewrite_stripes_done = 0; 2527 raidPtr->parity_rewrite_in_progress = 1; 2528 s = splbio(); 2529 retcode = rf_RewriteParity(raidPtr); 2530 splx(s); 2531 if (retcode) { 2532 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2533 } else { 2534 /* set the clean bit! If we shutdown correctly, 2535 the clean bit on each component label will get 2536 set */ 2537 raidPtr->parity_good = RF_RAID_CLEAN; 2538 } 2539 raidPtr->parity_rewrite_in_progress = 0; 2540 2541 /* Anyone waiting for us to stop? If so, inform them... */ 2542 if (raidPtr->waitShutdown) { 2543 wakeup(&raidPtr->parity_rewrite_in_progress); 2544 } 2545 2546 /* That's all... 
*/ 2547 kthread_exit(0); /* does not return */ 2548 } 2549 2550 2551 void 2552 rf_CopybackThread(RF_Raid_t *raidPtr) 2553 { 2554 int s; 2555 2556 raidPtr->copyback_in_progress = 1; 2557 s = splbio(); 2558 rf_CopybackReconstructedData(raidPtr); 2559 splx(s); 2560 raidPtr->copyback_in_progress = 0; 2561 2562 /* That's all... */ 2563 kthread_exit(0); /* does not return */ 2564 } 2565 2566 2567 void 2568 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 2569 { 2570 int s; 2571 RF_Raid_t *raidPtr; 2572 2573 s = splbio(); 2574 raidPtr = req->raidPtr; 2575 raidPtr->recon_in_progress = 1; 2576 rf_ReconstructInPlace(raidPtr, req->col); 2577 RF_Free(req, sizeof(*req)); 2578 raidPtr->recon_in_progress = 0; 2579 splx(s); 2580 2581 /* That's all... */ 2582 kthread_exit(0); /* does not return */ 2583 } 2584 2585 RF_AutoConfig_t * 2586 rf_find_raid_components() 2587 { 2588 struct vnode *vp; 2589 struct disklabel label; 2590 struct device *dv; 2591 dev_t dev; 2592 int bmajor; 2593 int error; 2594 int i; 2595 int good_one; 2596 RF_ComponentLabel_t *clabel; 2597 RF_AutoConfig_t *ac_list; 2598 RF_AutoConfig_t *ac; 2599 2600 2601 /* initialize the AutoConfig list */ 2602 ac_list = NULL; 2603 2604 /* we begin by trolling through *all* the devices on the system */ 2605 2606 for (dv = alldevs.tqh_first; dv != NULL; 2607 dv = dv->dv_list.tqe_next) { 2608 2609 /* we are only interested in disks... */ 2610 if (device_class(dv) != DV_DISK) 2611 continue; 2612 2613 /* we don't care about floppies... */ 2614 if (device_is_a(dv, "fd")) { 2615 continue; 2616 } 2617 2618 /* we don't care about CD's... 
*/ 2619 if (device_is_a(dv, "cd")) { 2620 continue; 2621 } 2622 2623 /* hdfd is the Atari/Hades floppy driver */ 2624 if (device_is_a(dv, "hdfd")) { 2625 continue; 2626 } 2627 2628 /* fdisa is the Atari/Milan floppy driver */ 2629 if (device_is_a(dv, "fdisa")) { 2630 continue; 2631 } 2632 2633 /* need to find the device_name_to_block_device_major stuff */ 2634 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0); 2635 2636 /* get a vnode for the raw partition of this disk */ 2637 2638 dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART); 2639 if (bdevvp(dev, &vp)) 2640 panic("RAID can't alloc vnode"); 2641 2642 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2643 2644 if (error) { 2645 /* "Who cares." Continue looking 2646 for something that exists*/ 2647 vput(vp); 2648 continue; 2649 } 2650 2651 /* Ok, the disk exists. Go get the disklabel. */ 2652 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0); 2653 if (error) { 2654 /* 2655 * XXX can't happen - open() would 2656 * have errored out (or faked up one) 2657 */ 2658 if (error != ENOTTY) 2659 printf("RAIDframe: can't get label for dev " 2660 "%s (%d)\n", dv->dv_xname, error); 2661 } 2662 2663 /* don't need this any more. We'll allocate it again 2664 a little later if we really do... */ 2665 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2666 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2667 vput(vp); 2668 2669 if (error) 2670 continue; 2671 2672 for (i=0; i < label.d_npartitions; i++) { 2673 /* We only support partitions marked as RAID */ 2674 if (label.d_partitions[i].p_fstype != FS_RAID) 2675 continue; 2676 2677 dev = MAKEDISKDEV(bmajor, device_unit(dv), i); 2678 if (bdevvp(dev, &vp)) 2679 panic("RAID can't alloc vnode"); 2680 2681 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2682 if (error) { 2683 /* Whatever... 
*/ 2684 vput(vp); 2685 continue; 2686 } 2687 2688 good_one = 0; 2689 2690 clabel = (RF_ComponentLabel_t *) 2691 malloc(sizeof(RF_ComponentLabel_t), 2692 M_RAIDFRAME, M_NOWAIT); 2693 if (clabel == NULL) { 2694 while(ac_list) { 2695 ac = ac_list; 2696 if (ac->clabel) 2697 free(ac->clabel, M_RAIDFRAME); 2698 ac_list = ac_list->next; 2699 free(ac, M_RAIDFRAME); 2700 }; 2701 printf("RAID auto config: out of memory!\n"); 2702 return(NULL); /* XXX probably should panic? */ 2703 } 2704 2705 if (!raidread_component_label(dev, vp, clabel)) { 2706 /* Got the label. Does it look reasonable? */ 2707 if (rf_reasonable_label(clabel) && 2708 (clabel->partitionSize <= 2709 label.d_partitions[i].p_size)) { 2710 #if DEBUG 2711 printf("Component on: %s%c: %d\n", 2712 dv->dv_xname, 'a'+i, 2713 label.d_partitions[i].p_size); 2714 rf_print_component_label(clabel); 2715 #endif 2716 /* if it's reasonable, add it, 2717 else ignore it. */ 2718 ac = (RF_AutoConfig_t *) 2719 malloc(sizeof(RF_AutoConfig_t), 2720 M_RAIDFRAME, 2721 M_NOWAIT); 2722 if (ac == NULL) { 2723 /* XXX should panic?? 
*/ 2724 while(ac_list) { 2725 ac = ac_list; 2726 if (ac->clabel) 2727 free(ac->clabel, 2728 M_RAIDFRAME); 2729 ac_list = ac_list->next; 2730 free(ac, M_RAIDFRAME); 2731 } 2732 free(clabel, M_RAIDFRAME); 2733 return(NULL); 2734 } 2735 2736 snprintf(ac->devname, 2737 sizeof(ac->devname), "%s%c", 2738 dv->dv_xname, 'a'+i); 2739 ac->dev = dev; 2740 ac->vp = vp; 2741 ac->clabel = clabel; 2742 ac->next = ac_list; 2743 ac_list = ac; 2744 good_one = 1; 2745 } 2746 } 2747 if (!good_one) { 2748 /* cleanup */ 2749 free(clabel, M_RAIDFRAME); 2750 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2751 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2752 vput(vp); 2753 } 2754 } 2755 } 2756 return(ac_list); 2757 } 2758 2759 static int 2760 rf_reasonable_label(RF_ComponentLabel_t *clabel) 2761 { 2762 2763 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2764 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2765 ((clabel->clean == RF_RAID_CLEAN) || 2766 (clabel->clean == RF_RAID_DIRTY)) && 2767 clabel->row >=0 && 2768 clabel->column >= 0 && 2769 clabel->num_rows > 0 && 2770 clabel->num_columns > 0 && 2771 clabel->row < clabel->num_rows && 2772 clabel->column < clabel->num_columns && 2773 clabel->blockSize > 0 && 2774 clabel->numBlocks > 0) { 2775 /* label looks reasonable enough... */ 2776 return(1); 2777 } 2778 return(0); 2779 } 2780 2781 2782 #if DEBUG 2783 void 2784 rf_print_component_label(RF_ComponentLabel_t *clabel) 2785 { 2786 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 2787 clabel->row, clabel->column, 2788 clabel->num_rows, clabel->num_columns); 2789 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 2790 clabel->version, clabel->serial_number, 2791 clabel->mod_counter); 2792 printf(" Clean: %s Status: %d\n", 2793 clabel->clean ? 
"Yes" : "No", clabel->status ); 2794 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 2795 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 2796 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 2797 (char) clabel->parityConfig, clabel->blockSize, 2798 clabel->numBlocks); 2799 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 2800 printf(" Contains root partition: %s\n", 2801 clabel->root_partition ? "Yes" : "No" ); 2802 printf(" Last configured as: raid%d\n", clabel->last_unit ); 2803 #if 0 2804 printf(" Config order: %d\n", clabel->config_order); 2805 #endif 2806 2807 } 2808 #endif 2809 2810 RF_ConfigSet_t * 2811 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 2812 { 2813 RF_AutoConfig_t *ac; 2814 RF_ConfigSet_t *config_sets; 2815 RF_ConfigSet_t *cset; 2816 RF_AutoConfig_t *ac_next; 2817 2818 2819 config_sets = NULL; 2820 2821 /* Go through the AutoConfig list, and figure out which components 2822 belong to what sets. */ 2823 ac = ac_list; 2824 while(ac!=NULL) { 2825 /* we're going to putz with ac->next, so save it here 2826 for use at the end of the loop */ 2827 ac_next = ac->next; 2828 2829 if (config_sets == NULL) { 2830 /* will need at least this one... */ 2831 config_sets = (RF_ConfigSet_t *) 2832 malloc(sizeof(RF_ConfigSet_t), 2833 M_RAIDFRAME, M_NOWAIT); 2834 if (config_sets == NULL) { 2835 panic("rf_create_auto_sets: No memory!"); 2836 } 2837 /* this one is easy :) */ 2838 config_sets->ac = ac; 2839 config_sets->next = NULL; 2840 config_sets->rootable = 0; 2841 ac->next = NULL; 2842 } else { 2843 /* which set does this component fit into? */ 2844 cset = config_sets; 2845 while(cset!=NULL) { 2846 if (rf_does_it_fit(cset, ac)) { 2847 /* looks like it matches... */ 2848 ac->next = cset->ac; 2849 cset->ac = ac; 2850 break; 2851 } 2852 cset = cset->next; 2853 } 2854 if (cset==NULL) { 2855 /* didn't find a match above... 
new set..*/ 2856 cset = (RF_ConfigSet_t *) 2857 malloc(sizeof(RF_ConfigSet_t), 2858 M_RAIDFRAME, M_NOWAIT); 2859 if (cset == NULL) { 2860 panic("rf_create_auto_sets: No memory!"); 2861 } 2862 cset->ac = ac; 2863 ac->next = NULL; 2864 cset->next = config_sets; 2865 cset->rootable = 0; 2866 config_sets = cset; 2867 } 2868 } 2869 ac = ac_next; 2870 } 2871 2872 2873 return(config_sets); 2874 } 2875 2876 static int 2877 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 2878 { 2879 RF_ComponentLabel_t *clabel1, *clabel2; 2880 2881 /* If this one matches the *first* one in the set, that's good 2882 enough, since the other members of the set would have been 2883 through here too... */ 2884 /* note that we are not checking partitionSize here.. 2885 2886 Note that we are also not checking the mod_counters here. 2887 If everything else matches execpt the mod_counter, that's 2888 good enough for this test. We will deal with the mod_counters 2889 a little later in the autoconfiguration process. 2890 2891 (clabel1->mod_counter == clabel2->mod_counter) && 2892 2893 The reason we don't check for this is that failed disks 2894 will have lower modification counts. If those disks are 2895 not added to the set they used to belong to, then they will 2896 form their own set, which may result in 2 different sets, 2897 for example, competing to be configured at raid0, and 2898 perhaps competing to be the root filesystem set. If the 2899 wrong ones get configured, or both attempt to become /, 2900 weird behaviour and or serious lossage will occur. Thus we 2901 need to bring them into the fold here, and kick them out at 2902 a later point. 
2903 2904 */ 2905 2906 clabel1 = cset->ac->clabel; 2907 clabel2 = ac->clabel; 2908 if ((clabel1->version == clabel2->version) && 2909 (clabel1->serial_number == clabel2->serial_number) && 2910 (clabel1->num_rows == clabel2->num_rows) && 2911 (clabel1->num_columns == clabel2->num_columns) && 2912 (clabel1->sectPerSU == clabel2->sectPerSU) && 2913 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 2914 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 2915 (clabel1->parityConfig == clabel2->parityConfig) && 2916 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 2917 (clabel1->blockSize == clabel2->blockSize) && 2918 (clabel1->numBlocks == clabel2->numBlocks) && 2919 (clabel1->autoconfigure == clabel2->autoconfigure) && 2920 (clabel1->root_partition == clabel2->root_partition) && 2921 (clabel1->last_unit == clabel2->last_unit) && 2922 (clabel1->config_order == clabel2->config_order)) { 2923 /* if it get's here, it almost *has* to be a match */ 2924 } else { 2925 /* it's not consistent with somebody in the set.. 2926 punt */ 2927 return(0); 2928 } 2929 /* all was fine.. it must fit... */ 2930 return(1); 2931 } 2932 2933 int 2934 rf_have_enough_components(RF_ConfigSet_t *cset) 2935 { 2936 RF_AutoConfig_t *ac; 2937 RF_AutoConfig_t *auto_config; 2938 RF_ComponentLabel_t *clabel; 2939 int c; 2940 int num_cols; 2941 int num_missing; 2942 int mod_counter; 2943 int mod_counter_found; 2944 int even_pair_failed; 2945 char parity_type; 2946 2947 2948 /* check to see that we have enough 'live' components 2949 of this set. If so, we can configure it if necessary */ 2950 2951 num_cols = cset->ac->clabel->num_columns; 2952 parity_type = cset->ac->clabel->parityConfig; 2953 2954 /* XXX Check for duplicate components!?!?!? */ 2955 2956 /* Determine what the mod_counter is supposed to be for this set. 
*/ 2957 2958 mod_counter_found = 0; 2959 mod_counter = 0; 2960 ac = cset->ac; 2961 while(ac!=NULL) { 2962 if (mod_counter_found==0) { 2963 mod_counter = ac->clabel->mod_counter; 2964 mod_counter_found = 1; 2965 } else { 2966 if (ac->clabel->mod_counter > mod_counter) { 2967 mod_counter = ac->clabel->mod_counter; 2968 } 2969 } 2970 ac = ac->next; 2971 } 2972 2973 num_missing = 0; 2974 auto_config = cset->ac; 2975 2976 even_pair_failed = 0; 2977 for(c=0; c<num_cols; c++) { 2978 ac = auto_config; 2979 while(ac!=NULL) { 2980 if ((ac->clabel->column == c) && 2981 (ac->clabel->mod_counter == mod_counter)) { 2982 /* it's this one... */ 2983 #if DEBUG 2984 printf("Found: %s at %d\n", 2985 ac->devname,c); 2986 #endif 2987 break; 2988 } 2989 ac=ac->next; 2990 } 2991 if (ac==NULL) { 2992 /* Didn't find one here! */ 2993 /* special case for RAID 1, especially 2994 where there are more than 2 2995 components (where RAIDframe treats 2996 things a little differently :( ) */ 2997 if (parity_type == '1') { 2998 if (c%2 == 0) { /* even component */ 2999 even_pair_failed = 1; 3000 } else { /* odd component. If 3001 we're failed, and 3002 so is the even 3003 component, it's 3004 "Good Night, Charlie" */ 3005 if (even_pair_failed == 1) { 3006 return(0); 3007 } 3008 } 3009 } else { 3010 /* normal accounting */ 3011 num_missing++; 3012 } 3013 } 3014 if ((parity_type == '1') && (c%2 == 1)) { 3015 /* Just did an even component, and we didn't 3016 bail.. reset the even_pair_failed flag, 3017 and go on to the next component.... 
*/ 3018 even_pair_failed = 0; 3019 } 3020 } 3021 3022 clabel = cset->ac->clabel; 3023 3024 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3025 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3026 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3027 /* XXX this needs to be made *much* more general */ 3028 /* Too many failures */ 3029 return(0); 3030 } 3031 /* otherwise, all is well, and we've got enough to take a kick 3032 at autoconfiguring this set */ 3033 return(1); 3034 } 3035 3036 void 3037 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3038 RF_Raid_t *raidPtr) 3039 { 3040 RF_ComponentLabel_t *clabel; 3041 int i; 3042 3043 clabel = ac->clabel; 3044 3045 /* 1. Fill in the common stuff */ 3046 config->numRow = clabel->num_rows = 1; 3047 config->numCol = clabel->num_columns; 3048 config->numSpare = 0; /* XXX should this be set here? */ 3049 config->sectPerSU = clabel->sectPerSU; 3050 config->SUsPerPU = clabel->SUsPerPU; 3051 config->SUsPerRU = clabel->SUsPerRU; 3052 config->parityConfig = clabel->parityConfig; 3053 /* XXX... */ 3054 strcpy(config->diskQueueType,"fifo"); 3055 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3056 config->layoutSpecificSize = 0; /* XXX ?? 
*/ 3057 3058 while(ac!=NULL) { 3059 /* row/col values will be in range due to the checks 3060 in reasonable_label() */ 3061 strcpy(config->devnames[0][ac->clabel->column], 3062 ac->devname); 3063 ac = ac->next; 3064 } 3065 3066 for(i=0;i<RF_MAXDBGV;i++) { 3067 config->debugVars[i][0] = 0; 3068 } 3069 } 3070 3071 int 3072 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3073 { 3074 RF_ComponentLabel_t clabel; 3075 struct vnode *vp; 3076 dev_t dev; 3077 int column; 3078 int sparecol; 3079 3080 raidPtr->autoconfigure = new_value; 3081 3082 for(column=0; column<raidPtr->numCol; column++) { 3083 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3084 dev = raidPtr->Disks[column].dev; 3085 vp = raidPtr->raid_cinfo[column].ci_vp; 3086 raidread_component_label(dev, vp, &clabel); 3087 clabel.autoconfigure = new_value; 3088 raidwrite_component_label(dev, vp, &clabel); 3089 } 3090 } 3091 for(column = 0; column < raidPtr->numSpare ; column++) { 3092 sparecol = raidPtr->numCol + column; 3093 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3094 dev = raidPtr->Disks[sparecol].dev; 3095 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3096 raidread_component_label(dev, vp, &clabel); 3097 clabel.autoconfigure = new_value; 3098 raidwrite_component_label(dev, vp, &clabel); 3099 } 3100 } 3101 return(new_value); 3102 } 3103 3104 int 3105 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3106 { 3107 RF_ComponentLabel_t clabel; 3108 struct vnode *vp; 3109 dev_t dev; 3110 int column; 3111 int sparecol; 3112 3113 raidPtr->root_partition = new_value; 3114 for(column=0; column<raidPtr->numCol; column++) { 3115 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3116 dev = raidPtr->Disks[column].dev; 3117 vp = raidPtr->raid_cinfo[column].ci_vp; 3118 raidread_component_label(dev, vp, &clabel); 3119 clabel.root_partition = new_value; 3120 raidwrite_component_label(dev, vp, &clabel); 3121 } 3122 } 3123 for(column = 0; column < raidPtr->numSpare ; column++) { 3124 sparecol = 
raidPtr->numCol + column; 3125 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3126 dev = raidPtr->Disks[sparecol].dev; 3127 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3128 raidread_component_label(dev, vp, &clabel); 3129 clabel.root_partition = new_value; 3130 raidwrite_component_label(dev, vp, &clabel); 3131 } 3132 } 3133 return(new_value); 3134 } 3135 3136 void 3137 rf_release_all_vps(RF_ConfigSet_t *cset) 3138 { 3139 RF_AutoConfig_t *ac; 3140 3141 ac = cset->ac; 3142 while(ac!=NULL) { 3143 /* Close the vp, and give it back */ 3144 if (ac->vp) { 3145 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3146 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); 3147 vput(ac->vp); 3148 ac->vp = NULL; 3149 } 3150 ac = ac->next; 3151 } 3152 } 3153 3154 3155 void 3156 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3157 { 3158 RF_AutoConfig_t *ac; 3159 RF_AutoConfig_t *next_ac; 3160 3161 ac = cset->ac; 3162 while(ac!=NULL) { 3163 next_ac = ac->next; 3164 /* nuke the label */ 3165 free(ac->clabel, M_RAIDFRAME); 3166 /* cleanup the config structure */ 3167 free(ac, M_RAIDFRAME); 3168 /* "next.." */ 3169 ac = next_ac; 3170 } 3171 /* and, finally, nuke the config set */ 3172 free(cset, M_RAIDFRAME); 3173 } 3174 3175 3176 void 3177 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3178 { 3179 /* current version number */ 3180 clabel->version = RF_COMPONENT_LABEL_VERSION; 3181 clabel->serial_number = raidPtr->serial_number; 3182 clabel->mod_counter = raidPtr->mod_counter; 3183 clabel->num_rows = 1; 3184 clabel->num_columns = raidPtr->numCol; 3185 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3186 clabel->status = rf_ds_optimal; /* "It's good!" 
*/ 3187 3188 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3189 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3190 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3191 3192 clabel->blockSize = raidPtr->bytesPerSector; 3193 clabel->numBlocks = raidPtr->sectorsPerDisk; 3194 3195 /* XXX not portable */ 3196 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3197 clabel->maxOutstanding = raidPtr->maxOutstanding; 3198 clabel->autoconfigure = raidPtr->autoconfigure; 3199 clabel->root_partition = raidPtr->root_partition; 3200 clabel->last_unit = raidPtr->raidid; 3201 clabel->config_order = raidPtr->config_order; 3202 } 3203 3204 int 3205 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit) 3206 { 3207 RF_Raid_t *raidPtr; 3208 RF_Config_t *config; 3209 int raidID; 3210 int retcode; 3211 3212 #if DEBUG 3213 printf("RAID autoconfigure\n"); 3214 #endif 3215 3216 retcode = 0; 3217 *unit = -1; 3218 3219 /* 1. Create a config structure */ 3220 3221 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3222 M_RAIDFRAME, 3223 M_NOWAIT); 3224 if (config==NULL) { 3225 printf("Out of mem!?!?\n"); 3226 /* XXX do something more intelligent here. */ 3227 return(1); 3228 } 3229 3230 memset(config, 0, sizeof(RF_Config_t)); 3231 3232 /* 3233 2. Figure out what RAID ID this one is supposed to live at 3234 See if we can get the same RAID dev that it was configured 3235 on last time.. 3236 */ 3237 3238 raidID = cset->ac->clabel->last_unit; 3239 if ((raidID < 0) || (raidID >= numraid)) { 3240 /* let's not wander off into lala land. */ 3241 raidID = numraid - 1; 3242 } 3243 if (raidPtrs[raidID]->valid != 0) { 3244 3245 /* 3246 Nope... Go looking for an alternative... 3247 Start high so we don't immediately use raid0 if that's 3248 not taken. 3249 */ 3250 3251 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3252 if (raidPtrs[raidID]->valid == 0) { 3253 /* can use this one! */ 3254 break; 3255 } 3256 } 3257 } 3258 3259 if (raidID < 0) { 3260 /* punt... 
*/ 3261 printf("Unable to auto configure this set!\n"); 3262 printf("(Out of RAID devs!)\n"); 3263 free(config, M_RAIDFRAME); 3264 return(1); 3265 } 3266 3267 #if DEBUG 3268 printf("Configuring raid%d:\n",raidID); 3269 #endif 3270 3271 raidPtr = raidPtrs[raidID]; 3272 3273 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3274 raidPtr->raidid = raidID; 3275 raidPtr->openings = RAIDOUTSTANDING; 3276 3277 /* 3. Build the configuration structure */ 3278 rf_create_configuration(cset->ac, config, raidPtr); 3279 3280 /* 4. Do the configuration */ 3281 retcode = rf_Configure(raidPtr, config, cset->ac); 3282 3283 if (retcode == 0) { 3284 3285 raidinit(raidPtrs[raidID]); 3286 3287 rf_markalldirty(raidPtrs[raidID]); 3288 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3289 if (cset->ac->clabel->root_partition==1) { 3290 /* everything configured just fine. Make a note 3291 that this set is eligible to be root. */ 3292 cset->rootable = 1; 3293 /* XXX do this here? */ 3294 raidPtrs[raidID]->root_partition = 1; 3295 } 3296 } 3297 3298 /* 5. Cleanup */ 3299 free(config, M_RAIDFRAME); 3300 3301 *unit = raidID; 3302 return(retcode); 3303 } 3304 3305 void 3306 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3307 { 3308 struct buf *bp; 3309 3310 bp = (struct buf *)desc->bp; 3311 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3312 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3313 } 3314 3315 void 3316 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3317 size_t xmin, size_t xmax) 3318 { 3319 pool_init(p, size, 0, 0, 0, w_chan, NULL); 3320 pool_sethiwat(p, xmax); 3321 pool_prime(p, xmin); 3322 pool_setlowat(p, xmin); 3323 } 3324 3325 /* 3326 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see 3327 * if there is IO pending and if that IO could possibly be done for a 3328 * given RAID set. Returns 0 if IO is waiting and can be done, 1 3329 * otherwise. 
3330 * 3331 */ 3332 3333 int 3334 rf_buf_queue_check(int raidid) 3335 { 3336 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) && 3337 raidPtrs[raidid]->openings > 0) { 3338 /* there is work to do */ 3339 return 0; 3340 } 3341 /* default is nothing to do */ 3342 return 1; 3343 } 3344